2020-04-22 04:15:08

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH v3 -tip 0/5] kvm: Use rcuwait for vcpu blocking

The following is an updated revision of the kvm vcpu to rcuwait conversion[0],
following the work on completions using simple waitqueues.

Changes from v2:
- new patch 3 which adds prepare_to_rcuwait and finish_rcu helpers.
- fixed broken sleep and tracepoint semantics in patch 4. (Marc and Paolo)

This has only been run tested on x86 but compile tested on mips, powerpc
and arm32.

This series applies on top of current -tip.

[0] https://lore.kernel.org/lkml/[email protected]/

Thanks!

Davidlohr Bueso (5):
rcuwait: Fix stale wake call name in comment
rcuwait: Let rcuwait_wake_up() return whether or not a task was awoken
rcuwait: Introduce prepare_to and finish_rcuwait
kvm: Replace vcpu->swait with rcuwait
sched/swait: Reword some of the main description

arch/mips/kvm/mips.c | 6 ++----
arch/powerpc/include/asm/kvm_book3s.h | 2 +-
arch/powerpc/include/asm/kvm_host.h | 2 +-
arch/powerpc/kvm/book3s_hv.c | 22 ++++++++--------------
arch/powerpc/kvm/powerpc.c | 2 +-
arch/x86/kvm/lapic.c | 2 +-
include/linux/kvm_host.h | 10 +++++-----
include/linux/rcuwait.h | 23 +++++++++++++++++------
include/linux/swait.h | 23 +++++------------------
kernel/exit.c | 9 ++++++---
virt/kvm/arm/arch_timer.c | 2 +-
virt/kvm/arm/arm.c | 9 +++++----
virt/kvm/async_pf.c | 3 +--
virt/kvm/kvm_main.c | 19 +++++++++----------
14 files changed, 63 insertions(+), 71 deletions(-)

--
2.16.4


2020-04-22 04:15:40

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 4/5] kvm: Replace vcpu->swait with rcuwait

The use of any sort of waitqueue (simple or regular) for
wait/waking vcpus has always been an overkill and semantically
wrong. Because this is per-vcpu (which is blocked) there is
only ever a single waiting vcpu, thus no need for any sort of
queue.

As such, make use of the rcuwait primitive, with the following
considerations:

- rcuwait already provides the proper barriers that serialize
concurrent waiter and waker.

- Task wakeup is done in rcu read critical region, with a
stable task pointer.

- Because there is no concurrency among waiters, we need
not worry about rcuwait_wait_event() calls corrupting
the wait->task. As a consequence, this saves the locking
done in swait when modifying the queue. This also applies
to per-vcore wait for powerpc kvm-hv.

The x86 tscdeadline_latency test mentioned in 8577370fb0cb
("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
latency is reduced by around 15-20% with this change.

Cc: Paul Mackerras <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Davidlohr Bueso <[email protected]>
---
arch/mips/kvm/mips.c | 6 ++----
arch/powerpc/include/asm/kvm_book3s.h | 2 +-
arch/powerpc/include/asm/kvm_host.h | 2 +-
arch/powerpc/kvm/book3s_hv.c | 22 ++++++++--------------
arch/powerpc/kvm/powerpc.c | 2 +-
arch/x86/kvm/lapic.c | 2 +-
include/linux/kvm_host.h | 10 +++++-----
virt/kvm/arm/arch_timer.c | 2 +-
virt/kvm/arm/arm.c | 9 +++++----
virt/kvm/async_pf.c | 3 +--
virt/kvm/kvm_main.c | 19 +++++++++----------
11 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 8f05dd0a0f4e..fad6acce46e4 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -284,8 +284,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
kvm_mips_callbacks->queue_timer_int(vcpu);

vcpu->arch.wait = 0;
- if (swq_has_sleeper(&vcpu->wq))
- swake_up_one(&vcpu->wq);
+ rcuwait_wake_up(&vcpu->wait);

return kvm_mips_count_timeout(vcpu);
}
@@ -511,8 +510,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,

dvcpu->arch.wait = 0;

- if (swq_has_sleeper(&dvcpu->wq))
- swake_up_one(&dvcpu->wq);
+ rcuwait_wake_up(&dvcpu->wait);

return 0;
}
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 506e4df2d730..6e5d85ba588d 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -78,7 +78,7 @@ struct kvmppc_vcore {
struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
struct list_head preempt_list;
spinlock_t lock;
- struct swait_queue_head wq;
+ struct rcuwait wait;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1dc63101ffe1..337047ba4a56 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -751,7 +751,7 @@ struct kvm_vcpu_arch {
u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;

- struct swait_queue_head *wqp;
+ struct rcuwait *waitp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 93493f0cbfe8..b8d42f523ca7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -230,13 +230,11 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
int cpu;
- struct swait_queue_head *wqp;
+ struct rcuwait *wait;

- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swq_has_sleeper(wqp)) {
- swake_up_one(wqp);
+ wait = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(wait))
++vcpu->stat.halt_wakeup;
- }

cpu = READ_ONCE(vcpu->arch.thread_cpu);
if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -2125,7 +2123,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)

spin_lock_init(&vcore->lock);
spin_lock_init(&vcore->stoltb_lock);
- init_swait_queue_head(&vcore->wq);
+ rcuwait_init(&vcore->wait);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = id;
@@ -3784,7 +3782,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
ktime_t cur, start_poll, start_wait;
int do_sleep = 1;
u64 block_ns;
- DECLARE_SWAITQUEUE(wait);

/* Poll for pending exceptions and ceded state */
cur = start_poll = ktime_get();
@@ -3812,10 +3809,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
}
}

- prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
-
if (kvmppc_vcore_check_block(vc)) {
- finish_swait(&vc->wq, &wait);
do_sleep = 0;
/* If we polled, count this as a successful poll */
if (vc->halt_poll_ns)
@@ -3828,8 +3822,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_SLEEPING;
trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
- schedule();
- finish_swait(&vc->wq, &wait);
+ rcuwait_wait_event(&vc->wait,
+ kvmppc_vcore_check_block(vc), TASK_INTERRUPTIBLE);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
trace_kvmppc_vcore_blocked(vc, 1);
@@ -3940,7 +3934,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- swake_up_one(&vc->wq);
+ rcuwait_wake_up(&vc->wait);
}

}
@@ -4279,7 +4273,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
user_vrsave = mfspr(SPRN_VRSAVE);

- vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+ vcpu->arch.waitp = &vcpu->arch.vcore->wait;
vcpu->arch.pgdir = kvm->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e15166b0a16d..4a074b587520 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -751,7 +751,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
goto out_vcpu_uninit;

- vcpu->arch.wqp = &vcpu->wq;
+ vcpu->arch.waitp = &vcpu->wait;
kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
return 0;

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9af25c97612a..fb2f56ba171d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1833,7 +1833,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
/* If the preempt notifier has already run, it also called apic_timer_expired */
if (!apic->lapic_timer.hv_timer_in_use)
goto out;
- WARN_ON(swait_active(&vcpu->wq));
+ WARN_ON(rcu_dereference(vcpu->wait.task));
cancel_hv_timer(apic);
apic_timer_expired(apic);

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6d58beb65454..fc34021546bd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,7 +23,7 @@
#include <linux/irqflags.h>
#include <linux/context_tracking.h>
#include <linux/irqbypass.h>
-#include <linux/swait.h>
+#include <linux/rcuwait.h>
#include <linux/refcount.h>
#include <linux/nospec.h>
#include <asm/signal.h>
@@ -277,7 +277,7 @@ struct kvm_vcpu {
struct mutex mutex;
struct kvm_run *run;

- struct swait_queue_head wq;
+ struct rcuwait wait;
struct pid __rcu *pid;
int sigset_active;
sigset_t sigset;
@@ -956,12 +956,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
}
#endif

-static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
{
#ifdef __KVM_HAVE_ARCH_WQP
- return vcpu->arch.wqp;
+ return vcpu->arch.waitp;
#else
- return &vcpu->wq;
+ return &vcpu->wait;
#endif
}

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 93bd59b46848..b2805105bbe5 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);

- if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+ if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
kvm_timer_blocking(vcpu);

/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 48d0ec44ad77..f94a10bb1251 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)

kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.pause = false;
- swake_up_one(kvm_arch_vcpu_wq(vcpu));
+ rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
}
}

static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
- struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

- swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
- (!vcpu->arch.pause)));
+ rcuwait_wait_event(*wait,
+ (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+ TASK_INTERRUPTIBLE);

if (vcpu->arch.power_off || vcpu->arch.pause) {
/* Awaken to handle a signal, request we sleep again later. */
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 15e5b037f92d..10b533f641a6 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,8 +80,7 @@ static void async_pf_execute(struct work_struct *work)

trace_kvm_async_pf_completed(addr, cr2_or_gpa);

- if (swq_has_sleeper(&vcpu->wq))
- swake_up_one(&vcpu->wq);
+ rcuwait_wake_up(&vcpu->wait);

mmput(mm);
kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 74bdb7bf3295..835fb109badf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -341,7 +341,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
- init_swait_queue_head(&vcpu->wq);
+ rcuwait_init(&vcpu->wait);
kvm_async_pf_vcpu_init(vcpu);

vcpu->pre_pcpu = -1;
@@ -2671,7 +2671,6 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
void kvm_vcpu_block(struct kvm_vcpu *vcpu)
{
ktime_t start, cur;
- DECLARE_SWAITQUEUE(wait);
bool waited = false;
u64 block_ns;

@@ -2697,8 +2696,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
} while (single_task_running() && ktime_before(cur, stop));
}

+ prepare_to_rcuwait(&vcpu->wait);
for (;;) {
- prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE);

if (kvm_vcpu_check_block(vcpu) < 0)
break;
@@ -2706,8 +2706,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
waited = true;
schedule();
}
-
- finish_swait(&vcpu->wq, &wait);
+ finish_rcuwait(&vcpu->wait);
cur = ktime_get();
out:
kvm_arch_vcpu_unblocking(vcpu);
@@ -2738,11 +2737,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block);

bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
- struct swait_queue_head *wqp;
+ struct rcuwait *wait;

- wqp = kvm_arch_vcpu_wq(vcpu);
- if (swq_has_sleeper(wqp)) {
- swake_up_one(wqp);
+ wait = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(wait)) {
WRITE_ONCE(vcpu->ready, true);
++vcpu->stat.halt_wakeup;
return true;
@@ -2884,7 +2882,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
if (vcpu == me)
continue;
- if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
+ if (rcu_dereference(vcpu->wait.task) &&
+ !vcpu_dy_runnable(vcpu))
continue;
if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
!kvm_arch_vcpu_in_kernel(vcpu))
--
2.16.4

2020-04-22 04:16:00

by Davidlohr Bueso

[permalink] [raw]
Subject: [PATCH 5/5] sched/swait: Reword some of the main description

With both the increased use of swait and kvm no longer using
it, we can reword some of the comments. While removing Linus'
valid rant, I've also cared to explicitly mention that swait
is very different than regular wait. In addition it is
mentioned against using swait in favor of the regular flavor.

Signed-off-by: Davidlohr Bueso <[email protected]>
---
include/linux/swait.h | 23 +++++------------------
1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/include/linux/swait.h b/include/linux/swait.h
index 73e06e9986d4..6a8c22b8c2a5 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -9,23 +9,10 @@
#include <asm/current.h>

/*
- * BROKEN wait-queues.
- *
- * These "simple" wait-queues are broken garbage, and should never be
- * used. The comments below claim that they are "similar" to regular
- * wait-queues, but the semantics are actually completely different, and
- * every single user we have ever had has been buggy (or pointless).
- *
- * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what
- * "wake_up()" does, and has led to problems. In other cases, it has
- * been fine, because there's only ever one waiter (kvm), but in that
- * case gthe whole "simple" wait-queue is just pointless to begin with,
- * since there is no "queue". Use "wake_up_process()" with a direct
- * pointer instead.
- *
- * While these are very similar to regular wait queues (wait.h) the most
- * important difference is that the simple waitqueue allows for deterministic
- * behaviour -- IOW it has strictly bounded IRQ and lock hold times.
+ * Simple waitqueues are semantically very different to regular wait queues
+ * (wait.h). The most important difference is that the simple waitqueue allows
+ * for deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
*
* Mainly, this is accomplished by two things. Firstly not allowing swake_up_all
* from IRQ disabled, and dropping the lock upon every wakeup, giving a higher
@@ -39,7 +26,7 @@
* sleeper state.
*
* - the !exclusive mode; because that leads to O(n) wakeups, everything is
- * exclusive.
+ * exclusive. As such swake_up_one will only ever awake _one_ waiter.
*
* - custom wake callback functions; because you cannot give any guarantees
* about random code. This also allows swait to be used in RT, such that
--
2.16.4

2020-04-22 08:35:51

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH 4/5] kvm: Replace vcpu->swait with rcuwait

On 22/04/20 06:07, Davidlohr Bueso wrote:
> The use of any sort of waitqueue (simple or regular) for
> wait/waking vcpus has always been an overkill and semantically
> wrong. Because this is per-vcpu (which is blocked) there is
> only ever a single waiting vcpu, thus no need for any sort of
> queue.
>
> As such, make use of the rcuwait primitive, with the following
> considerations:
>
> - rcuwait already provides the proper barriers that serialize
> concurrent waiter and waker.
>
> - Task wakeup is done in rcu read critical region, with a
> stable task pointer.
>
> - Because there is no concurrency among waiters, we need
> not worry about rcuwait_wait_event() calls corrupting
> the wait->task. As a consequence, this saves the locking
> done in swait when modifying the queue. This also applies
> to per-vcore wait for powerpc kvm-hv.
>
> The x86 tscdeadline_latency test mentioned in 8577370fb0cb
> ("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
> latency is reduced by around 15-20% with this change.
>
> Cc: Paul Mackerras <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Davidlohr Bueso <[email protected]>

Reviewed-by: Paolo Bonzini <[email protected]>

> ---
> arch/mips/kvm/mips.c | 6 ++----
> arch/powerpc/include/asm/kvm_book3s.h | 2 +-
> arch/powerpc/include/asm/kvm_host.h | 2 +-
> arch/powerpc/kvm/book3s_hv.c | 22 ++++++++--------------
> arch/powerpc/kvm/powerpc.c | 2 +-
> arch/x86/kvm/lapic.c | 2 +-
> include/linux/kvm_host.h | 10 +++++-----
> virt/kvm/arm/arch_timer.c | 2 +-
> virt/kvm/arm/arm.c | 9 +++++----
> virt/kvm/async_pf.c | 3 +--
> virt/kvm/kvm_main.c | 19 +++++++++----------
> 11 files changed, 35 insertions(+), 44 deletions(-)
>
> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> index 8f05dd0a0f4e..fad6acce46e4 100644
> --- a/arch/mips/kvm/mips.c
> +++ b/arch/mips/kvm/mips.c
> @@ -284,8 +284,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
> kvm_mips_callbacks->queue_timer_int(vcpu);
>
> vcpu->arch.wait = 0;
> - if (swq_has_sleeper(&vcpu->wq))
> - swake_up_one(&vcpu->wq);
> + rcuwait_wake_up(&vcpu->wait);
>
> return kvm_mips_count_timeout(vcpu);
> }
> @@ -511,8 +510,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
>
> dvcpu->arch.wait = 0;
>
> - if (swq_has_sleeper(&dvcpu->wq))
> - swake_up_one(&dvcpu->wq);
> + rcuwait_wake_up(&dvcpu->wait);
>
> return 0;
> }
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 506e4df2d730..6e5d85ba588d 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -78,7 +78,7 @@ struct kvmppc_vcore {
> struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
> struct list_head preempt_list;
> spinlock_t lock;
> - struct swait_queue_head wq;
> + struct rcuwait wait;
> spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
> u64 stolen_tb;
> u64 preempt_tb;
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 1dc63101ffe1..337047ba4a56 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -751,7 +751,7 @@ struct kvm_vcpu_arch {
> u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
> u32 last_inst;
>
> - struct swait_queue_head *wqp;
> + struct rcuwait *waitp;
> struct kvmppc_vcore *vcore;
> int ret;
> int trap;
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 93493f0cbfe8..b8d42f523ca7 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -230,13 +230,11 @@ static bool kvmppc_ipi_thread(int cpu)
> static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
> {
> int cpu;
> - struct swait_queue_head *wqp;
> + struct rcuwait *wait;
>
> - wqp = kvm_arch_vcpu_wq(vcpu);
> - if (swq_has_sleeper(wqp)) {
> - swake_up_one(wqp);
> + wait = kvm_arch_vcpu_get_wait(vcpu);
> + if (rcuwait_wake_up(wait))
> ++vcpu->stat.halt_wakeup;
> - }
>
> cpu = READ_ONCE(vcpu->arch.thread_cpu);
> if (cpu >= 0 && kvmppc_ipi_thread(cpu))
> @@ -2125,7 +2123,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
>
> spin_lock_init(&vcore->lock);
> spin_lock_init(&vcore->stoltb_lock);
> - init_swait_queue_head(&vcore->wq);
> + rcuwait_init(&vcore->wait);
> vcore->preempt_tb = TB_NIL;
> vcore->lpcr = kvm->arch.lpcr;
> vcore->first_vcpuid = id;
> @@ -3784,7 +3782,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
> ktime_t cur, start_poll, start_wait;
> int do_sleep = 1;
> u64 block_ns;
> - DECLARE_SWAITQUEUE(wait);
>
> /* Poll for pending exceptions and ceded state */
> cur = start_poll = ktime_get();
> @@ -3812,10 +3809,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
> }
> }
>
> - prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
> -
> if (kvmppc_vcore_check_block(vc)) {
> - finish_swait(&vc->wq, &wait);
> do_sleep = 0;
> /* If we polled, count this as a successful poll */
> if (vc->halt_poll_ns)
> @@ -3828,8 +3822,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
> vc->vcore_state = VCORE_SLEEPING;
> trace_kvmppc_vcore_blocked(vc, 0);
> spin_unlock(&vc->lock);
> - schedule();
> - finish_swait(&vc->wq, &wait);
> + rcuwait_wait_event(&vc->wait,
> + kvmppc_vcore_check_block(vc), TASK_INTERRUPTIBLE);
> spin_lock(&vc->lock);
> vc->vcore_state = VCORE_INACTIVE;
> trace_kvmppc_vcore_blocked(vc, 1);
> @@ -3940,7 +3934,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> kvmppc_start_thread(vcpu, vc);
> trace_kvm_guest_enter(vcpu);
> } else if (vc->vcore_state == VCORE_SLEEPING) {
> - swake_up_one(&vc->wq);
> + rcuwait_wake_up(&vc->wait);
> }
>
> }
> @@ -4279,7 +4273,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
> }
> user_vrsave = mfspr(SPRN_VRSAVE);
>
> - vcpu->arch.wqp = &vcpu->arch.vcore->wq;
> + vcpu->arch.waitp = &vcpu->arch.vcore->wait;
> vcpu->arch.pgdir = kvm->mm->pgd;
> vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
>
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index e15166b0a16d..4a074b587520 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -751,7 +751,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> if (err)
> goto out_vcpu_uninit;
>
> - vcpu->arch.wqp = &vcpu->wq;
> + vcpu->arch.waitp = &vcpu->wait;
> kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
> return 0;
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 9af25c97612a..fb2f56ba171d 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1833,7 +1833,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
> /* If the preempt notifier has already run, it also called apic_timer_expired */
> if (!apic->lapic_timer.hv_timer_in_use)
> goto out;
> - WARN_ON(swait_active(&vcpu->wq));
> + WARN_ON(rcu_dereference(vcpu->wait.task));
> cancel_hv_timer(apic);
> apic_timer_expired(apic);
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6d58beb65454..fc34021546bd 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -23,7 +23,7 @@
> #include <linux/irqflags.h>
> #include <linux/context_tracking.h>
> #include <linux/irqbypass.h>
> -#include <linux/swait.h>
> +#include <linux/rcuwait.h>
> #include <linux/refcount.h>
> #include <linux/nospec.h>
> #include <asm/signal.h>
> @@ -277,7 +277,7 @@ struct kvm_vcpu {
> struct mutex mutex;
> struct kvm_run *run;
>
> - struct swait_queue_head wq;
> + struct rcuwait wait;
> struct pid __rcu *pid;
> int sigset_active;
> sigset_t sigset;
> @@ -956,12 +956,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
> }
> #endif
>
> -static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
> +static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
> {
> #ifdef __KVM_HAVE_ARCH_WQP
> - return vcpu->arch.wqp;
> + return vcpu->arch.waitp;
> #else
> - return &vcpu->wq;
> + return &vcpu->wait;
> #endif
> }
>
> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index 93bd59b46848..b2805105bbe5 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
> if (map.emul_ptimer)
> soft_timer_cancel(&map.emul_ptimer->hrtimer);
>
> - if (swait_active(kvm_arch_vcpu_wq(vcpu)))
> + if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
> kvm_timer_blocking(vcpu);
>
> /*
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 48d0ec44ad77..f94a10bb1251 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)
>
> kvm_for_each_vcpu(i, vcpu, kvm) {
> vcpu->arch.pause = false;
> - swake_up_one(kvm_arch_vcpu_wq(vcpu));
> + rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
> }
> }
>
> static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
> {
> - struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
> + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
>
> - swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
> - (!vcpu->arch.pause)));
> + rcuwait_wait_event(*wait,
> + (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
> + TASK_INTERRUPTIBLE);
>
> if (vcpu->arch.power_off || vcpu->arch.pause) {
> /* Awaken to handle a signal, request we sleep again later. */
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index 15e5b037f92d..10b533f641a6 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -80,8 +80,7 @@ static void async_pf_execute(struct work_struct *work)
>
> trace_kvm_async_pf_completed(addr, cr2_or_gpa);
>
> - if (swq_has_sleeper(&vcpu->wq))
> - swake_up_one(&vcpu->wq);
> + rcuwait_wake_up(&vcpu->wait);
>
> mmput(mm);
> kvm_put_kvm(vcpu->kvm);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 74bdb7bf3295..835fb109badf 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -341,7 +341,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
> vcpu->kvm = kvm;
> vcpu->vcpu_id = id;
> vcpu->pid = NULL;
> - init_swait_queue_head(&vcpu->wq);
> + rcuwait_init(&vcpu->wait);
> kvm_async_pf_vcpu_init(vcpu);
>
> vcpu->pre_pcpu = -1;
> @@ -2671,7 +2671,6 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
> void kvm_vcpu_block(struct kvm_vcpu *vcpu)
> {
> ktime_t start, cur;
> - DECLARE_SWAITQUEUE(wait);
> bool waited = false;
> u64 block_ns;
>
> @@ -2697,8 +2696,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
> } while (single_task_running() && ktime_before(cur, stop));
> }
>
> + prepare_to_rcuwait(&vcpu->wait);
> for (;;) {
> - prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
> + set_current_state(TASK_INTERRUPTIBLE);
>
> if (kvm_vcpu_check_block(vcpu) < 0)
> break;
> @@ -2706,8 +2706,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
> waited = true;
> schedule();
> }
> -
> - finish_swait(&vcpu->wq, &wait);
> + finish_rcuwait(&vcpu->wait);
> cur = ktime_get();
> out:
> kvm_arch_vcpu_unblocking(vcpu);
> @@ -2738,11 +2737,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block);
>
> bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
> {
> - struct swait_queue_head *wqp;
> + struct rcuwait *wait;
>
> - wqp = kvm_arch_vcpu_wq(vcpu);
> - if (swq_has_sleeper(wqp)) {
> - swake_up_one(wqp);
> + wait = kvm_arch_vcpu_get_wait(vcpu);
> + if (rcuwait_wake_up(wait)) {
> WRITE_ONCE(vcpu->ready, true);
> ++vcpu->stat.halt_wakeup;
> return true;
> @@ -2884,7 +2882,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
> continue;
> if (vcpu == me)
> continue;
> - if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
> + if (rcu_dereference(vcpu->wait.task) &&
> + !vcpu_dy_runnable(vcpu))
> continue;
> if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
> !kvm_arch_vcpu_in_kernel(vcpu))
>

2020-04-22 10:53:36

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 4/5] kvm: Replace vcpu->swait with rcuwait

On 2020-04-22 05:07, Davidlohr Bueso wrote:
> The use of any sort of waitqueue (simple or regular) for
> wait/waking vcpus has always been an overkill and semantically
> wrong. Because this is per-vcpu (which is blocked) there is
> only ever a single waiting vcpu, thus no need for any sort of
> queue.
>
> As such, make use of the rcuwait primitive, with the following
> considerations:
>
> - rcuwait already provides the proper barriers that serialize
> concurrent waiter and waker.
>
> - Task wakeup is done in rcu read critical region, with a
> stable task pointer.
>
> - Because there is no concurrency among waiters, we need
> not worry about rcuwait_wait_event() calls corrupting
> the wait->task. As a consequence, this saves the locking
> done in swait when modifying the queue. This also applies
> to per-vcore wait for powerpc kvm-hv.
>
> The x86 tscdeadline_latency test mentioned in 8577370fb0cb
> ("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
> latency is reduced by around 15-20% with this change.
>
> Cc: Paul Mackerras <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Davidlohr Bueso <[email protected]>

Reviewed-by: Marc Zyngier <[email protected]>

Thanks,

M.
--
Who you jivin' with that Cosmik Debris?

2020-04-22 11:35:02

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v3 -tip 0/5] kvm: Use rcuwait for vcpu blocking

On Tue, Apr 21, 2020 at 09:07:34PM -0700, Davidlohr Bueso wrote:
> Davidlohr Bueso (5):
> rcuwait: Fix stale wake call name in comment
> rcuwait: Let rcuwait_wake_up() return whether or not a task was awoken
> rcuwait: Introduce prepare_to and finish_rcuwait
> kvm: Replace vcpu->swait with rcuwait
> sched/swait: Reword some of the main description

Thanks Dave!

2020-04-23 08:46:53

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH 4/5] kvm: Replace vcpu->swait with rcuwait

On Tue, 21 Apr 2020 21:07:38 -0700
Davidlohr Bueso <[email protected]> wrote:

> The use of any sort of waitqueue (simple or regular) for
> wait/waking vcpus has always been an overkill and semantically
> wrong. Because this is per-vcpu (which is blocked) there is
> only ever a single waiting vcpu, thus no need for any sort of
> queue.
>
> As such, make use of the rcuwait primitive, with the following
> considerations:
>
> - rcuwait already provides the proper barriers that serialize
> concurrent waiter and waker.
>
> - Task wakeup is done in rcu read critical region, with a
> stable task pointer.
>
> - Because there is no concurrency among waiters, we need
> not worry about rcuwait_wait_event() calls corrupting
> the wait->task. As a consequence, this saves the locking
> done in swait when modifying the queue. This also applies
> to per-vcore wait for powerpc kvm-hv.
>
> The x86 tscdeadline_latency test mentioned in 8577370fb0cb
> ("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
> latency is reduced by around 15-20% with this change.
>
> Cc: Paul Mackerras <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Davidlohr Bueso <[email protected]>
> ---
> arch/mips/kvm/mips.c | 6 ++----
> arch/powerpc/include/asm/kvm_book3s.h | 2 +-
> arch/powerpc/include/asm/kvm_host.h | 2 +-
> arch/powerpc/kvm/book3s_hv.c | 22 ++++++++--------------
> arch/powerpc/kvm/powerpc.c | 2 +-
> arch/x86/kvm/lapic.c | 2 +-
> include/linux/kvm_host.h | 10 +++++-----
> virt/kvm/arm/arch_timer.c | 2 +-
> virt/kvm/arm/arm.c | 9 +++++----
> virt/kvm/async_pf.c | 3 +--
> virt/kvm/kvm_main.c | 19 +++++++++----------
> 11 files changed, 35 insertions(+), 44 deletions(-)

[...]

I should have tested it *before* acking it, really.

> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index 93bd59b46848..b2805105bbe5 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
> if (map.emul_ptimer)
> soft_timer_cancel(&map.emul_ptimer->hrtimer);
>
> - if (swait_active(kvm_arch_vcpu_wq(vcpu)))
> + if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)

This doesn't compile (wrong function name, and rcu_dereference takes a
variable). But whatever it would do if we fixed it looks dodgy. it isn't
the rcuwait structure that you want to dereference, but rcuwait->task
(we are checking whether we are called because we are blocking or being
preempted).

> kvm_timer_blocking(vcpu);
>
> /*
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 48d0ec44ad77..f94a10bb1251 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)
>
> kvm_for_each_vcpu(i, vcpu, kvm) {
> vcpu->arch.pause = false;
> - swake_up_one(kvm_arch_vcpu_wq(vcpu));
> + rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
> }
> }
>
> static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
> {
> - struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
> + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
>
> - swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
> - (!vcpu->arch.pause)));
> + rcuwait_wait_event(*wait,
> + (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
> + TASK_INTERRUPTIBLE);

As noticed by the kbuild robot, this doesn't compile either.

I fixed it as follow, and it survived a very basic test run in a model
(more testing later).

Thanks,

M.

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b2805105bbe56..2dbd14dcae9fb 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -569,6 +569,7 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)

void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
struct timer_map map;

@@ -593,7 +594,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
if (map.emul_ptimer)
soft_timer_cancel(&map.emul_ptimer->hrtimer);

- if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
+ if (rcu_dereference(wait->task))
kvm_timer_blocking(vcpu);

/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f94a10bb1251b..479f36d02418d 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -587,7 +587,7 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);

- rcuwait_wait_event(*wait,
+ rcuwait_wait_event(wait,
(!vcpu->arch.power_off) &&(!vcpu->arch.pause),
TASK_INTERRUPTIBLE);


--
Jazz is not dead. It just smells funny...

2020-04-23 09:02:37

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH 4/5] kvm: Replace vcpu->swait with rcuwait

On 23/04/20 10:41, Marc Zyngier wrote:
>>
>> - if (swait_active(kvm_arch_vcpu_wq(vcpu)))
>> + if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
> This doesn't compile (wrong function name, and rcu_dereference takes a
> variable). But whatever it would do if we fixed it looks dodgy. it isn't
> the rcuwait structure that you want to dereference, but rcuwait->task
> (we are checking whether we are called because we are blocking or being
> preempted).
>

Yes, I agree. Replacing swait with rcuwait is all good, but please make
the API look the same first. Just like you added prepare_to_rcuwait and
finish_rcuwait, let's add rcuwait_active as well.

Actually let's do it like this:

1) Davidlohr, please post only patches 1-3 to "equalize" the swait and
rcuwait APIs.

2) Peter, please prepare a topic branch for those, or provide Acked-by

3) let's get everything else through the KVM tree.

Thanks,

Paolo

2020-04-23 09:21:34

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 4/5] kvm: Replace vcpu->swait with rcuwait

On Thu, Apr 23, 2020 at 10:57:57AM +0200, Paolo Bonzini wrote:
> On 23/04/20 10:41, Marc Zyngier wrote:
> >>
> >> - if (swait_active(kvm_arch_vcpu_wq(vcpu)))
> >> + if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
> > This doesn't compile (wrong function name, and rcu_dereference takes a
> > variable). But whatever it would do if we fixed it looks dodgy. it isn't
> > the rcuwait structure that you want to dereference, but rcuwait->task
> > (we are checking whether we are called because we are blocking or being
> > preempted).
> >
>
> Yes, I agree. Replacing swait with rcuwait is all good, but please make
> the API look the same first. Just like you added prepare_to_rcuwait and
> finish_rcuwait, let's add rcuwait_active as well.
>
> Actually let's do it like this:
>
> 1) Davidlohr, please post only patches 1-3 to "equalize" the swait and
> rcuwait APIs.
>
> 2) Peter, please prepare a topic branch for those, or provide Acked-by

I don't think I have anything that conflicts with this, so sure, take
the whole thing through KVM.

For 1-3 (and I'll send a small niggle for 3 right after this):

Acked-by: Peter Zijlstra (Intel) <[email protected]>

I'll keep 5 as it is unrelated.

2020-04-23 09:28:08

by Paolo Bonzini

[permalink] [raw]
Subject: Re: [PATCH 4/5] kvm: Replace vcpu->swait with rcuwait

On 23/04/20 11:19, Peter Zijlstra wrote:
>>
>> 1) Davidlohr, please post only patches 1-3 to "equalize" the swait and
>> rcuwait APIs.
>>
>> 2) Peter, please prepare a topic branch for those, or provide Acked-by
> I don't think I have anything that conflicts with this, so sure, take
> the whole thing through KVM.
>
> For 1-3 (and I'll send a small niggle for 3 right after this):
>
> Acked-by: Peter Zijlstra (Intel) <[email protected]>

Great thanks. I assume you have no issue with rcuwait_active either.

Paolo

> I'll keep 5 as it is unrelated.
>

Subject: [tip: sched/core] sched/swait: Reword some of the main description

The following commit has been merged into the sched/core branch of tip:

Commit-ID: 12ac6782a40ad7636b6ef45680741825b64ab221
Gitweb: https://git.kernel.org/tip/12ac6782a40ad7636b6ef45680741825b64ab221
Author: Davidlohr Bueso <[email protected]>
AuthorDate: Tue, 21 Apr 2020 21:07:39 -07:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Thu, 30 Apr 2020 20:14:41 +02:00

sched/swait: Reword some of the main description

With both the increased use of swait and kvm no longer using
it, we can reword some of the comments. While removing Linus'
valid rant, I've also cared to explicitly mention that swait
is very different than regular wait. In addition it is
mentioned against using swait in favor of the regular flavor.

Signed-off-by: Davidlohr Bueso <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
include/linux/swait.h | 23 +++++------------------
1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/include/linux/swait.h b/include/linux/swait.h
index 73e06e9..6a8c22b 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -9,23 +9,10 @@
#include <asm/current.h>

/*
- * BROKEN wait-queues.
- *
- * These "simple" wait-queues are broken garbage, and should never be
- * used. The comments below claim that they are "similar" to regular
- * wait-queues, but the semantics are actually completely different, and
- * every single user we have ever had has been buggy (or pointless).
- *
- * A "swake_up_one()" only wakes up _one_ waiter, which is not at all what
- * "wake_up()" does, and has led to problems. In other cases, it has
- * been fine, because there's only ever one waiter (kvm), but in that
- * case gthe whole "simple" wait-queue is just pointless to begin with,
- * since there is no "queue". Use "wake_up_process()" with a direct
- * pointer instead.
- *
- * While these are very similar to regular wait queues (wait.h) the most
- * important difference is that the simple waitqueue allows for deterministic
- * behaviour -- IOW it has strictly bounded IRQ and lock hold times.
+ * Simple waitqueues are semantically very different to regular wait queues
+ * (wait.h). The most important difference is that the simple waitqueue allows
+ * for deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold
+ * times.
*
* Mainly, this is accomplished by two things. Firstly not allowing swake_up_all
* from IRQ disabled, and dropping the lock upon every wakeup, giving a higher
@@ -39,7 +26,7 @@
* sleeper state.
*
* - the !exclusive mode; because that leads to O(n) wakeups, everything is
- * exclusive.
+ * exclusive. As such swake_up_one will only ever awake _one_ waiter.
*
* - custom wake callback functions; because you cannot give any guarantees
* about random code. This also allows swait to be used in RT, such that