2021-05-06 13:26:24

by Thomas Gleixner

[permalink] [raw]
Subject: KVM: x86: Prevent deadlock against tk_core.seq

syzbot reported a possible deadlock in pvclock_gtod_notify():

CPU 0                                       CPU 1
write_seqcount_begin(&tk_core.seq);
  pvclock_gtod_notify()                     spin_lock(&pool->lock);
    queue_work(..., &pvclock_gtod_work)     ktime_get()
      spin_lock(&pool->lock);                 do {
                                                seq = read_seqcount_begin(tk_core.seq)
                                                ...
                                              } while (read_seqcount_retry(&tk_core.seq, seq);

While this is unlikely to happen, it's possible.

Delegate queue_work() to irq_work() which postpones it until the
tk_core.seq write held region is left and interrupts are reenabled.

Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
Reported-by: [email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
---
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/kvm/x86.c | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8040,6 +8040,18 @@ static void pvclock_gtod_update_fn(struc
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);

/*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+ queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
+/*
* Notification about pvclock gtod data update.
*/
static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
@@ -8050,13 +8062,14 @@ static int pvclock_gtod_notify(struct no

update_pvclock_gtod(tk);

- /* disable master clock if host does not trust, or does not
- * use, TSC based clocksource.
+ /*
+ * Disable master clock if host does not trust, or does not use,
+ * TSC based clocksource. Delegate queue_work() to irq_work as
+ * this is invoked with tk_core.seq write held.
*/
if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
atomic_read(&kvm_guest_has_master_clock) != 0)
- queue_work(system_long_wq, &pvclock_gtod_work);
-
+ irq_work_queue(&pvclock_irq_work);
return 0;
}

@@ -8168,6 +8181,7 @@ void kvm_arch_exit(void)
cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
#ifdef CONFIG_X86_64
pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+ irq_work_sync(&pvclock_irq_work);
cancel_work_sync(&pvclock_gtod_work);
#endif
kvm_x86_ops.hardware_enable = NULL;


2021-05-06 13:41:08

by Peter Zijlstra

[permalink] [raw]
Subject: Re: KVM: x86: Prevent deadlock against tk_core.seq

On Thu, May 06, 2021 at 03:21:37PM +0200, Thomas Gleixner wrote:
> syzbot reported a possible deadlock in pvclock_gtod_notify():
>
> CPU 0 CPU 1
> write_seqcount_begin(&tk_core.seq);
> pvclock_gtod_notify() spin_lock(&pool->lock);
> queue_work(..., &pvclock_gtod_work) ktime_get()
> spin_lock(&pool->lock); do {
> seq = read_seqcount_begin(tk_core.seq)
> ...
> } while (read_seqcount_retry(&tk_core.seq, seq);
>
> While this is unlikely to happen, it's possible.
>
> Delegate queue_work() to irq_work() which postpones it until the
> tk_core.seq write held region is left and interrupts are reenabled.
>
> Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
> Reported-by: [email protected]
> Signed-off-by: Thomas Gleixner <[email protected]>

Acked-by: Peter Zijlstra (Intel) <[email protected]>

2021-05-06 18:45:55

by Paolo Bonzini

[permalink] [raw]
Subject: Re: KVM: x86: Prevent deadlock against tk_core.seq

On 06/05/21 15:21, Thomas Gleixner wrote:
> syzbot reported a possible deadlock in pvclock_gtod_notify():
>
> CPU 0 CPU 1
> write_seqcount_begin(&tk_core.seq);
> pvclock_gtod_notify() spin_lock(&pool->lock);
> queue_work(..., &pvclock_gtod_work) ktime_get()
> spin_lock(&pool->lock); do {
> seq = read_seqcount_begin(tk_core.seq)
> ...
> } while (read_seqcount_retry(&tk_core.seq, seq);
>
> While this is unlikely to happen, it's possible.
>
> Delegate queue_work() to irq_work() which postpones it until the
> tk_core.seq write held region is left and interrupts are reenabled.
>
> Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
> Reported-by: [email protected]
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> Link: https://lore.kernel.org/r/[email protected]
> ---
> arch/x86/kvm/x86.c | 22 ++++++++++++++++++----
> 1 file changed, 18 insertions(+), 4 deletions(-)
>
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -8040,6 +8040,18 @@ static void pvclock_gtod_update_fn(struc
> static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
>
> /*
> + * Indirection to move queue_work() out of the tk_core.seq write held
> + * region to prevent possible deadlocks against time accessors which
> + * are invoked with work related locks held.
> + */
> +static void pvclock_irq_work_fn(struct irq_work *w)
> +{
> + queue_work(system_long_wq, &pvclock_gtod_work);
> +}
> +
> +static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
> +
> +/*
> * Notification about pvclock gtod data update.
> */
> static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
> @@ -8050,13 +8062,14 @@ static int pvclock_gtod_notify(struct no
>
> update_pvclock_gtod(tk);
>
> - /* disable master clock if host does not trust, or does not
> - * use, TSC based clocksource.
> + /*
> + * Disable master clock if host does not trust, or does not use,
> + * TSC based clocksource. Delegate queue_work() to irq_work as
> + * this is invoked with tk_core.seq write held.
> */
> if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
> atomic_read(&kvm_guest_has_master_clock) != 0)
> - queue_work(system_long_wq, &pvclock_gtod_work);
> -
> + irq_work_queue(&pvclock_irq_work);
> return 0;
> }
>
> @@ -8168,6 +8181,7 @@ void kvm_arch_exit(void)
> cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
> #ifdef CONFIG_X86_64
> pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
> + irq_work_sync(&pvclock_irq_work);
> cancel_work_sync(&pvclock_gtod_work);
> #endif
> kvm_x86_ops.hardware_enable = NULL;
>

Queued, thanks.

Paolo