2014-12-10 13:22:51

by David Hildenbrand

[permalink] [raw]
Subject: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock

Commit b2c4623dcd07 ("rcu: More on deadlock between CPU hotplug and expedited
grace periods") introduced another problem that can easily be reproduced by
starting/stopping cpus in a loop.

E.g.:
for i in `seq 5000`; do
echo 1 > /sys/devices/system/cpu/cpu1/online
echo 0 > /sys/devices/system/cpu/cpu1/online
done

Will result in:
INFO: task /cpu_start_stop:1 blocked for more than 120 seconds.
Call Trace:
([<00000000006a028e>] __schedule+0x406/0x91c)
[<0000000000130f60>] cpu_hotplug_begin+0xd0/0xd4
[<0000000000130ff6>] _cpu_up+0x3e/0x1c4
[<0000000000131232>] cpu_up+0xb6/0xd4
[<00000000004a5720>] device_online+0x80/0xc0
[<00000000004a57f0>] online_store+0x90/0xb0
...

And a deadlock.

Problem is that if the last ref in put_online_cpus() can't get the
cpu_hotplug.lock the puts_pending count is incremented, but a sleeping
active_writer might never be woken up, therefore never exiting the loop in
cpu_hotplug_begin().

This fix removes puts_pending and turns refcount into an atomic variable. We
also introduce a wait queue for the active_writer, to avoid possible races and
use-after-free. There is no need to take the lock in put_online_cpus() anymore.

Also rearrange the lockdep anotations so we won't get false positives.

Can't reproduce it with this fix.

Signed-off-by: David Hildenbrand <[email protected]>
---
kernel/cpu.c | 60 ++++++++++++++++++++++++++----------------------------------
1 file changed, 26 insertions(+), 34 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 5d22023..3f1d5ba 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -58,22 +58,23 @@ static int cpu_hotplug_disabled;

static struct {
struct task_struct *active_writer;
- struct mutex lock; /* Synchronizes accesses to refcount, */
+ /* wait queue to wake up the active_writer */
+ wait_queue_head_t wq;
+ /* verifies that no writer will get active while readers are active */
+ struct mutex lock;
/*
* Also blocks the new readers during
* an ongoing cpu hotplug operation.
*/
- int refcount;
- /* And allows lockless put_online_cpus(). */
- atomic_t puts_pending;
+ atomic_t refcount;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
} cpu_hotplug = {
.active_writer = NULL,
+ .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
- .refcount = 0,
#ifdef CONFIG_DEBUG_LOCK_ALLOC
.dep_map = {.name = "cpu_hotplug.lock" },
#endif
@@ -86,15 +87,6 @@ static struct {
#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)

-static void apply_puts_pending(int max)
-{
- int delta;
-
- if (atomic_read(&cpu_hotplug.puts_pending) >= max) {
- delta = atomic_xchg(&cpu_hotplug.puts_pending, 0);
- cpu_hotplug.refcount -= delta;
- }
-}

void get_online_cpus(void)
{
@@ -103,9 +95,9 @@ void get_online_cpus(void)
return;
cpuhp_lock_acquire_read();
mutex_lock(&cpu_hotplug.lock);
- apply_puts_pending(65536);
- cpu_hotplug.refcount++;
+ atomic_inc(&cpu_hotplug.refcount);
mutex_unlock(&cpu_hotplug.lock);
+ cpuhp_lock_release();
}
EXPORT_SYMBOL_GPL(get_online_cpus);

@@ -116,9 +108,9 @@ bool try_get_online_cpus(void)
if (!mutex_trylock(&cpu_hotplug.lock))
return false;
cpuhp_lock_acquire_tryread();
- apply_puts_pending(65536);
- cpu_hotplug.refcount++;
+ atomic_inc(&cpu_hotplug.refcount);
mutex_unlock(&cpu_hotplug.lock);
+ cpuhp_lock_release();
return true;
}
EXPORT_SYMBOL_GPL(try_get_online_cpus);
@@ -127,20 +119,16 @@ void put_online_cpus(void)
{
if (cpu_hotplug.active_writer == current)
return;
- if (!mutex_trylock(&cpu_hotplug.lock)) {
- atomic_inc(&cpu_hotplug.puts_pending);
- cpuhp_lock_release();
- return;
- }
-
- if (WARN_ON(!cpu_hotplug.refcount))
- cpu_hotplug.refcount++; /* try to fix things up */

- if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
- wake_up_process(cpu_hotplug.active_writer);
- mutex_unlock(&cpu_hotplug.lock);
- cpuhp_lock_release();
+ if (atomic_dec_and_test(&cpu_hotplug.refcount) &&
+ waitqueue_active(&cpu_hotplug.wq))
+ wake_up(&cpu_hotplug.wq);

+ /* missing get - try to fix things up */
+ if (WARN_ON(atomic_read(&cpu_hotplug.refcount) < 0))
+ atomic_set(&cpu_hotplug.refcount, 0);
+ if (waitqueue_active(&cpu_hotplug.wq))
+ wake_up(&cpu_hotplug.wq);
}
EXPORT_SYMBOL_GPL(put_online_cpus);

@@ -168,18 +156,22 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
*/
void cpu_hotplug_begin(void)
{
+ DEFINE_WAIT(wait);
+
cpu_hotplug.active_writer = current;

- cpuhp_lock_acquire();
for (;;) {
+ cpuhp_lock_acquire();
mutex_lock(&cpu_hotplug.lock);
- apply_puts_pending(1);
- if (likely(!cpu_hotplug.refcount))
+ prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
+ if (likely(!atomic_read(&cpu_hotplug.refcount)))
break;
- __set_current_state(TASK_UNINTERRUPTIBLE);
mutex_unlock(&cpu_hotplug.lock);
+ cpuhp_lock_release();
schedule();
}
+
+ finish_wait(&cpu_hotplug.wq, &wait);
}

void cpu_hotplug_done(void)
--
1.8.5.5


2014-12-10 13:26:28

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock

> Commit b2c4623dcd07 ("rcu: More on deadlock between CPU hotplug and expedited
> grace periods") introduced another problem that can easily be reproduced by
> starting/stopping cpus in a loop.
>
> E.g.:
> for i in `seq 5000`; do
> echo 1 > /sys/devices/system/cpu/cpu1/online
> echo 0 > /sys/devices/system/cpu/cpu1/online
> done
>
> Will result in:
> INFO: task /cpu_start_stop:1 blocked for more than 120 seconds.
> Call Trace:
> ([<00000000006a028e>] __schedule+0x406/0x91c)
> [<0000000000130f60>] cpu_hotplug_begin+0xd0/0xd4
> [<0000000000130ff6>] _cpu_up+0x3e/0x1c4
> [<0000000000131232>] cpu_up+0xb6/0xd4
> [<00000000004a5720>] device_online+0x80/0xc0
> [<00000000004a57f0>] online_store+0x90/0xb0
> ...
>
> And a deadlock.
>
> Problem is that if the last ref in put_online_cpus() can't get the
> cpu_hotplug.lock the puts_pending count is incremented, but a sleeping
> active_writer might never be woken up, therefore never exiting the loop in
> cpu_hotplug_begin().
>
> This fix removes puts_pending and turns refcount into an atomic variable. We
> also introduce a wait queue for the active_writer, to avoid possible races and
> use-after-free. There is no need to take the lock in put_online_cpus() anymore.
>
> Also rearrange the lockdep anotations so we won't get false positives.
>
> Can't reproduce it with this fix.
>
> Signed-off-by: David Hildenbrand <[email protected]>
> ---
> kernel/cpu.c | 60 ++++++++++++++++++++++++++----------------------------------
> 1 file changed, 26 insertions(+), 34 deletions(-)
>
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 5d22023..3f1d5ba 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -58,22 +58,23 @@ static int cpu_hotplug_disabled;
>
> static struct {
> struct task_struct *active_writer;
> - struct mutex lock; /* Synchronizes accesses to refcount, */
> + /* wait queue to wake up the active_writer */
> + wait_queue_head_t wq;
> + /* verifies that no writer will get active while readers are active */
> + struct mutex lock;
> /*
> * Also blocks the new readers during
> * an ongoing cpu hotplug operation.
> */
> - int refcount;
> - /* And allows lockless put_online_cpus(). */
> - atomic_t puts_pending;
> + atomic_t refcount;
>
> #ifdef CONFIG_DEBUG_LOCK_ALLOC
> struct lockdep_map dep_map;
> #endif
> } cpu_hotplug = {
> .active_writer = NULL,
> + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
> .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
> - .refcount = 0,
> #ifdef CONFIG_DEBUG_LOCK_ALLOC
> .dep_map = {.name = "cpu_hotplug.lock" },
> #endif
> @@ -86,15 +87,6 @@ static struct {
> #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
> #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
>
> -static void apply_puts_pending(int max)
> -{
> - int delta;
> -
> - if (atomic_read(&cpu_hotplug.puts_pending) >= max) {
> - delta = atomic_xchg(&cpu_hotplug.puts_pending, 0);
> - cpu_hotplug.refcount -= delta;
> - }
> -}
>
> void get_online_cpus(void)
> {
> @@ -103,9 +95,9 @@ void get_online_cpus(void)
> return;
> cpuhp_lock_acquire_read();
> mutex_lock(&cpu_hotplug.lock);
> - apply_puts_pending(65536);
> - cpu_hotplug.refcount++;
> + atomic_inc(&cpu_hotplug.refcount);
> mutex_unlock(&cpu_hotplug.lock);
> + cpuhp_lock_release();
> }
> EXPORT_SYMBOL_GPL(get_online_cpus);
>
> @@ -116,9 +108,9 @@ bool try_get_online_cpus(void)
> if (!mutex_trylock(&cpu_hotplug.lock))
> return false;
> cpuhp_lock_acquire_tryread();
> - apply_puts_pending(65536);
> - cpu_hotplug.refcount++;
> + atomic_inc(&cpu_hotplug.refcount);
> mutex_unlock(&cpu_hotplug.lock);
> + cpuhp_lock_release();
> return true;
> }
> EXPORT_SYMBOL_GPL(try_get_online_cpus);
> @@ -127,20 +119,16 @@ void put_online_cpus(void)
> {
> if (cpu_hotplug.active_writer == current)
> return;
> - if (!mutex_trylock(&cpu_hotplug.lock)) {
> - atomic_inc(&cpu_hotplug.puts_pending);
> - cpuhp_lock_release();
> - return;
> - }
> -
> - if (WARN_ON(!cpu_hotplug.refcount))
> - cpu_hotplug.refcount++; /* try to fix things up */
>
> - if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
> - wake_up_process(cpu_hotplug.active_writer);
> - mutex_unlock(&cpu_hotplug.lock);
> - cpuhp_lock_release();
> + if (atomic_dec_and_test(&cpu_hotplug.refcount) &&
> + waitqueue_active(&cpu_hotplug.wq))
> + wake_up(&cpu_hotplug.wq);
>
> + /* missing get - try to fix things up */

The only thing that is bugging me is this part. Without the lock we can't
guarantee that another get_online_cpus() just arrived and bumped the refcount
to 0.

Of course this only applies to misuse of put/get_online_cpus.

We could hack some loop that tries to cmp_xchng with the old value until it
fits to work around this, but wouldn't make the code any better readable.

> + if (WARN_ON(atomic_read(&cpu_hotplug.refcount) < 0))
> + atomic_set(&cpu_hotplug.refcount, 0);
> + if (waitqueue_active(&cpu_hotplug.wq))
> + wake_up(&cpu_hotplug.wq);
> }
> EXPORT_SYMBOL_GPL(put_online_cpus);
>
> @@ -168,18 +156,22 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
> */
> void cpu_hotplug_begin(void)
> {
> + DEFINE_WAIT(wait);
> +
> cpu_hotplug.active_writer = current;
>
> - cpuhp_lock_acquire();
> for (;;) {
> + cpuhp_lock_acquire();
> mutex_lock(&cpu_hotplug.lock);
> - apply_puts_pending(1);
> - if (likely(!cpu_hotplug.refcount))
> + prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
> + if (likely(!atomic_read(&cpu_hotplug.refcount)))
> break;
> - __set_current_state(TASK_UNINTERRUPTIBLE);
> mutex_unlock(&cpu_hotplug.lock);
> + cpuhp_lock_release();
> schedule();
> }
> +
> + finish_wait(&cpu_hotplug.wq, &wait);
> }
>
> void cpu_hotplug_done(void)



David

2014-12-10 16:12:33

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock

On Wed, Dec 10, 2014 at 02:26:19PM +0100, David Hildenbrand wrote:
> > Commit b2c4623dcd07 ("rcu: More on deadlock between CPU hotplug and expedited
> > grace periods") introduced another problem that can easily be reproduced by
> > starting/stopping cpus in a loop.
> >
> > E.g.:
> > for i in `seq 5000`; do
> > echo 1 > /sys/devices/system/cpu/cpu1/online
> > echo 0 > /sys/devices/system/cpu/cpu1/online
> > done
> >
> > Will result in:
> > INFO: task /cpu_start_stop:1 blocked for more than 120 seconds.
> > Call Trace:
> > ([<00000000006a028e>] __schedule+0x406/0x91c)
> > [<0000000000130f60>] cpu_hotplug_begin+0xd0/0xd4
> > [<0000000000130ff6>] _cpu_up+0x3e/0x1c4
> > [<0000000000131232>] cpu_up+0xb6/0xd4
> > [<00000000004a5720>] device_online+0x80/0xc0
> > [<00000000004a57f0>] online_store+0x90/0xb0
> > ...
> >
> > And a deadlock.
> >
> > Problem is that if the last ref in put_online_cpus() can't get the
> > cpu_hotplug.lock the puts_pending count is incremented, but a sleeping
> > active_writer might never be woken up, therefore never exiting the loop in
> > cpu_hotplug_begin().
> >
> > This fix removes puts_pending and turns refcount into an atomic variable. We
> > also introduce a wait queue for the active_writer, to avoid possible races and
> > use-after-free. There is no need to take the lock in put_online_cpus() anymore.
> >
> > Also rearrange the lockdep anotations so we won't get false positives.
> >
> > Can't reproduce it with this fix.
> >
> > Signed-off-by: David Hildenbrand <[email protected]>
> > ---
> > kernel/cpu.c | 60 ++++++++++++++++++++++++++----------------------------------
> > 1 file changed, 26 insertions(+), 34 deletions(-)
> >
> > diff --git a/kernel/cpu.c b/kernel/cpu.c
> > index 5d22023..3f1d5ba 100644
> > --- a/kernel/cpu.c
> > +++ b/kernel/cpu.c
> > @@ -58,22 +58,23 @@ static int cpu_hotplug_disabled;
> >
> > static struct {
> > struct task_struct *active_writer;
> > - struct mutex lock; /* Synchronizes accesses to refcount, */
> > + /* wait queue to wake up the active_writer */
> > + wait_queue_head_t wq;
> > + /* verifies that no writer will get active while readers are active */
> > + struct mutex lock;
> > /*
> > * Also blocks the new readers during
> > * an ongoing cpu hotplug operation.
> > */
> > - int refcount;
> > - /* And allows lockless put_online_cpus(). */
> > - atomic_t puts_pending;
> > + atomic_t refcount;
> >
> > #ifdef CONFIG_DEBUG_LOCK_ALLOC
> > struct lockdep_map dep_map;
> > #endif
> > } cpu_hotplug = {
> > .active_writer = NULL,
> > + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
> > .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
> > - .refcount = 0,
> > #ifdef CONFIG_DEBUG_LOCK_ALLOC
> > .dep_map = {.name = "cpu_hotplug.lock" },
> > #endif
> > @@ -86,15 +87,6 @@ static struct {
> > #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
> > #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
> >
> > -static void apply_puts_pending(int max)
> > -{
> > - int delta;
> > -
> > - if (atomic_read(&cpu_hotplug.puts_pending) >= max) {
> > - delta = atomic_xchg(&cpu_hotplug.puts_pending, 0);
> > - cpu_hotplug.refcount -= delta;
> > - }
> > -}
> >
> > void get_online_cpus(void)
> > {
> > @@ -103,9 +95,9 @@ void get_online_cpus(void)
> > return;
> > cpuhp_lock_acquire_read();
> > mutex_lock(&cpu_hotplug.lock);
> > - apply_puts_pending(65536);
> > - cpu_hotplug.refcount++;
> > + atomic_inc(&cpu_hotplug.refcount);
> > mutex_unlock(&cpu_hotplug.lock);
> > + cpuhp_lock_release();
> > }
> > EXPORT_SYMBOL_GPL(get_online_cpus);
> >
> > @@ -116,9 +108,9 @@ bool try_get_online_cpus(void)
> > if (!mutex_trylock(&cpu_hotplug.lock))
> > return false;
> > cpuhp_lock_acquire_tryread();
> > - apply_puts_pending(65536);
> > - cpu_hotplug.refcount++;
> > + atomic_inc(&cpu_hotplug.refcount);
> > mutex_unlock(&cpu_hotplug.lock);
> > + cpuhp_lock_release();
> > return true;
> > }
> > EXPORT_SYMBOL_GPL(try_get_online_cpus);
> > @@ -127,20 +119,16 @@ void put_online_cpus(void)
> > {
> > if (cpu_hotplug.active_writer == current)
> > return;
> > - if (!mutex_trylock(&cpu_hotplug.lock)) {
> > - atomic_inc(&cpu_hotplug.puts_pending);
> > - cpuhp_lock_release();
> > - return;
> > - }
> > -
> > - if (WARN_ON(!cpu_hotplug.refcount))
> > - cpu_hotplug.refcount++; /* try to fix things up */
> >
> > - if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
> > - wake_up_process(cpu_hotplug.active_writer);
> > - mutex_unlock(&cpu_hotplug.lock);
> > - cpuhp_lock_release();
> > + if (atomic_dec_and_test(&cpu_hotplug.refcount) &&
> > + waitqueue_active(&cpu_hotplug.wq))
> > + wake_up(&cpu_hotplug.wq);
> >
> > + /* missing get - try to fix things up */
>
> The only thing that is bugging me is this part. Without the lock we can't
> guarantee that another get_online_cpus() just arrived and bumped the refcount
> to 0.
>
> Of course this only applies to misuse of put/get_online_cpus.
>
> We could hack some loop that tries to cmp_xchng with the old value until it
> fits to work around this, but wouldn't make the code any better readable.

Well, we didn't have this diagnostic in the original, so one approach
would be to simply leave it out. Another approach would be to just
have the WARN_ON() without the attempt to fix it up.

Thanx, Paul

> > + if (WARN_ON(atomic_read(&cpu_hotplug.refcount) < 0))
> > + atomic_set(&cpu_hotplug.refcount, 0);
> > + if (waitqueue_active(&cpu_hotplug.wq))
> > + wake_up(&cpu_hotplug.wq);
> > }
> > EXPORT_SYMBOL_GPL(put_online_cpus);
> >
> > @@ -168,18 +156,22 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
> > */
> > void cpu_hotplug_begin(void)
> > {
> > + DEFINE_WAIT(wait);
> > +
> > cpu_hotplug.active_writer = current;
> >
> > - cpuhp_lock_acquire();
> > for (;;) {
> > + cpuhp_lock_acquire();
> > mutex_lock(&cpu_hotplug.lock);
> > - apply_puts_pending(1);
> > - if (likely(!cpu_hotplug.refcount))
> > + prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
> > + if (likely(!atomic_read(&cpu_hotplug.refcount)))
> > break;
> > - __set_current_state(TASK_UNINTERRUPTIBLE);
> > mutex_unlock(&cpu_hotplug.lock);
> > + cpuhp_lock_release();
> > schedule();
> > }
> > +
> > + finish_wait(&cpu_hotplug.wq, &wait);
> > }
> >
> > void cpu_hotplug_done(void)
>
>
>
> David

2014-12-10 17:51:46

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock

On 12/10, David Hildenbrand wrote:
>
> @@ -127,20 +119,16 @@ void put_online_cpus(void)
> {
> if (cpu_hotplug.active_writer == current)
> return;
> - if (!mutex_trylock(&cpu_hotplug.lock)) {
> - atomic_inc(&cpu_hotplug.puts_pending);
> - cpuhp_lock_release();
> - return;
> - }
> -
> - if (WARN_ON(!cpu_hotplug.refcount))
> - cpu_hotplug.refcount++; /* try to fix things up */
>
> - if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
> - wake_up_process(cpu_hotplug.active_writer);
> - mutex_unlock(&cpu_hotplug.lock);
> - cpuhp_lock_release();
> + if (atomic_dec_and_test(&cpu_hotplug.refcount) &&
> + waitqueue_active(&cpu_hotplug.wq))
> + wake_up(&cpu_hotplug.wq);

OK, waitqueue_active() looks safe... prepare_to_wait() has a barrier.

> void cpu_hotplug_begin(void)
> {
> + DEFINE_WAIT(wait);
> +
> cpu_hotplug.active_writer = current;
>
> - cpuhp_lock_acquire();
> for (;;) {
> + cpuhp_lock_acquire();

not sure I understand why did you move cpuhp_lock_acquire() into
the loop, but this is minor.

> mutex_lock(&cpu_hotplug.lock);
> - apply_puts_pending(1);
> - if (likely(!cpu_hotplug.refcount))
> + prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
> + if (likely(!atomic_read(&cpu_hotplug.refcount)))
> break;
> - __set_current_state(TASK_UNINTERRUPTIBLE);
> mutex_unlock(&cpu_hotplug.lock);
> + cpuhp_lock_release();
> schedule();
> }
> +
> + finish_wait(&cpu_hotplug.wq, &wait);
> }

This is subjective, but how about

static bool xxx(void)
{
mutex_lock(&cpu_hotplug.lock);
if (atomic_read(&cpu_hotplug.refcount) == 0)
return true;
mutex_unlock(&cpu_hotplug.lock);
return false;
}

void cpu_hotplug_begin(void)
{
cpu_hotplug.active_writer = current;

cpuhp_lock_acquire();
wait_event(&cpu_hotplug.wq, xxx());
}

instead?

Oleg.

2014-12-10 19:21:46

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock

> On 12/10, David Hildenbrand wrote:
> >
> > @@ -127,20 +119,16 @@ void put_online_cpus(void)
> > {
> > if (cpu_hotplug.active_writer == current)
> > return;
> > - if (!mutex_trylock(&cpu_hotplug.lock)) {
> > - atomic_inc(&cpu_hotplug.puts_pending);
> > - cpuhp_lock_release();
> > - return;
> > - }
> > -
> > - if (WARN_ON(!cpu_hotplug.refcount))
> > - cpu_hotplug.refcount++; /* try to fix things up */
> >
> > - if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
> > - wake_up_process(cpu_hotplug.active_writer);
> > - mutex_unlock(&cpu_hotplug.lock);
> > - cpuhp_lock_release();
> > + if (atomic_dec_and_test(&cpu_hotplug.refcount) &&
> > + waitqueue_active(&cpu_hotplug.wq))
> > + wake_up(&cpu_hotplug.wq);
>
> OK, waitqueue_active() looks safe... prepare_to_wait() has a barrier.
>
> > void cpu_hotplug_begin(void)
> > {
> > + DEFINE_WAIT(wait);
> > +
> > cpu_hotplug.active_writer = current;
> >
> > - cpuhp_lock_acquire();
> > for (;;) {
> > + cpuhp_lock_acquire();
>
> not sure I understand why did you move cpuhp_lock_acquire() into
> the loop, but this is minor.

Well I got some lockdep issues and this way I was able to solve them.
(complain about same thread that called cpu_hotplug_begin() calling
put_online_cpus(), so we have to correctly tell lockdep when we get an release
the lock).

So I guess I also need that in the loop, or am I wrong (due to
cpuhp_lock_release())?

>
> > mutex_lock(&cpu_hotplug.lock);
> > - apply_puts_pending(1);
> > - if (likely(!cpu_hotplug.refcount))
> > + prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
> > + if (likely(!atomic_read(&cpu_hotplug.refcount)))
> > break;
> > - __set_current_state(TASK_UNINTERRUPTIBLE);
> > mutex_unlock(&cpu_hotplug.lock);
> > + cpuhp_lock_release();
> > schedule();
> > }
> > +
> > + finish_wait(&cpu_hotplug.wq, &wait);
> > }
>
> This is subjective, but how about
>
> static bool xxx(void)
> {
> mutex_lock(&cpu_hotplug.lock);
> if (atomic_read(&cpu_hotplug.refcount) == 0)
> return true;
> mutex_unlock(&cpu_hotplug.lock);
> return false;
> }
>
> void cpu_hotplug_begin(void)
> {
> cpu_hotplug.active_writer = current;
>
> cpuhp_lock_acquire();
> wait_event(&cpu_hotplug.wq, xxx());
> }
>
> instead?
>

What I don't like about that suggestion is that the mutex_lock() happens in
another level of indirection, so by looking at cpu_hotplug_begin() it isn't
obvious that that lock remains locked after this function has been called.

On the other hand this is really a compact one (+ possibly lockdep
annotations) :) .

> Oleg.
>

It is important that we do the state change to TASK_UNINTERRUPTIBLE prior to
checking for the condition.

Is it guaranteed with wait_event() that things like the following won't happen?

1. CPU1 wakes up the wq (refcount == 0)
2. CPU2 calls get_online_cpus() and increments refcount. (refcount == 1)
2. CPU3 executes xxx() up to "return false;" and gets scheduled away
3. CPU2 calls put_online_cpus(), decrementing the refcount (refcount == 0)
-> waitqueue not active -> no wake up
4. CPU3 continues executing and sleeps
-> refcount == 0 but writer is not woken up

Saying, does wait_event() take care wakeups while executing xxx()?
(w.g. activating the wait queue, setting TASK_UNINTERRUPTIBLE just before
calling xxx())

In my code, this is guaranteed by calling
prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE); prior to checking for the condition.

If that is guaranteed, this would work. Will verify that tomorrow.

Thanks a lot!

David

2014-12-10 19:23:54

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock

only thing that is bugging me is this part. Without the lock we can't
> > guarantee that another get_online_cpus() just arrived and bumped the refcount
> > to 0.
> >
> > Of course this only applies to misuse of put/get_online_cpus.
> >
> > We could hack some loop that tries to cmp_xchng with the old value until it
> > fits to work around this, but wouldn't make the code any better readable.
>
> Well, we didn't have this diagnostic in the original, so one approach
> would be to simply leave it out. Another approach would be to just
> have the WARN_ON() without the attempt to fix it up.
>
> Thanx, Paul

Well, I think the WARN_ON would be sufficient for debugging purposes.

Thanks!

David

2014-12-11 09:57:11

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock

> This is subjective, but how about
>
> static bool xxx(void)
> {
> mutex_lock(&cpu_hotplug.lock);
> if (atomic_read(&cpu_hotplug.refcount) == 0)
> return true;
> mutex_unlock(&cpu_hotplug.lock);
> return false;
> }
>
> void cpu_hotplug_begin(void)
> {
> cpu_hotplug.active_writer = current;
>
> cpuhp_lock_acquire();
> wait_event(&cpu_hotplug.wq, xxx());
> }
>
> instead?
>
> Oleg.
>

Just checked wait_event(). It is safe against such race conditions as the same
pattern is used is applied internally.

Thanks!

David

2014-12-12 08:46:49

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock


> This is subjective, but how about
>
> static bool xxx(void)
> {
> mutex_lock(&cpu_hotplug.lock);
> if (atomic_read(&cpu_hotplug.refcount) == 0)
> return true;
> mutex_unlock(&cpu_hotplug.lock);
> return false;
> }
>
> void cpu_hotplug_begin(void)
> {
> cpu_hotplug.active_writer = current;
>
> cpuhp_lock_acquire();
> wait_event(&cpu_hotplug.wq, xxx());
> }
>
> instead?
>
> Oleg.
>

[ 50.662459] do not call blocking ops when !TASK_RUNNING; state=2 set at [<000000000017340e>] prepare_to_wait_event+0x7a/0x124
[ 50.662472] ------------[ cut here ]------------
[ 50.662475] WARNING: at kernel/sched/core.c:7301
[ 50.662477] Modules linked in:
[ 50.662482] CPU: 5 PID: 225 Comm: cpu_start_stop. Not tainted 3.18.0+ #59
[ 50.662485] task: 0000000001f94b20 ti: 0000000001ffc000 task.ti: 0000000001ffc000
...

Looks like your suggestion won't work. We can only set the task to
TASK_UNINTERRUPTIBLE after taking the lock.

David

2014-12-14 19:21:54

by Oleg Nesterov

[permalink] [raw]
Subject: Re: [PATCH v4] CPU hotplug: active_writer not woken up in some cases - deadlock

On 12/12, David Hildenbrand wrote:
>
> > This is subjective, but how about
> >
> > static bool xxx(void)
> > {
> > mutex_lock(&cpu_hotplug.lock);
> > if (atomic_read(&cpu_hotplug.refcount) == 0)
> > return true;
> > mutex_unlock(&cpu_hotplug.lock);
> > return false;
> > }
> >
> > void cpu_hotplug_begin(void)
> > {
> > cpu_hotplug.active_writer = current;
> >
> > cpuhp_lock_acquire();
> > wait_event(&cpu_hotplug.wq, xxx());
> > }
> >
> > instead?
> >
> > Oleg.
> >
>
> [ 50.662459] do not call blocking ops when !TASK_RUNNING; state=2 set at [<000000000017340e>] prepare_to_wait_event+0x7a/0x124
> [ 50.662472] ------------[ cut here ]------------
> [ 50.662475] WARNING: at kernel/sched/core.c:7301
> [ 50.662477] Modules linked in:
> [ 50.662482] CPU: 5 PID: 225 Comm: cpu_start_stop. Not tainted 3.18.0+ #59
> [ 50.662485] task: 0000000001f94b20 ti: 0000000001ffc000 task.ti: 0000000001ffc000
> ...
>
> Looks like your suggestion won't work. We can only set the task to
> TASK_UNINTERRUPTIBLE after taking the lock.

Yeees, this warning (and wait_woken() helpers) was specially added
to catch/fix the problem like this, sorry for confusion.

Easy to fix, just

- mutex_lock(&cpu_hotplug.lock);
+ if (!mutex_trylock(&cpu_hotplug.lock))
+ return false;

If .lock is locked then it is hold by get_online_cpus(), and it is going
to increment the counter.

I would like to say that this is what I actually meant but now I can not
recall if this is true ;)

But please ignore. Your next version looks simple/clear enough.

Oleg.