2017-09-20 17:08:48

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH 6/7] smp/hotplug: Differentiate the AP-work lockdep class between up and down

With lockdep-crossrelease we get deadlock reports that span cpu-up and
cpu-down chains. Such deadlocks cannot possibly happen because cpu-up
and cpu-down are globally serialized.

CPU0                    CPU1                    CPU2
cpuhp_up_callbacks:     takedown_cpu:           cpuhp_thread_fun:

cpuhp_state
  irq_lock_sparse()
                        irq_lock_sparse()
                        wait_for_completion()
                                                cpuhp_state
                                                complete()

Now that we have consistent AP state, we can trivially separate the
AP-work class between up and down using st->bringup.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
kernel/cpu.c | 41 ++++++++++++++++++++++++++++++++---------
1 file changed, 32 insertions(+), 9 deletions(-)

--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -68,9 +68,26 @@ struct cpuhp_cpu_state {
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);

#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
-static struct lock_class_key cpuhp_state_key;
-static struct lockdep_map cpuhp_state_lock_map =
- STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
+static struct lockdep_map cpuhp_state_up_map =
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
+static struct lockdep_map cpuhp_state_down_map =
+ STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
+
+
+static void inline cpuhp_lock_acquire(bool bringup)
+{
+ lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+
+static void inline cpuhp_lock_release(bool bringup)
+{
+ lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
+}
+#else
+
+static void inline cpuhp_lock_acquire(bool bringup) { }
+static void inline cpuhp_lock_release(bool bringup) { }
+
#endif

/**
@@ -512,7 +529,7 @@ static void cpuhp_thread_fun(unsigned in
if (WARN_ON_ONCE(!st->should_run))
return;

- lock_map_acquire(&cpuhp_state_lock_map);
+ cpuhp_lock_acquire(bringup);

if (st->single) {
state = st->cb_state;
@@ -564,7 +581,7 @@ static void cpuhp_thread_fun(unsigned in
}

next:
- lock_map_release(&cpuhp_state_lock_map);
+ cpuhp_lock_release(bringup);

if (!st->should_run)
complete(&st->done);
@@ -581,8 +598,11 @@ cpuhp_invoke_ap_callback(int cpu, enum c
if (!cpu_online(cpu))
return 0;

- lock_map_acquire(&cpuhp_state_lock_map);
- lock_map_release(&cpuhp_state_lock_map);
+ cpuhp_lock_acquire(false);
+ cpuhp_lock_release(false);
+
+ cpuhp_lock_acquire(true);
+ cpuhp_lock_release(true);

/*
* If we are up and running, use the hotplug thread. For early calls
@@ -620,8 +640,11 @@ static int cpuhp_kick_ap_work(unsigned i
enum cpuhp_state prev_state = st->state;
int ret;

- lock_map_acquire(&cpuhp_state_lock_map);
- lock_map_release(&cpuhp_state_lock_map);
+ cpuhp_lock_acquire(false);
+ cpuhp_lock_release(false);
+
+ cpuhp_lock_acquire(true);
+ cpuhp_lock_release(true);

trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
ret = cpuhp_kick_ap(st, st->target);



2017-09-25 08:55:17

by Byungchul Park

[permalink] [raw]
Subject: Re: [PATCH 6/7] smp/hotplug: Differentiate the AP-work lockdep class between up and down

On Wed, Sep 20, 2017 at 07:00:20PM +0200, Peter Zijlstra wrote:
> With lockdep-crossrelease we get deadlock reports that span cpu-up and
> cpu-down chains. Such deadlocks cannot possibly happen because cpu-up
> and cpu-down are globally serialized.
>
> CPU0                    CPU1                    CPU2
> cpuhp_up_callbacks:     takedown_cpu:           cpuhp_thread_fun:
>
> cpuhp_state
>   irq_lock_sparse()
>                         irq_lock_sparse()
>                         wait_for_completion()
>                                                 cpuhp_state
>                                                 complete()
>
> Now that we have consistent AP state, we can trivially separate the
> AP-work class between up and down using st->bringup.

Could you tell me which branch these patches are based on?
This is similar to the workqueue problem, so I would like to fix that
on top of your patches as well.

> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> kernel/cpu.c | 41 ++++++++++++++++++++++++++++++++---------
> 1 file changed, 32 insertions(+), 9 deletions(-)
>
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -68,9 +68,26 @@ struct cpuhp_cpu_state {
> static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
>
> #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
> -static struct lock_class_key cpuhp_state_key;
> -static struct lockdep_map cpuhp_state_lock_map =
> - STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
> +static struct lockdep_map cpuhp_state_up_map =
> + STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
> +static struct lockdep_map cpuhp_state_down_map =
> + STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
> +
> +
> +static void inline cpuhp_lock_acquire(bool bringup)
> +{
> + lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
> +}
> +
> +static void inline cpuhp_lock_release(bool bringup)
> +{
> + lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
> +}
> +#else
> +
> +static void inline cpuhp_lock_acquire(bool bringup) { }
> +static void inline cpuhp_lock_release(bool bringup) { }
> +
> #endif
>
> /**
> @@ -512,7 +529,7 @@ static void cpuhp_thread_fun(unsigned in
> if (WARN_ON_ONCE(!st->should_run))
> return;
>
> - lock_map_acquire(&cpuhp_state_lock_map);
> + cpuhp_lock_acquire(bringup);
>
> if (st->single) {
> state = st->cb_state;
> @@ -564,7 +581,7 @@ static void cpuhp_thread_fun(unsigned in
> }
>
> next:
> - lock_map_release(&cpuhp_state_lock_map);
> + cpuhp_lock_release(bringup);
>
> if (!st->should_run)
> complete(&st->done);
> @@ -581,8 +598,11 @@ cpuhp_invoke_ap_callback(int cpu, enum c
> if (!cpu_online(cpu))
> return 0;
>
> - lock_map_acquire(&cpuhp_state_lock_map);
> - lock_map_release(&cpuhp_state_lock_map);
> + cpuhp_lock_acquire(false);
> + cpuhp_lock_release(false);
> +
> + cpuhp_lock_acquire(true);
> + cpuhp_lock_release(true);
>
> /*
> * If we are up and running, use the hotplug thread. For early calls
> @@ -620,8 +640,11 @@ static int cpuhp_kick_ap_work(unsigned i
> enum cpuhp_state prev_state = st->state;
> int ret;
>
> - lock_map_acquire(&cpuhp_state_lock_map);
> - lock_map_release(&cpuhp_state_lock_map);
> + cpuhp_lock_acquire(false);
> + cpuhp_lock_release(false);
> +
> + cpuhp_lock_acquire(true);
> + cpuhp_lock_release(true);
>
> trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
> ret = cpuhp_kick_ap(st, st->target);
>

2017-09-25 09:16:46

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 6/7] smp/hotplug: Differentiate the AP-work lockdep class between up and down

On Mon, Sep 25, 2017 at 05:54:59PM +0900, Byungchul Park wrote:
> On Wed, Sep 20, 2017 at 07:00:20PM +0200, Peter Zijlstra wrote:
> > With lockdep-crossrelease we get deadlock reports that span cpu-up and
> > cpu-down chains. Such deadlocks cannot possibly happen because cpu-up
> > and cpu-down are globally serialized.
> >
> > CPU0                    CPU1                    CPU2
> > cpuhp_up_callbacks:     takedown_cpu:           cpuhp_thread_fun:
> >
> > cpuhp_state
> >   irq_lock_sparse()
> >                         irq_lock_sparse()
> >                         wait_for_completion()
> >                                                 cpuhp_state
> >                                                 complete()
> >
> > Now that we have consistent AP state, we can trivially separate the
> > AP-work class between up and down using st->bringup.
>
> Could you tell me what branch you worked the patches based on?
> This is similar to the problem of workqueue so I want to fix it on
> top of yours, as well.

I wrote the patches on top of tip/master. Thomas maintains these bits so
hopefully he'll eventually merge them in the right tip tree.

2017-11-30 14:44:33

by Lai Jiangshan

[permalink] [raw]
Subject: Re: [PATCH 6/7] smp/hotplug: Differentiate the AP-work lockdep class between up and down

On Thu, Sep 21, 2017 at 1:00 AM, Peter Zijlstra <[email protected]> wrote:
> With lockdep-crossrelease we get deadlock reports that span cpu-up and
> cpu-down chains. Such deadlocks cannot possibly happen because cpu-up
> and cpu-down are globally serialized.
>
> CPU0                    CPU1                    CPU2
> cpuhp_up_callbacks:     takedown_cpu:           cpuhp_thread_fun:
>
> cpuhp_state
>   irq_lock_sparse()
>                         irq_lock_sparse()
>                         wait_for_completion()
>                                                 cpuhp_state
>                                                 complete()
>
> Now that we have consistent AP state, we can trivially separate the
> AP-work class between up and down using st->bringup.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> kernel/cpu.c | 41 ++++++++++++++++++++++++++++++++---------
> 1 file changed, 32 insertions(+), 9 deletions(-)
>
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -68,9 +68,26 @@ struct cpuhp_cpu_state {
> static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
>
> #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
> -static struct lock_class_key cpuhp_state_key;
> -static struct lockdep_map cpuhp_state_lock_map =
> - STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
> +static struct lockdep_map cpuhp_state_up_map =
> + STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
> +static struct lockdep_map cpuhp_state_down_map =
> + STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
> +
> +
> +static void inline cpuhp_lock_acquire(bool bringup)
> +{
> + lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
> +}
> +
> +static void inline cpuhp_lock_release(bool bringup)
> +{
> + lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
> +}
> +#else
> +
> +static void inline cpuhp_lock_acquire(bool bringup) { }
> +static void inline cpuhp_lock_release(bool bringup) { }
> +
> #endif
>
> /**
> @@ -512,7 +529,7 @@ static void cpuhp_thread_fun(unsigned in
> if (WARN_ON_ONCE(!st->should_run))
> return;
>
> - lock_map_acquire(&cpuhp_state_lock_map);
> + cpuhp_lock_acquire(bringup);
>
> if (st->single) {
> state = st->cb_state;
> @@ -564,7 +581,7 @@ static void cpuhp_thread_fun(unsigned in
> }
>
> next:
> - lock_map_release(&cpuhp_state_lock_map);
> + cpuhp_lock_release(bringup);
>
> if (!st->should_run)
> complete(&st->done);
> @@ -581,8 +598,11 @@ cpuhp_invoke_ap_callback(int cpu, enum c
> if (!cpu_online(cpu))
> return 0;
>
> - lock_map_acquire(&cpuhp_state_lock_map);
> - lock_map_release(&cpuhp_state_lock_map);
> + cpuhp_lock_acquire(false);
> + cpuhp_lock_release(false);
> +
> + cpuhp_lock_acquire(true);
> + cpuhp_lock_release(true);

Hello, Peter,

I'm reading the code in kernel/cpu.c.
I couldn't understand why both lockdep_maps are acquired here.
Isn't acquiring only the lockdep_map that matches the @bringup argument enough here?

The log shows that the @bringup argument had already been added
by the time this commit was applied. But it quite probably did not
exist yet when you wrote the patch, since the two were close in time.

thanks,
Lai.

>
> /*
> * If we are up and running, use the hotplug thread. For early calls
> @@ -620,8 +640,11 @@ static int cpuhp_kick_ap_work(unsigned i
> enum cpuhp_state prev_state = st->state;
> int ret;
>
> - lock_map_acquire(&cpuhp_state_lock_map);
> - lock_map_release(&cpuhp_state_lock_map);
> + cpuhp_lock_acquire(false);
> + cpuhp_lock_release(false);
> +
> + cpuhp_lock_acquire(true);
> + cpuhp_lock_release(true);
>
> trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
> ret = cpuhp_kick_ap(st, st->target);
>
>

From 1579509593862778284@xxx Mon Sep 25 11:08:52 +0000 2017
X-GM-THRID: 1579079324433647108
X-Gmail-Labels: Inbox,Category Forums