2016-04-04 10:27:35

by Anna-Maria Behnsen

[permalink] [raw]
Subject: [PATCH] s390/cpum_sf: Remove superfluous SMP function call

Since commit 1cf4f629d9d2 ("cpu/hotplug: Move online calls to
hotplugged cpu") it is ensured that callbacks of CPU_ONLINE and
CPU_DOWN_PREPARE are processed on the hotplugged CPU. Due to this SMP
function calls are no longer required.

Replace smp_call_function_single() with a direct call of
setup_pmc_cpu(). To keep the calling convention, interrupts are
explicitely disabled around the call.

Cc: Martin Schwidefsky <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: [email protected]
Signed-off-by: Anna-Maria Gleixner <[email protected]>
---
arch/s390/kernel/perf_cpum_sf.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)

--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1510,7 +1510,6 @@ static void cpumf_measurement_alert(stru
static int cpumf_pmu_notifier(struct notifier_block *self,
unsigned long action, void *hcpu)
{
- unsigned int cpu = (long) hcpu;
int flags;

/* Ignore the notification if no events are scheduled on the PMU.
@@ -1523,11 +1522,15 @@ static int cpumf_pmu_notifier(struct not
case CPU_ONLINE:
case CPU_DOWN_FAILED:
flags = PMC_INIT;
- smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
break;
case CPU_DOWN_PREPARE:
flags = PMC_RELEASE;
- smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
+ local_irq_disable();
+ setup_pmc_cpu(&flags);
+ local_irq_enable();
break;
default:
break;


2016-04-05 10:49:25

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On Mon, Apr 04, 2016 at 12:27:20PM +0200, Anna-Maria Gleixner wrote:
> Since commit 1cf4f629d9d2 ("cpu/hotplug: Move online calls to
> hotplugged cpu") it is ensured that callbacks of CPU_ONLINE and
> CPU_DOWN_PREPARE are processed on the hotplugged CPU. Due to this SMP
> function calls are no longer required.
>
> Replace smp_call_function_single() with a direct call of
> setup_pmc_cpu(). To keep the calling convention, interrupts are
> explicitely disabled around the call.
>
> Cc: Martin Schwidefsky <[email protected]>
> Cc: Heiko Carstens <[email protected]>
> Cc: [email protected]
> Signed-off-by: Anna-Maria Gleixner <[email protected]>
> ---
> arch/s390/kernel/perf_cpum_sf.c | 9 ++++++---
> 1 file changed, 6 insertions(+), 3 deletions(-)
>
> --- a/arch/s390/kernel/perf_cpum_sf.c
> +++ b/arch/s390/kernel/perf_cpum_sf.c
> @@ -1510,7 +1510,6 @@ static void cpumf_measurement_alert(stru
> static int cpumf_pmu_notifier(struct notifier_block *self,
> unsigned long action, void *hcpu)
> {
> - unsigned int cpu = (long) hcpu;
> int flags;
>
> /* Ignore the notification if no events are scheduled on the PMU.
> @@ -1523,11 +1522,15 @@ static int cpumf_pmu_notifier(struct not
> case CPU_ONLINE:
> case CPU_DOWN_FAILED:
> flags = PMC_INIT;
> - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
> + local_irq_disable();
> + setup_pmc_cpu(&flags);
> + local_irq_enable();
> break;

...but at least the CPU_DOWN_FAILED callback will not necessarily be called
on the cpu that couldn't be brought offline.

Subject: Re: [PREEMPT-RT] [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On 04/05/2016 12:49 PM, Heiko Carstens wrote:
>> --- a/arch/s390/kernel/perf_cpum_sf.c
>> +++ b/arch/s390/kernel/perf_cpum_sf.c
>> @@ -1510,7 +1510,6 @@ static void cpumf_measurement_alert(stru
>> static int cpumf_pmu_notifier(struct notifier_block *self,
>> unsigned long action, void *hcpu)
>> {
>> - unsigned int cpu = (long) hcpu;
>> int flags;
>>
>> /* Ignore the notification if no events are scheduled on the PMU.
>> @@ -1523,11 +1522,15 @@ static int cpumf_pmu_notifier(struct not
>> case CPU_ONLINE:
>> case CPU_DOWN_FAILED:
>> flags = PMC_INIT;
>> - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
>> + local_irq_disable();
>> + setup_pmc_cpu(&flags);
>> + local_irq_enable();
>> break;
>
> ...but at least the CPU_DOWN_FAILED callback will not necessarily be called
> on the cpu that couldn't be brought offline.

I don't follow.

Sebastian

2016-04-05 11:23:47

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PREEMPT-RT] [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On Tue, Apr 05, 2016 at 01:13:06PM +0200, Sebastian Andrzej Siewior wrote:
> On 04/05/2016 12:49 PM, Heiko Carstens wrote:
> >> --- a/arch/s390/kernel/perf_cpum_sf.c
> >> +++ b/arch/s390/kernel/perf_cpum_sf.c
> >> @@ -1510,7 +1510,6 @@ static void cpumf_measurement_alert(stru
> >> static int cpumf_pmu_notifier(struct notifier_block *self,
> >> unsigned long action, void *hcpu)
> >> {
> >> - unsigned int cpu = (long) hcpu;
> >> int flags;
> >>
> >> /* Ignore the notification if no events are scheduled on the PMU.
> >> @@ -1523,11 +1522,15 @@ static int cpumf_pmu_notifier(struct not
> >> case CPU_ONLINE:
> >> case CPU_DOWN_FAILED:
> >> flags = PMC_INIT;
> >> - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
> >> + local_irq_disable();
> >> + setup_pmc_cpu(&flags);
> >> + local_irq_enable();
> >> break;
> >
> > ...but at least the CPU_DOWN_FAILED callback will not necessarily be called
> > on the cpu that couldn't be brought offline.
>
> I don't follow.

I was trying to say that if bringing a cpu down fails, then the cpu hotplug
notifier with CPU_DOWN_FAILED might be called on a cpu that is _not_ the
same cpu that was supposed to be brought offline.

Subsequently, in this case, the setup_pmc_cpu() call will be executed on
the wrong cpu.

2016-04-05 11:36:55

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PREEMPT-RT] [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On Tue, Apr 05, 2016 at 01:23:36PM +0200, Heiko Carstens wrote:
> On Tue, Apr 05, 2016 at 01:13:06PM +0200, Sebastian Andrzej Siewior wrote:
> > On 04/05/2016 12:49 PM, Heiko Carstens wrote:
> > >> --- a/arch/s390/kernel/perf_cpum_sf.c
> > >> +++ b/arch/s390/kernel/perf_cpum_sf.c
> > >> @@ -1510,7 +1510,6 @@ static void cpumf_measurement_alert(stru
> > >> static int cpumf_pmu_notifier(struct notifier_block *self,
> > >> unsigned long action, void *hcpu)
> > >> {
> > >> - unsigned int cpu = (long) hcpu;
> > >> int flags;
> > >>
> > >> /* Ignore the notification if no events are scheduled on the PMU.
> > >> @@ -1523,11 +1522,15 @@ static int cpumf_pmu_notifier(struct not
> > >> case CPU_ONLINE:
> > >> case CPU_DOWN_FAILED:
> > >> flags = PMC_INIT;
> > >> - smp_call_function_single(cpu, setup_pmc_cpu, &flags, 1);
> > >> + local_irq_disable();
> > >> + setup_pmc_cpu(&flags);
> > >> + local_irq_enable();
> > >> break;
> > >
> > > ...but at least the CPU_DOWN_FAILED callback will not necessarily be called
> > > on the cpu that couldn't be brought offline.
> >
> > I don't follow.
>
> I was trying to say that if bringing a cpu down fails, then the cpu hotplug
> notifier with CPU_DOWN_FAILED might be called on a cpu that is _not_ the
> same cpu that was supposed to be brought offline.
>
> Subsequently, in this case, the setup_pmc_cpu() call will be executed on
> the wrong cpu.

.. or to illustrate this behaviour: the following patch (white space
damaged due to copy-paste) results in the following:

# chcpu -d 2
[console] failed cpu: 2 - this cpu: 1

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 40a6b4f9c36c..48d417abfdae 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -852,6 +852,7 @@ int __cpu_disable(void)
{
unsigned long cregs[16];

+ return -EBUSY;
/* Handle possible pending IPIs */
smp_handle_ext_call();
set_cpu_online(smp_processor_id(), false);
@@ -1065,6 +1066,10 @@ static int smp_cpu_notify(struct notifier_block *self, unsigned long action,
int err = 0;

switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_DOWN_FAILED:
+ printk("failed cpu: %d - this cpu: %d\n", cpu, get_cpu());
+ put_cpu();
+ break;
case CPU_ONLINE:
err = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
break;

2016-04-05 11:51:32

by Richard Cochran

[permalink] [raw]
Subject: Re: [PREEMPT-RT] [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On Tue, Apr 05, 2016 at 01:36:38PM +0200, Heiko Carstens wrote:
> On Tue, Apr 05, 2016 at 01:23:36PM +0200, Heiko Carstens wrote:
> > Subsequently, in this case, the setup_pmc_cpu() call will be executed on
> > the wrong cpu.
>
> .. or to illustrate this behaviour: the following patch (white space
> damaged due to copy-paste) results in the following:

I guess you are missing the following commit?


commit 1cf4f629d9d246519a1e76c021806f2a51ddba4d
Author: Thomas Gleixner <[email protected]>
Date: Fri Feb 26 18:43:39 2016 +0000

cpu/hotplug: Move online calls to hotplugged cpu


Thanks,
Richard

2016-04-05 11:55:55

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PREEMPT-RT] [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On Tue, Apr 05, 2016 at 01:51:29PM +0200, [email protected] wrote:
> On Tue, Apr 05, 2016 at 01:36:38PM +0200, Heiko Carstens wrote:
> > On Tue, Apr 05, 2016 at 01:23:36PM +0200, Heiko Carstens wrote:
> > > Subsequently, in this case, the setup_pmc_cpu() call will be executed on
> > > the wrong cpu.
> >
> > .. or to illustrate this behaviour: the following patch (white space
> > damaged due to copy-paste) results in the following:
>
> I guess you are missing the following commit?
>
>
> commit 1cf4f629d9d246519a1e76c021806f2a51ddba4d
> Author: Thomas Gleixner <[email protected]>
> Date: Fri Feb 26 18:43:39 2016 +0000
>
> cpu/hotplug: Move online calls to hotplugged cpu

No. It's included. I'm using latest Linus' master tree with git head being
1e1e5ce78ff0 "Merge tag 'linux-kselftest-4.6-rc3' of
git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest"

# uname -a
Linux p2345007 4.6.0-rc2-00042-g1e1e5ce78ff0-dirty #3 SMP PREEMPT Tue Apr 5 13:30:02 CEST 2016 s390x s390x s390x GNU/Linux

Subject: Re: [PREEMPT-RT] [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On 04/05/2016 01:51 PM, [email protected] wrote:
> On Tue, Apr 05, 2016 at 01:36:38PM +0200, Heiko Carstens wrote:
>> On Tue, Apr 05, 2016 at 01:23:36PM +0200, Heiko Carstens wrote:
>>> Subsequently, in this case, the setup_pmc_cpu() call will be executed on
>>> the wrong cpu.
>>
>> .. or to illustrate this behaviour: the following patch (white space
>> damaged due to copy-paste) results in the following:
>
> I guess you are missing the following commit?
?
> cpu/hotplug: Move online calls to hotplugged cpu

No, Heiko is right here. If one of the "CPU_DOWN_PREPARE" fails then
the following CPU_DOWN_FAILED will be invoked on the correct CPU.

However if we are further down the road and the final ARCH specific
"die" failed (just before CPU_DYING) are invoked then we get this done
on the wrong CPU.

> Thanks,
> Richard

Sebastian

2016-04-05 12:12:18

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PREEMPT-RT] [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On Tue, Apr 05, 2016 at 01:57:42PM +0200, Sebastian Andrzej Siewior wrote:
> On 04/05/2016 01:51 PM, [email protected] wrote:
> > On Tue, Apr 05, 2016 at 01:36:38PM +0200, Heiko Carstens wrote:
> >> On Tue, Apr 05, 2016 at 01:23:36PM +0200, Heiko Carstens wrote:
> >>> Subsequently, in this case, the setup_pmc_cpu() call will be executed on
> >>> the wrong cpu.
> >>
> >> .. or to illustrate this behaviour: the following patch (white space
> >> damaged due to copy-paste) results in the following:
> >
> > I guess you are missing the following commit?
> …
> > cpu/hotplug: Move online calls to hotplugged cpu
>
> No, Heiko is right here. If one of the "CPU_DOWN_PREPARE" fails then
> the following CPU_DOWN_FAILED will be invoked on the correct CPU.
>
> However if we are further down the road and the final ARCH specific
> "die" failed (just before CPU_DYING) are invoked then we get this done
> on the wrong CPU.

I think there is more broken: if I willingly let __cpu_disable() fail and
try to offline e.g. cpu 2 for the second time chcpu will never return.
Plus the console contains several "NOHZ: local_softirq_pending 01"
messages.

# cat /proc/1619/stack
[<000000000013e460>] cpuhp_kick_ap_work+0x78/0x1b8
[<00000000008a1972>] _cpu_down+0xca/0x1c0
[<000000000013f362>] do_cpu_down+0x5a/0x88
[<0000000000682308>] device_offline+0xb8/0xe0
[<000000000068244e>] online_store+0x5e/0x98
[<000000000037ecea>] kernfs_fop_write+0x13a/0x190
[<00000000002ee26e>] __vfs_write+0x36/0x108
[<00000000002ef3e4>] vfs_write+0x94/0x1a0
[<00000000002f0ace>] SyS_write+0x66/0xd8
[<00000000008aa944>] system_call+0x244/0x264
[<ffffffffffffffff>] 0xffffffffffffffff

(1619 is the pid of chcpu)

All of this works without problems on vanilla 4.5 kernel.

I think you can reproduce this on any architecture :)

Subject: Re: [PATCH] s390/cpum_sf: Remove superfluous SMP function call

On 04/05/2016 02:11 PM, Heiko Carstens wrote:
> I think there is more broken: if I willingly let __cpu_disable() fail and
> try to offline e.g. cpu 2 for the second time chcpu will never return.
> Plus the console contains several "NOHZ: local_softirq_pending 01"
> messages.


>
> All of this works without problems on vanilla 4.5 kernel.
>
> I think you can reproduce this on any architecture :)

oh yes.

Sebastian

Subject: [PATCH] cpu/hotplug: fix rollback during error-out in __cpu_disable()

If we error out in __cpu_disable() (via takedown_cpu() which is
currently the last one that can fail) we don't rollback entirely to
CPUHP_ONLINE (where we started) but to CPUHP_AP_ONLINE_IDLE. This
happens because the former states were on the target CPU (the AP states)
and during the rollback we go back until the first BP state we started.
During the next cpu_down attempt (on the same failed CPU) will take
forever because the cpuhp thread is still down.

The fix this I rollback to where we started in _cpu_down() via a workqueue
to ensure that those callback will be run on the target CPU in
non-atomic context (as in normal cpu_up()).
The workqueues should be working again because the CPU_DOWN_FAILED were
already invoked.

notify_online() has been marked as ->skip_onerr because otherwise we
will see the CPU_ONLINE notifier in addition to the CPU_DOWN_FAILED.
However with ->skip_onerr we neither see CPU_ONLINE nor CPU_DOWN_FAILED
if something in between (CPU_DOWN_FAILED … CPUHP_TEARDOWN_CPU).
Currently there is nothing.

This regression got probably introduce in the rework while we introduced
the hotplug thread to offload the work to the target CPU.

Fixes: 4cb28ced23c4 ("cpu/hotplug: Create hotplug threads")
Reported-by: Heiko Carstens <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
kernel/cpu.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ea42e8da861..35b3ce38cd93 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -724,6 +724,8 @@ static int takedown_cpu(unsigned int cpu)
/* CPU didn't die: tell everyone. Can't complain. */
cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
irq_unlock_sparse();
+ kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+ /* smpboot threads are up via CPUHP_AP_SMPBOOT_THREADS */
return err;
}
BUG_ON(cpu_online(cpu));
@@ -787,6 +789,13 @@ void cpuhp_report_idle_dead(void)

#ifdef CONFIG_HOTPLUG_CPU

+static void undo_cpu_down_work(struct work_struct *work)
+{
+ struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
+
+ undo_cpu_down(smp_processor_id(), st, cpuhp_ap_states);
+}
+
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
enum cpuhp_state target)
@@ -832,6 +841,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* to do the further cleanups.
*/
ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+ if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
+ struct work_struct undo_work;
+
+ INIT_WORK_ONSTACK(&undo_work, undo_cpu_down_work);
+ st->target = prev_state;
+ schedule_work_on(cpu, &undo_work);
+ flush_work(&undo_work);
+ destroy_work_on_stack(&undo_work);
+ }

hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
out:
@@ -1249,6 +1267,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.name = "notify:online",
.startup = notify_online,
.teardown = notify_down_prepare,
+ .skip_onerr = true,
},
#endif
/*
--
2.8.0.rc3

2016-04-06 19:51:44

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PATCH] cpu/hotplug: fix rollback during error-out in __cpu_disable()

On Tue, Apr 05, 2016 at 05:59:04PM +0200, Sebastian Andrzej Siewior wrote:
> If we error out in __cpu_disable() (via takedown_cpu() which is
> currently the last one that can fail) we don't rollback entirely to
> CPUHP_ONLINE (where we started) but to CPUHP_AP_ONLINE_IDLE. This
> happens because the former states were on the target CPU (the AP states)
> and during the rollback we go back until the first BP state we started.
> During the next cpu_down attempt (on the same failed CPU) will take
> forever because the cpuhp thread is still down.
>
> The fix this I rollback to where we started in _cpu_down() via a workqueue
> to ensure that those callback will be run on the target CPU in
> non-atomic context (as in normal cpu_up()).
> The workqueues should be working again because the CPU_DOWN_FAILED were
> already invoked.
>
> notify_online() has been marked as ->skip_onerr because otherwise we
> will see the CPU_ONLINE notifier in addition to the CPU_DOWN_FAILED.
> However with ->skip_onerr we neither see CPU_ONLINE nor CPU_DOWN_FAILED
> if something in between (CPU_DOWN_FAILED … CPUHP_TEARDOWN_CPU).
> Currently there is nothing.
>
> This regression got probably introduce in the rework while we introduced
> the hotplug thread to offload the work to the target CPU.
>
> Fixes: 4cb28ced23c4 ("cpu/hotplug: Create hotplug threads")
> Reported-by: Heiko Carstens <[email protected]>
> Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
> ---
> kernel/cpu.c | 19 +++++++++++++++++++
> 1 file changed, 19 insertions(+)

This fixes the issue that a second cpu_down() will take forever, if
__cpu_disable() fails.

However it does not fix the issue that CPU_DOWN_FAILED will be seen on a
different cpu than the cpu that was supposed to be taken offline.

Subject: Re: [PATCH] cpu/hotplug: fix rollback during error-out in __cpu_disable()

On 04/06/2016 09:51 PM, Heiko Carstens wrote:
> This fixes the issue that a second cpu_down() will take forever, if
> __cpu_disable() fails.

Yes. But even without the second take down your CPU isn't complete up.

> However it does not fix the issue that CPU_DOWN_FAILED will be seen on a
> different cpu than the cpu that was supposed to be taken offline.

This is correct. It fixes only the regression you reported.
The CPU_DOWN_FAILED patches are on hold for now.

Sebastian

2016-04-08 06:19:59

by Heiko Carstens

[permalink] [raw]
Subject: Re: [PATCH] cpu/hotplug: fix rollback during error-out in __cpu_disable()

On Thu, Apr 07, 2016 at 05:14:00PM +0200, Sebastian Andrzej Siewior wrote:
> On 04/06/2016 09:51 PM, Heiko Carstens wrote:
> > This fixes the issue that a second cpu_down() will take forever, if
> > __cpu_disable() fails.
>
> Yes. But even without the second take down your CPU isn't complete up.
>
> > However it does not fix the issue that CPU_DOWN_FAILED will be seen on a
> > different cpu than the cpu that was supposed to be taken offline.
>
> This is correct. It fixes only the regression you reported.
> The CPU_DOWN_FAILED patches are on hold for now.

Ok, I was bit confused here. So you may add

Tested-by: Heiko Carstens <[email protected]>

if you want to :)

Subject: [PATCH v2] cpu/hotplug: fix rollback during error-out in __cpu_disable()

If we error out in __cpu_disable() (via takedown_cpu() which is
currently the last one that can fail) we don't rollback entirely to
CPUHP_ONLINE (where we started) but to CPUHP_AP_ONLINE_IDLE. This
happens because the former states were on the target CPU (the AP states)
and during the rollback we go back until the first BP state we started.
The next cpu_down attempt (on the same failed CPU) will take forever
because the cpuhp thread is still down (same goes for smpboot threads).

The fix this I rollback to where we started in _cpu_down(). For this I
add a ->rollback flag so we can invoke the states on the target CPU via
undo_cpu_down() (otherwise cpuhp_ap_online() rollback to
CPUHP_AP_ONLINE_IDLE in case of an error).

notify_online() has been marked as ->skip_onerr because otherwise we
will see the CPU_ONLINE notifier in addition to the CPU_DOWN_FAILED.
However with ->skip_onerr we neither see CPU_ONLINE nor CPU_DOWN_FAILED
if something in between (CPU_DOWN_FAILED … CPUHP_TEARDOWN_CPU).
Currently there is nothing.

This regression got probably introduce in the rework while we introduced
the hotplug thread to offload the work to the target CPU.

Fixes: 4cb28ced23c4 ("cpu/hotplug: Create hotplug threads")
Reported-by: Heiko Carstens <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
v1…v2: replace the workqueue with cpuhp thread

CPU_DOWN_FAILED is still invoked on the "wrong" CPU, this is still just
about fixing the regression.

kernel/cpu.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ea42e8da861..6433b9639946 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -36,6 +36,7 @@
* @target: The target state
* @thread: Pointer to the hotplug thread
* @should_run: Thread should execute
+ * @rollback: Perform a rollback
* @cb_stat: The state for a single callback (install/uninstall)
* @cb: Single callback function (install/uninstall)
* @result: Result of the operation
@@ -47,6 +48,7 @@ struct cpuhp_cpu_state {
#ifdef CONFIG_SMP
struct task_struct *thread;
bool should_run;
+ bool rollback;
enum cpuhp_state cb_state;
int (*cb)(unsigned int cpu);
int result;
@@ -477,6 +479,11 @@ static void cpuhp_thread_fun(unsigned int cpu)
} else {
ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
}
+ } else if (st->rollback) {
+ BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+
+ undo_cpu_down(cpu, st, cpuhp_ap_states);
+ st->rollback = false;
} else {
/* Cannot happen .... */
BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
@@ -724,6 +731,8 @@ static int takedown_cpu(unsigned int cpu)
/* CPU didn't die: tell everyone. Can't complain. */
cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
irq_unlock_sparse();
+ kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+ /* smpboot threads are up via CPUHP_AP_SMPBOOT_THREADS */
return err;
}
BUG_ON(cpu_online(cpu));
@@ -832,6 +841,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* to do the further cleanups.
*/
ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+ if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
+
+ st->target = prev_state;
+ st->rollback = true;
+ cpuhp_kick_ap_work(cpu);
+ }

hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
out:
@@ -1249,6 +1264,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.name = "notify:online",
.startup = notify_online,
.teardown = notify_down_prepare,
+ .skip_onerr = true,
},
#endif
/*
--
2.8.0.rc3

Subject: [tip:smp/urgent] cpu/hotplug: Fix rollback during error-out in __cpu_disable()

Commit-ID: 3b9d6da67e11ca8f78fde887918983523a36b0fa
Gitweb: http://git.kernel.org/tip/3b9d6da67e11ca8f78fde887918983523a36b0fa
Author: Sebastian Andrzej Siewior <[email protected]>
AuthorDate: Fri, 8 Apr 2016 14:40:15 +0200
Committer: Thomas Gleixner <[email protected]>
CommitDate: Fri, 22 Apr 2016 09:49:49 +0200

cpu/hotplug: Fix rollback during error-out in __cpu_disable()

The recent introduction of the hotplug thread which invokes the callbacks on
the plugged cpu, cased the following regression:

If takedown_cpu() fails, then we run into several issues:

1) The rollback of the target cpu states is not invoked. That leaves the smp
threads and the hotplug thread in disabled state.

2) notify_online() is executed due to a missing skip_onerr flag. That causes
that both CPU_DOWN_FAILED and CPU_ONLINE notifications are invoked which
confuses quite some notifiers.

3) The CPU_DOWN_FAILED notification is not invoked on the target CPU. That's
not an issue per se, but it is inconsistent and in consequence blocks the
patches which rely on these states being invoked on the target CPU and not
on the controlling cpu. It also does not preserve the strict call order on
rollback which is problematic for the ongoing state machine conversion as
well.

To fix this we add a rollback flag to the remote callback machinery and invoke
the rollback including the CPU_DOWN_FAILED notification on the remote
cpu. Further mark the notify online state with 'skip_onerr' so we don't get a
double invokation.

This workaround will go away once we moved the unplug invocation to the target
cpu itself.

[ tglx: Massaged changelog and moved the CPU_DOWN_FAILED notifiaction to the
target cpu ]

Fixes: 4cb28ced23c4 ("cpu/hotplug: Create hotplug threads")
Reported-by: Heiko Carstens <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: Martin Schwidefsky <[email protected]>
Cc: Anna-Maria Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>

---
kernel/cpu.c | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ea42e8..3e3f6e4 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -36,6 +36,7 @@
* @target: The target state
* @thread: Pointer to the hotplug thread
* @should_run: Thread should execute
+ * @rollback: Perform a rollback
* @cb_stat: The state for a single callback (install/uninstall)
* @cb: Single callback function (install/uninstall)
* @result: Result of the operation
@@ -47,6 +48,7 @@ struct cpuhp_cpu_state {
#ifdef CONFIG_SMP
struct task_struct *thread;
bool should_run;
+ bool rollback;
enum cpuhp_state cb_state;
int (*cb)(unsigned int cpu);
int result;
@@ -301,6 +303,11 @@ static int cpu_notify(unsigned long val, unsigned int cpu)
return __cpu_notify(val, cpu, -1, NULL);
}

+static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
+{
+ BUG_ON(cpu_notify(val, cpu));
+}
+
/* Notifier wrappers for transitioning to state machine */
static int notify_prepare(unsigned int cpu)
{
@@ -477,6 +484,16 @@ static void cpuhp_thread_fun(unsigned int cpu)
} else {
ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
}
+ } else if (st->rollback) {
+ BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
+
+ undo_cpu_down(cpu, st, cpuhp_ap_states);
+ /*
+ * This is a momentary workaround to keep the notifier users
+ * happy. Will go away once we got rid of the notifiers.
+ */
+ cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
+ st->rollback = false;
} else {
/* Cannot happen .... */
BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
@@ -636,11 +653,6 @@ static inline void check_for_tasks(int dead_cpu)
read_unlock(&tasklist_lock);
}

-static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
-{
- BUG_ON(cpu_notify(val, cpu));
-}
-
static int notify_down_prepare(unsigned int cpu)
{
int err, nr_calls = 0;
@@ -721,9 +733,10 @@ static int takedown_cpu(unsigned int cpu)
*/
err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
- /* CPU didn't die: tell everyone. Can't complain. */
- cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
+ /* CPU refused to die */
irq_unlock_sparse();
+ /* Unpark the hotplug thread so we can rollback there */
+ kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
return err;
}
BUG_ON(cpu_online(cpu));
@@ -832,6 +845,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* to do the further cleanups.
*/
ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+ if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
+ st->target = prev_state;
+ st->rollback = true;
+ cpuhp_kick_ap_work(cpu);
+ }

hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
out:
@@ -1249,6 +1267,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.name = "notify:online",
.startup = notify_online,
.teardown = notify_down_prepare,
+ .skip_onerr = true,
},
#endif
/*