Currently stop machine infrastructure can be called only from a cpu that is
online. But for !CONFIG_SMP, we do allow calling __stop_machine() before the
cpu is online.
x86 for example requires stop machine infrastructure in the cpu online path
and currently implements its own stop machine (using stop_one_cpu_nowait())
for MTRR initialization in the cpu online path.
Enhance the __stop_machine() so that it can be called before the cpu
is onlined. This will pave the way for code consolidation and address potential
deadlocks caused by multiple mechanisms of doing system wide rendezvous.
This will also address the behavioral differences of __stop_machine()
between SMP and UP builds.
Signed-off-by: Suresh Siddha <[email protected]>
Cc: [email protected] # v2.6.35+
---
kernel/stop_machine.c | 62 +++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 57 insertions(+), 5 deletions(-)
Index: tree/kernel/stop_machine.c
===================================================================
--- tree.orig/kernel/stop_machine.c
+++ tree/kernel/stop_machine.c
@@ -136,12 +136,35 @@ void stop_one_cpu_nowait(unsigned int cp
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+/**
+ * __stop_cpus - stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on online cpus in @cpumask. If @cpumask is NULL, @fn
+ * is run on all the online cpus including the current cpu (even if it
+ * is not online).
+ * On each target cpu, @fn is run in a process context (except when run on
+ * the cpu that is in the process of coming online, in which case @fn is run
+ * in the same context as __stop_cpus()) with the highest priority
+ * preempting any task on the cpu and monopolizing it. This function
+ * returns after all executions are complete.
+ */
int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
+ int online = cpu_online(smp_processor_id());
+ int include_this_offline = 0;
struct cpu_stop_work *work;
struct cpu_stop_done done;
+ unsigned int weight;
unsigned int cpu;
+ if (!cpumask) {
+ cpumask = cpu_online_mask;
+ include_this_offline = 1;
+ }
+
/* initialize works and done */
for_each_cpu(cpu, cpumask) {
work = &per_cpu(stop_cpus_work, cpu);
@@ -149,7 +172,12 @@ int __stop_cpus(const struct cpumask *cp
work->arg = arg;
work->done = &done;
}
- cpu_stop_init_done(&done, cpumask_weight(cpumask));
+
+ weight = cpumask_weight(cpumask);
+ if (!online && include_this_offline)
+ weight++;
+
+ cpu_stop_init_done(&done, weight);
/*
* Disable preemption while queueing to avoid getting
@@ -162,7 +190,20 @@ int __stop_cpus(const struct cpumask *cp
&per_cpu(stop_cpus_work, cpu));
preempt_enable();
- wait_for_completion(&done.completion);
+ if (online)
+ wait_for_completion(&done.completion);
+ else {
+ /*
+ * This cpu is not yet online. If @fn needs to be run on this
+ * cpu, run it now. Also, we can't afford to sleep here,
+ * so poll till the work is completed on all the cpu's.
+ */
+ if (include_this_offline)
+ fn(arg);
+ while (atomic_read(&done.nr_todo) > 1)
+ cpu_relax();
+ }
+
return done.executed ? done.ret : -ENOENT;
}
@@ -431,6 +472,7 @@ static int stop_machine_cpu_stop(void *d
struct stop_machine_data *smdata = data;
enum stopmachine_state curstate = STOPMACHINE_NONE;
int cpu = smp_processor_id(), err = 0;
+ unsigned long flags = 0;
bool is_active;
if (!smdata->active_cpus)
@@ -446,7 +488,7 @@ static int stop_machine_cpu_stop(void *d
curstate = smdata->state;
switch (curstate) {
case STOPMACHINE_DISABLE_IRQ:
- local_irq_disable();
+ local_irq_save(flags);
hard_irq_disable();
break;
case STOPMACHINE_RUN:
@@ -460,7 +502,7 @@ static int stop_machine_cpu_stop(void *d
}
} while (curstate != STOPMACHINE_EXIT);
- local_irq_enable();
+ local_irq_restore(flags);
return err;
}
@@ -470,9 +512,19 @@ int __stop_machine(int (*fn)(void *), vo
.num_threads = num_online_cpus(),
.active_cpus = cpus };
+ /* Include the calling cpu that might not be online yet. */
+ if (!cpu_online(smp_processor_id()))
+ smdata.num_threads++;
+
/* Set the initial state and stop all online cpus. */
set_state(&smdata, STOPMACHINE_PREPARE);
- return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+
+ if (cpu_online(smp_processor_id()))
+ return stop_cpus(cpu_online_mask, stop_machine_cpu_stop,
+ &smdata);
+ else
+ return __stop_cpus(NULL, stop_machine_cpu_stop,
+ &smdata);
}
int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
Commit-ID: e9de5210f6a6a8b403035985b5a65e77e7004690
Gitweb: http://git.kernel.org/tip/e9de5210f6a6a8b403035985b5a65e77e7004690
Author: Suresh Siddha <[email protected]>
AuthorDate: Mon, 6 Jun 2011 16:16:56 -0700
Committer: H. Peter Anvin <[email protected]>
CommitDate: Mon, 6 Jun 2011 16:42:09 -0700
x86, stop_machine: Enable __stop_machine() to be called from the cpu online path
Currently stop machine infrastructure can be called only from a cpu that is
online. But for !CONFIG_SMP, we do allow calling __stop_machine() before the
cpu is online.
x86 for example requires stop machine infrastructure in the cpu online path
and currently implements its own stop machine (using stop_one_cpu_nowait())
for MTRR initialization in the cpu online path.
Enhance the __stop_machine() so that it can be called before the cpu
is onlined. This will pave the way for code consolidation and address potential
deadlocks caused by multiple mechanisms of doing system wide rendezvous.
This will also address the behavioral differences of __stop_machine()
between SMP and UP builds.
Signed-off-by: Suresh Siddha <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Cc: [email protected] # v2.6.35+
Signed-off-by: H. Peter Anvin <[email protected]>
---
kernel/stop_machine.c | 62 +++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 57 insertions(+), 5 deletions(-)
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e3516b2..c78b0c2 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -136,12 +136,35 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+/**
+ * __stop_cpus - stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on online cpus in @cpumask. If @cpumask is NULL, @fn
+ * is run on all the online cpus including the current cpu (even if it
+ * is not online).
+ * On each target cpu, @fn is run in a process context (except when run on
+ * the cpu that is in the process of coming online, in which case @fn is run
+ * in the same context as __stop_cpus()) with the highest priority
+ * preempting any task on the cpu and monopolizing it. This function
+ * returns after all executions are complete.
+ */
int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
+ int online = cpu_online(smp_processor_id());
+ int include_this_offline = 0;
struct cpu_stop_work *work;
struct cpu_stop_done done;
+ unsigned int weight;
unsigned int cpu;
+ if (!cpumask) {
+ cpumask = cpu_online_mask;
+ include_this_offline = 1;
+ }
+
/* initialize works and done */
for_each_cpu(cpu, cpumask) {
work = &per_cpu(stop_cpus_work, cpu);
@@ -149,7 +172,12 @@ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
work->arg = arg;
work->done = &done;
}
- cpu_stop_init_done(&done, cpumask_weight(cpumask));
+
+ weight = cpumask_weight(cpumask);
+ if (!online && include_this_offline)
+ weight++;
+
+ cpu_stop_init_done(&done, weight);
/*
* Disable preemption while queueing to avoid getting
@@ -162,7 +190,20 @@ int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
&per_cpu(stop_cpus_work, cpu));
preempt_enable();
- wait_for_completion(&done.completion);
+ if (online)
+ wait_for_completion(&done.completion);
+ else {
+ /*
+ * This cpu is not yet online. If @fn needs to be run on this
+ * cpu, run it now. Also, we can't afford to sleep here,
+ * so poll till the work is completed on all the cpu's.
+ */
+ if (include_this_offline)
+ fn(arg);
+ while (atomic_read(&done.nr_todo) > 1)
+ cpu_relax();
+ }
+
return done.executed ? done.ret : -ENOENT;
}
@@ -431,6 +472,7 @@ static int stop_machine_cpu_stop(void *data)
struct stop_machine_data *smdata = data;
enum stopmachine_state curstate = STOPMACHINE_NONE;
int cpu = smp_processor_id(), err = 0;
+ unsigned long flags = 0;
bool is_active;
if (!smdata->active_cpus)
@@ -446,7 +488,7 @@ static int stop_machine_cpu_stop(void *data)
curstate = smdata->state;
switch (curstate) {
case STOPMACHINE_DISABLE_IRQ:
- local_irq_disable();
+ local_irq_save(flags);
hard_irq_disable();
break;
case STOPMACHINE_RUN:
@@ -460,7 +502,7 @@ static int stop_machine_cpu_stop(void *data)
}
} while (curstate != STOPMACHINE_EXIT);
- local_irq_enable();
+ local_irq_restore(flags);
return err;
}
@@ -470,9 +512,19 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
.num_threads = num_online_cpus(),
.active_cpus = cpus };
+ /* Include the calling cpu that might not be online yet. */
+ if (!cpu_online(smp_processor_id()))
+ smdata.num_threads++;
+
/* Set the initial state and stop all online cpus. */
set_state(&smdata, STOPMACHINE_PREPARE);
- return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
+
+ if (cpu_online(smp_processor_id()))
+ return stop_cpus(cpu_online_mask, stop_machine_cpu_stop,
+ &smdata);
+ else
+ return __stop_cpus(NULL, stop_machine_cpu_stop,
+ &smdata);
}
int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
* Suresh Siddha <[email protected]> wrote:
> Currently stop machine infrastructure can be called only from a cpu
> that is online. But for !CONFIG_SMP, we do allow calling
> __stop_machine() before the cpu is online.
>
> x86 for example requires stop machine infrastructure in the cpu
> online path and currently implements its own stop machine (using
> stop_one_cpu_nowait()) for MTRR initialization in the cpu online
> path.
>
> Enhance the __stop_machine() so that it can be called before the
> cpu is onlined. This will pave the way for code consolidation and
> address potential deadlocks caused by multiple mechanisms of doing
> system wide rendezvous.
>
> This will also address the behavioral differences of
> __stop_machine() between SMP and UP builds.
>
> Signed-off-by: Suresh Siddha <[email protected]>
> Cc: [email protected] # v2.6.35+
> ---
> kernel/stop_machine.c | 62 +++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 57 insertions(+), 5 deletions(-)
This patch is causing problems:
[ 19.835435] BUG: using smp_processor_id() in preemptible [00000000] code: perf/701
[ 19.838718] caller is __stop_machine+0x3c/0xbe
[ 19.842079] Pid: 701, comm: perf Not tainted 3.0.0-rc2-tip+ #132432
[ 19.845378] Call Trace:
[ 19.847838] [<c1112431>] ? debug_smp_processor_id+0xbd/0xd0
[ 19.848712] [<c1050be6>] ? __stop_machine+0x3c/0xbe
[ 19.852046] [<c1005b74>] ? text_poke+0xec/0xec
[ 19.855379] [<c1014447>] ? do_page_fault+0xb0/0x361
[ 19.858711] [<c1005f00>] ? text_poke_smp+0x48/0x4f
[ 19.862044] [<c1014447>] ? do_page_fault+0xb0/0x361
[ 19.865378] [<c100519c>] ? arch_jump_label_transform+0x50/0x64
[ 19.868712] [<c103d61a>] ? __jump_label_update+0x28/0x39
[ 19.872044] [<c103d647>] ? jump_label_update+0x1c/0x49
[ 19.875377] [<c103d892>] ? jump_label_inc+0x38/0x40
[ 19.878710] [<c105cfaf>] ? perf_swevent_init+0x118/0x129
[ 19.882044] [<c10625b6>] ? perf_init_event+0x47/0x7c
[ 19.885376] [<c10627f7>] ? perf_event_alloc+0x20c/0x416
[ 19.888710] [<c1062fb1>] ? sys_perf_event_open+0x313/0x634
[ 19.892042] [<c101469a>] ? do_page_fault+0x303/0x361
[ 19.895379] [<c1281e70>] ? sysenter_do_call+0x12/0x26
On Tue, 2011-06-07 at 11:02 -0700, Ingo Molnar wrote:
> * Suresh Siddha <[email protected]> wrote:
>
> > Currently stop machine infrastructure can be called only from a cpu
> > that is online. But for !CONFIG_SMP, we do allow calling
> > __stop_machine() before the cpu is online.
> >
> > x86 for example requires stop machine infrastructure in the cpu
> > online path and currently implements its own stop machine (using
> > stop_one_cpu_nowait()) for MTRR initialization in the cpu online
> > path.
> >
> > Enhance the __stop_machine() so that it can be called before the
> > cpu is onlined. This will pave the way for code consolidation and
> > address potential deadlocks caused by multiple mechanisms of doing
> > system wide rendezvous.
> >
> > This will also address the behavioral differences of
> > __stop_machine() between SMP and UP builds.
> >
> > Signed-off-by: Suresh Siddha <[email protected]>
> > Cc: [email protected] # v2.6.35+
> > ---
> > kernel/stop_machine.c | 62 +++++++++++++++++++++++++++++++++++++++++++++-----
> > 1 file changed, 57 insertions(+), 5 deletions(-)
>
> This patch is causing problems:
>
> [ 19.835435] BUG: using smp_processor_id() in preemptible [00000000] code: perf/701
> [ 19.838718] caller is __stop_machine+0x3c/0xbe
This is kind of a false positive. We are just checking if the calling
cpu is online/offline (and there is no possible process migration
between an online and offline cpu).
So I can either use raw_smp_processor_id() or wrap the check in
get/put_cpu(). I would prefer raw_smp_processor_id() over an
unnecessary get/put_cpu() pair.
We already have preempt disable/enable down this code path and would
like to keep the preempt disable section to the minimum needed code,
instead of increasing it to address this false positive.
Thoughts?
thanks,
suresh
> [ 19.842079] Pid: 701, comm: perf Not tainted 3.0.0-rc2-tip+ #132432
> [ 19.845378] Call Trace:
> [ 19.847838] [<c1112431>] ? debug_smp_processor_id+0xbd/0xd0
> [ 19.848712] [<c1050be6>] ? __stop_machine+0x3c/0xbe
> [ 19.852046] [<c1005b74>] ? text_poke+0xec/0xec
> [ 19.855379] [<c1014447>] ? do_page_fault+0xb0/0x361
> [ 19.858711] [<c1005f00>] ? text_poke_smp+0x48/0x4f
> [ 19.862044] [<c1014447>] ? do_page_fault+0xb0/0x361
> [ 19.865378] [<c100519c>] ? arch_jump_label_transform+0x50/0x64
> [ 19.868712] [<c103d61a>] ? __jump_label_update+0x28/0x39
> [ 19.872044] [<c103d647>] ? jump_label_update+0x1c/0x49
> [ 19.875377] [<c103d892>] ? jump_label_inc+0x38/0x40
> [ 19.878710] [<c105cfaf>] ? perf_swevent_init+0x118/0x129
> [ 19.882044] [<c10625b6>] ? perf_init_event+0x47/0x7c
> [ 19.885376] [<c10627f7>] ? perf_event_alloc+0x20c/0x416
> [ 19.888710] [<c1062fb1>] ? sys_perf_event_open+0x313/0x634
> [ 19.892042] [<c101469a>] ? do_page_fault+0x303/0x361
> [ 19.895379] [<c1281e70>] ? sysenter_do_call+0x12/0x26
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
On 06/07/2011 11:38 AM, Suresh Siddha wrote:
>
> This is kind of a false positive. We are just checking if the calling
> cpu is online/offline (and there is no possible process migration
> between an online and offline cpu).
>
Is the state not available in a percpu variable of some sort? If not,
shouldn't it be rather than indirecting via a CPU number?
-hpa
On Tue, 2011-06-07 at 11:42 -0700, H. Peter Anvin wrote:
> On 06/07/2011 11:38 AM, Suresh Siddha wrote:
> >
> > This is kind of a false positive. We are just checking if the calling
> > cpu is online/offline (and there is no possible process migration
> > between an online and offline cpu).
> >
>
> Is the state not available in a percpu variable of some sort? If not,
> shouldn't it be rather than indirecting via a CPU number?
This is what I first thought about but the current percpu cpu_state is
arch specific and not all architectures define it.
Thinking about it a bit more, I should be able to use stop machine's
percpu stopper state (cpu_stopper.enabled) here. Something like the
appended. I am testing it and will send the modified patches shortly.
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c78b0c2..97a1770 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -153,7 +153,7 @@ static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
*/
int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
- int online = cpu_online(smp_processor_id());
+ int online = percpu_read(cpu_stopper.enabled);
int include_this_offline = 0;
struct cpu_stop_work *work;
struct cpu_stop_done done;
@@ -513,13 +513,13 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
.active_cpus = cpus };
/* Include the calling cpu that might not be online yet. */
- if (!cpu_online(smp_processor_id()))
+ if (!percpu_read(cpu_stopper.enabled))
smdata.num_threads++;
/* Set the initial state and stop all online cpus. */
set_state(&smdata, STOPMACHINE_PREPARE);
- if (cpu_online(smp_processor_id()))
+ if (percpu_read(cpu_stopper.enabled))
return stop_cpus(cpu_online_mask, stop_machine_cpu_stop,
&smdata);
else