2008-07-08 07:51:29

by Rusty Russell

Subject: [PATCH 0/3] stop_machine enhancements and simplifications

Hi all,

Here are the three stop_machine patches I've queued for the next merge
window. The first two are pretty well tested via linux-next, the last is
recent but fairly straightforward.

I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
but it can be removed by the last of these patches (left in to reduce
transition breakage).

Feedback welcome as always,
Rusty.


2008-07-08 07:56:28

by Rusty Russell

Subject: [PATCH 1/3] stop_machine: add ALL_CPUS option

From: Jason Baron <[email protected]>

Allow stop_machine_run() to call a function on all cpus. Calling
stop_machine_run() with an 'ALL_CPUS' cpu argument invokes this new behavior.
stop_machine_run() proceeds as normal until the calling cpu has
invoked 'fn'. Then, we tell all the other cpus to call 'fn'.
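
As a rough illustration only (not part of the patch): a caller wanting 'fn'
run on every cpu would look something like this, where flip_feature() and
__do_flip() are made-up names for the example:

	static int flip_feature(void *enable)
	{
		/* Runs first on the calling cpu, then concurrently on all
		 * other online cpus, with interrupts disabled. */
		__do_flip(*(int *)enable);	/* hypothetical helper */
		return 0;
	}

	...
	err = stop_machine_run(flip_feature, &enable, ALL_CPUS);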

Signed-off-by: Jason Baron <[email protected]>
Signed-off-by: Mathieu Desnoyers <[email protected]>
Signed-off-by: Rusty Russell <[email protected]>
CC: Adrian Bunk <[email protected]>
CC: Andi Kleen <[email protected]>
CC: Alexey Dobriyan <[email protected]>
CC: Christoph Hellwig <[email protected]>
CC: [email protected]
CC: [email protected]
---
include/linux/stop_machine.h | 8 +++++++-
kernel/stop_machine.c | 32 +++++++++++++++++++++++++-------
2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 5bfc553..18af011 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -8,11 +8,17 @@
#include <asm/system.h>

#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+
+#define ALL_CPUS ~0U
+
/**
* stop_machine_run: freeze the machine on all CPUs and run this function
* @fn: the function to run
* @data: the data ptr for the @fn()
- * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS.
+ * @cpu: if @cpu == n, run @fn() on cpu n
+ * if @cpu == NR_CPUS, run @fn() on any cpu
+ * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
+ * concurrently on all the other cpus
*
* Description: This causes a thread to be scheduled on every other cpu,
* each of which disables interrupts, and finally interrupts are disabled
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 0101aee..bab5d2a 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -22,9 +22,17 @@ enum stopmachine_state {
STOPMACHINE_WAIT,
STOPMACHINE_PREPARE,
STOPMACHINE_DISABLE_IRQ,
+ STOPMACHINE_RUN,
STOPMACHINE_EXIT,
};

+struct stop_machine_data {
+ int (*fn)(void *);
+ void *data;
+ struct completion done;
+ int run_all;
+} smdata;
+
static enum stopmachine_state stopmachine_state;
static unsigned int stopmachine_num_threads;
static atomic_t stopmachine_thread_ack;
@@ -33,6 +41,7 @@ static int stopmachine(void *cpu)
{
int irqs_disabled = 0;
int prepared = 0;
+ int ran = 0;

set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));

@@ -57,6 +66,11 @@ static int stopmachine(void *cpu)
prepared = 1;
smp_mb(); /* Must read state first. */
atomic_inc(&stopmachine_thread_ack);
+ } else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
+ smdata.fn(smdata.data);
+ ran = 1;
+ smp_mb(); /* Must read state first. */
+ atomic_inc(&stopmachine_thread_ack);
}
/* Yield in first stage: migration threads need to
* help our sisters onto their CPUs. */
@@ -134,11 +148,10 @@ static void restart_machine(void)
preempt_enable_no_resched();
}

-struct stop_machine_data {
- int (*fn)(void *);
- void *data;
- struct completion done;
-};
+static void run_other_cpus(void)
+{
+ stopmachine_set_state(STOPMACHINE_RUN);
+}

static int do_stop(void *_smdata)
{
@@ -148,6 +161,8 @@ static int do_stop(void *_smdata)
ret = stop_machine();
if (ret == 0) {
ret = smdata->fn(smdata->data);
+ if (smdata->run_all)
+ run_other_cpus();
restart_machine();
}

@@ -171,14 +186,17 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
struct stop_machine_data smdata;
struct task_struct *p;

+ mutex_lock(&stopmachine_mutex);
+
smdata.fn = fn;
smdata.data = data;
+ smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
init_completion(&smdata.done);

- mutex_lock(&stopmachine_mutex);
+ smp_wmb(); /* make sure other cpus see smdata updates */

/* If they don't care which CPU fn runs on, bind to any online one. */
- if (cpu == NR_CPUS)
+ if (cpu == NR_CPUS || cpu == ALL_CPUS)
cpu = raw_smp_processor_id();

p = kthread_create(do_stop, &smdata, "kstopmachine");

2008-07-08 07:57:07

by Rusty Russell

Subject: [PATCH 2/3] stop_machine: simplify

stop_machine creates a kthread which creates kernel threads. We can
create those threads directly and simplify things a little. Some care
must be taken with CPU hotunplug, which has special needs, but that code
seems more robust than it was in the past.

Signed-off-by: Rusty Russell <[email protected]>
---
include/linux/stop_machine.h | 12 -
kernel/cpu.c | 13 -
kernel/stop_machine.c | 299 ++++++++++++++++++-------------------------
3 files changed, 135 insertions(+), 189 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -17,13 +17,12 @@
* @data: the data ptr for the @fn()
* @cpu: if @cpu == n, run @fn() on cpu n
* if @cpu == NR_CPUS, run @fn() on any cpu
- * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
- * concurrently on all the other cpus
+ * if @cpu == ALL_CPUS, run @fn() on every online CPU.
*
- * Description: This causes a thread to be scheduled on every other cpu,
- * each of which disables interrupts, and finally interrupts are disabled
- * on the current CPU. The result is that noone is holding a spinlock
- * or inside any other preempt-disabled region when @fn() runs.
+ * Description: This causes a thread to be scheduled on every cpu,
+ * each of which disables interrupts. The result is that noone is
+ * holding a spinlock or inside any other preempt-disabled region when
+ * @fn() runs.
*
* This can be thought of as a very heavy write lock, equivalent to
* grabbing every spinlock in the kernel. */
@@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *),
* @data: the data ptr for the @fn
* @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
*
- * Description: This is a special version of the above, which returns the
- * thread which has run @fn(): kthread_stop will return the return value
- * of @fn(). Used by hotplug cpu.
+ * Description: This is a special version of the above, which assumes cpus
+ * won't come or go while it's being called. Used by hotplug cpu.
*/
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
- unsigned int cpu);
-
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
#else

static inline int stop_machine_run(int (*fn)(void *), void *data,
diff --git a/kernel/cpu.c b/kernel/cpu.c
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
int err, nr_calls = 0;
- struct task_struct *p;
cpumask_t old_allowed, tmp;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
@@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int
cpu_clear(cpu, tmp);
set_cpus_allowed_ptr(current, &tmp);

- p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
+ err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);

- if (IS_ERR(p) || cpu_online(cpu)) {
+ if (err || cpu_online(cpu)) {
/* CPU didn't die: tell everyone. Can't complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu) == NOTIFY_BAD)
BUG();

- if (IS_ERR(p)) {
- err = PTR_ERR(p);
- goto out_allowed;
- }
- goto out_thread;
+ goto out_allowed;
}

/* Wait for it to sleep (leaving idle task). */
@@ -255,8 +250,6 @@ static int __ref _cpu_down(unsigned int

check_for_tasks(cpu);

-out_thread:
- err = kthread_stop(p);
out_allowed:
set_cpus_allowed_ptr(current, &old_allowed);
out_release:
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,4 +1,4 @@
-/* Copyright 2005 Rusty Russell [email protected] IBM Corporation.
+/* Copyright 2008, 2005 Rusty Russell [email protected] IBM Corporation.
* GPL v2 and any later version.
*/
#include <linux/cpu.h>
@@ -13,219 +13,176 @@
#include <asm/atomic.h>
#include <asm/uaccess.h>

-/* Since we effect priority and affinity (both of which are visible
- * to, and settable by outside processes) we do indirection via a
- * kthread. */
-
-/* Thread to stop each CPU in user context. */
+/* This controls the threads on each CPU. */
enum stopmachine_state {
- STOPMACHINE_WAIT,
+ /* Dummy starting state for thread. */
+ STOPMACHINE_NONE,
+ /* Awaiting everyone to be scheduled. */
STOPMACHINE_PREPARE,
+ /* Disable interrupts. */
STOPMACHINE_DISABLE_IRQ,
+ /* Run the function */
STOPMACHINE_RUN,
+ /* Exit */
STOPMACHINE_EXIT,
};
+static enum stopmachine_state state;

struct stop_machine_data {
int (*fn)(void *);
void *data;
- struct completion done;
- int run_all;
-} smdata;
+ int fnret;
+};

-static enum stopmachine_state stopmachine_state;
-static unsigned int stopmachine_num_threads;
-static atomic_t stopmachine_thread_ack;
+/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+static unsigned int num_threads;
+static atomic_t thread_ack;
+static struct completion finished;
+static DEFINE_MUTEX(lock);

-static int stopmachine(void *cpu)
+static void set_state(enum stopmachine_state newstate)
{
- int irqs_disabled = 0;
- int prepared = 0;
- int ran = 0;
+ /* Reset ack counter. */
+ atomic_set(&thread_ack, num_threads);
+ smp_wmb();
+ state = newstate;
+}

- set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
+/* Last one to ack a state moves to the next state. */
+static void ack_state(void)
+{
+ if (atomic_dec_and_test(&thread_ack)) {
+ /* If we're the last one to ack the EXIT, we're finished. */
+ if (state == STOPMACHINE_EXIT)
+ complete(&finished);
+ else
+ set_state(state + 1);
+ }
+}

- /* Ack: we are alive */
- smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
- atomic_inc(&stopmachine_thread_ack);
+/* This is the actual thread which stops the CPU. It exits by itself rather
+ * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
+static int stop_cpu(struct stop_machine_data *smdata)
+{
+ enum stopmachine_state curstate = STOPMACHINE_NONE;
+ int uninitialized_var(ret);

/* Simple state machine */
- while (stopmachine_state != STOPMACHINE_EXIT) {
- if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
- && !irqs_disabled) {
- local_irq_disable();
- hard_irq_disable();
- irqs_disabled = 1;
- /* Ack: irqs disabled. */
- smp_mb(); /* Must read state first. */
- atomic_inc(&stopmachine_thread_ack);
- } else if (stopmachine_state == STOPMACHINE_PREPARE
- && !prepared) {
- /* Everyone is in place, hold CPU. */
- preempt_disable();
- prepared = 1;
- smp_mb(); /* Must read state first. */
- atomic_inc(&stopmachine_thread_ack);
- } else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
- smdata.fn(smdata.data);
- ran = 1;
- smp_mb(); /* Must read state first. */
- atomic_inc(&stopmachine_thread_ack);
+ do {
+ /* Chill out and ensure we re-read stopmachine_state. */
+ cpu_relax();
+ if (state != curstate) {
+ curstate = state;
+ switch (curstate) {
+ case STOPMACHINE_DISABLE_IRQ:
+ local_irq_disable();
+ hard_irq_disable();
+ break;
+ case STOPMACHINE_RUN:
+ /* |= allows error detection if functions on
+ * multiple CPUs. */
+ smdata->fnret |= smdata->fn(smdata->data);
+ break;
+ default:
+ break;
+ }
+ ack_state();
}
- /* Yield in first stage: migration threads need to
- * help our sisters onto their CPUs. */
- if (!prepared && !irqs_disabled)
- yield();
- cpu_relax();
- }
+ } while (curstate != STOPMACHINE_EXIT);

- /* Ack: we are exiting. */
- smp_mb(); /* Must read state first. */
- atomic_inc(&stopmachine_thread_ack);
+ local_irq_enable();
+ do_exit(0);
+}

- if (irqs_disabled)
- local_irq_enable();
- if (prepared)
- preempt_enable();
-
+/* Callback for CPUs which aren't supposed to do anything. */
+static int chill(void *unused)
+{
return 0;
}

-/* Change the thread state */
-static void stopmachine_set_state(enum stopmachine_state state)
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{
- atomic_set(&stopmachine_thread_ack, 0);
- smp_wmb();
- stopmachine_state = state;
- while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
- cpu_relax();
-}
+ int i, err;
+ struct stop_machine_data active, idle;
+ struct task_struct **threads;

-static int stop_machine(void)
-{
- int i, ret = 0;
+ active.fn = fn;
+ active.data = data;
+ active.fnret = 0;
+ idle.fn = chill;
+ idle.data = NULL;

- atomic_set(&stopmachine_thread_ack, 0);
- stopmachine_num_threads = 0;
- stopmachine_state = STOPMACHINE_WAIT;
+ /* If they don't care which cpu fn runs on, just pick one. */
+ if (cpu == NR_CPUS)
+ cpu = any_online_cpu(cpu_online_map);
+
+ /* This could be too big for stack on large machines. */
+ threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
+ if (!threads)
+ return -ENOMEM;
+
+ /* Set up initial state. */
+ mutex_lock(&lock);
+ init_completion(&finished);
+ num_threads = num_online_cpus();
+ set_state(STOPMACHINE_PREPARE);

for_each_online_cpu(i) {
- if (i == raw_smp_processor_id())
- continue;
- ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
- if (ret < 0)
- break;
- stopmachine_num_threads++;
+ struct stop_machine_data *smdata;
+ struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+ if (cpu == ALL_CPUS || i == cpu)
+ smdata = &active;
+ else
+ smdata = &idle;
+
+ threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
+ if (IS_ERR(threads[i])) {
+ err = PTR_ERR(threads[i]);
+ threads[i] = NULL;
+ goto kill_threads;
+ }
+
+ /* Place it onto correct cpu. */
+ kthread_bind(threads[i], i);
+
+ /* Make it highest prio. */
+ if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
+ BUG();
}

- /* Wait for them all to come to life. */
- while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
- yield();
- cpu_relax();
- }
+ /* We've created all the threads. Wake them all: hold this CPU so one
+ * doesn't hit this CPU until we're ready. */
+ cpu = get_cpu();
+ for_each_online_cpu(i)
+ wake_up_process(threads[i]);

- /* If some failed, kill them all. */
- if (ret < 0) {
- stopmachine_set_state(STOPMACHINE_EXIT);
- return ret;
- }
+ /* This will release the thread on our CPU. */
+ put_cpu();
+ wait_for_completion(&finished);
+ mutex_unlock(&lock);

- /* Now they are all started, make them hold the CPUs, ready. */
- preempt_disable();
- stopmachine_set_state(STOPMACHINE_PREPARE);
+ kfree(threads);

- /* Make them disable irqs. */
- local_irq_disable();
- hard_irq_disable();
- stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
+ return active.fnret;

- return 0;
-}
+kill_threads:
+ for_each_online_cpu(i)
+ if (threads[i])
+ kthread_stop(threads[i]);
+ mutex_unlock(&lock);

-static void restart_machine(void)
-{
- stopmachine_set_state(STOPMACHINE_EXIT);
- local_irq_enable();
- preempt_enable_no_resched();
-}
-
-static void run_other_cpus(void)
-{
- stopmachine_set_state(STOPMACHINE_RUN);
-}
-
-static int do_stop(void *_smdata)
-{
- struct stop_machine_data *smdata = _smdata;
- int ret;
-
- ret = stop_machine();
- if (ret == 0) {
- ret = smdata->fn(smdata->data);
- if (smdata->run_all)
- run_other_cpus();
- restart_machine();
- }
-
- /* We're done: you can kthread_stop us now */
- complete(&smdata->done);
-
- /* Wait for kthread_stop */
- set_current_state(TASK_INTERRUPTIBLE);
- while (!kthread_should_stop()) {
- schedule();
- set_current_state(TASK_INTERRUPTIBLE);
- }
- __set_current_state(TASK_RUNNING);
- return ret;
-}
-
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
- unsigned int cpu)
-{
- static DEFINE_MUTEX(stopmachine_mutex);
- struct stop_machine_data smdata;
- struct task_struct *p;
-
- mutex_lock(&stopmachine_mutex);
-
- smdata.fn = fn;
- smdata.data = data;
- smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
- init_completion(&smdata.done);
-
- smp_wmb(); /* make sure other cpus see smdata updates */
-
- /* If they don't care which CPU fn runs on, bind to any online one. */
- if (cpu == NR_CPUS || cpu == ALL_CPUS)
- cpu = raw_smp_processor_id();
-
- p = kthread_create(do_stop, &smdata, "kstopmachine");
- if (!IS_ERR(p)) {
- struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
- /* One high-prio thread per cpu. We'll do this one. */
- sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
- kthread_bind(p, cpu);
- wake_up_process(p);
- wait_for_completion(&smdata.done);
- }
- mutex_unlock(&stopmachine_mutex);
- return p;
+ kfree(threads);
+ return err;
}

int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
{
- struct task_struct *p;
int ret;

/* No CPUs can come up or down during this. */
get_online_cpus();
- p = __stop_machine_run(fn, data, cpu);
- if (!IS_ERR(p))
- ret = kthread_stop(p);
- else
- ret = PTR_ERR(p);
+ ret = __stop_machine_run(fn, data, cpu);
put_online_cpus();

return ret;

2008-07-08 08:01:57

by Rusty Russell

Subject: [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers

Instead of a "cpu" arg with magic values NR_CPUS (any cpu) and ~0 (all
cpus), pass a cpumask_t. Allow NULL for the common case (where we
don't care which CPU the function is run on): temporary cpumask_t's
are usually considered bad for stack space.
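
For illustration (a sketch, not part of the diff below), the calling
conventions become:

	/* Don't care which cpu runs fn: */
	err = stop_machine_run(fn, data, NULL);

	/* Run fn on one particular cpu: */
	cpumask_t mask = cpumask_of_cpu(cpu);
	err = stop_machine_run(fn, data, &mask);

	/* Run fn on every online cpu (via the transitional ALL_CPUS macro): */
	err = stop_machine_run(fn, data, ALL_CPUS);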

Signed-off-by: Rusty Russell <[email protected]>

diff -r 277c5fb41d25 arch/s390/kernel/kprobes.c
--- a/arch/s390/kernel/kprobes.c Tue Jul 08 12:57:33 2008 +1000
+++ b/arch/s390/kernel/kprobes.c Tue Jul 08 17:31:41 2008 +1000
@@ -199,7 +199,7 @@ void __kprobes arch_arm_kprobe(struct kp
args.new = BREAKPOINT_INSTRUCTION;

kcb->kprobe_status = KPROBE_SWAP_INST;
- stop_machine_run(swap_instruction, &args, NR_CPUS);
+ stop_machine_run(swap_instruction, &args, NULL);
kcb->kprobe_status = status;
}

@@ -214,7 +214,7 @@ void __kprobes arch_disarm_kprobe(struct
args.new = p->opcode;

kcb->kprobe_status = KPROBE_SWAP_INST;
- stop_machine_run(swap_instruction, &args, NR_CPUS);
+ stop_machine_run(swap_instruction, &args, NULL);
kcb->kprobe_status = status;
}

diff -r 277c5fb41d25 drivers/char/hw_random/intel-rng.c
--- a/drivers/char/hw_random/intel-rng.c Tue Jul 08 12:57:33 2008 +1000
+++ b/drivers/char/hw_random/intel-rng.c Tue Jul 08 17:31:41 2008 +1000
@@ -368,7 +368,7 @@ static int __init mod_init(void)
* Use stop_machine_run because IPIs can be blocked by disabling
* interrupts.
*/
- err = stop_machine_run(intel_rng_hw_init, intel_rng_hw, NR_CPUS);
+ err = stop_machine_run(intel_rng_hw_init, intel_rng_hw, NULL);
pci_dev_put(dev);
iounmap(intel_rng_hw->mem);
kfree(intel_rng_hw);
diff -r 277c5fb41d25 include/linux/stop_machine.h
--- a/include/linux/stop_machine.h Tue Jul 08 12:57:33 2008 +1000
+++ b/include/linux/stop_machine.h Tue Jul 08 17:31:41 2008 +1000
@@ -5,19 +5,19 @@
(and more). So the "read" side to such a lock is anything which
diables preeempt. */
#include <linux/cpu.h>
+#include <linux/cpumask.h>
#include <asm/system.h>

#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)

-#define ALL_CPUS ~0U
+/* Deprecated, but useful for transition. */
+#define ALL_CPUS CPU_MASK_ALL_PTR

/**
* stop_machine_run: freeze the machine on all CPUs and run this function
* @fn: the function to run
* @data: the data ptr for the @fn()
- * @cpu: if @cpu == n, run @fn() on cpu n
- * if @cpu == NR_CPUS, run @fn() on any cpu
- * if @cpu == ALL_CPUS, run @fn() on every online CPU.
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
*
* Description: This causes a thread to be scheduled on every cpu,
* each of which disables interrupts. The result is that noone is
@@ -26,22 +26,22 @@
*
* This can be thought of as a very heavy write lock, equivalent to
* grabbing every spinlock in the kernel. */
-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+int stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus);

/**
* __stop_machine_run: freeze the machine on all CPUs and run this function
* @fn: the function to run
* @data: the data ptr for the @fn
- * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
+ * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
*
* Description: This is a special version of the above, which assumes cpus
* won't come or go while it's being called. Used by hotplug cpu.
*/
-int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
+int __stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus);
#else

static inline int stop_machine_run(int (*fn)(void *), void *data,
- unsigned int cpu)
+ const cpumask_t *cpus)
{
int ret;
local_irq_disable();
diff -r 277c5fb41d25 kernel/cpu.c
--- a/kernel/cpu.c Tue Jul 08 12:57:33 2008 +1000
+++ b/kernel/cpu.c Tue Jul 08 17:31:41 2008 +1000
@@ -224,8 +224,9 @@ static int __ref _cpu_down(unsigned int
cpus_setall(tmp);
cpu_clear(cpu, tmp);
set_cpus_allowed_ptr(current, &tmp);
+ tmp = cpumask_of_cpu(cpu);

- err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
+ err = __stop_machine_run(take_cpu_down, &tcd_param, &tmp);

if (err || cpu_online(cpu)) {
/* CPU didn't die: tell everyone. Can't complain. */
diff -r 277c5fb41d25 kernel/module.c
--- a/kernel/module.c Tue Jul 08 12:57:33 2008 +1000
+++ b/kernel/module.c Tue Jul 08 17:31:41 2008 +1000
@@ -689,7 +689,7 @@ static int try_stop_module(struct module
{
struct stopref sref = { mod, flags, forced };

- return stop_machine_run(__try_stop_module, &sref, NR_CPUS);
+ return stop_machine_run(__try_stop_module, &sref, NULL);
}

unsigned int module_refcount(struct module *mod)
@@ -1421,7 +1421,7 @@ static void free_module(struct module *m
static void free_module(struct module *mod)
{
/* Delete from various lists */
- stop_machine_run(__unlink_module, mod, NR_CPUS);
+ stop_machine_run(__unlink_module, mod, NULL);
remove_notes_attrs(mod);
remove_sect_attrs(mod);
mod_kobject_remove(mod);
@@ -2189,7 +2189,7 @@ static struct module *load_module(void _
/* Now sew it into the lists so we can get lockdep and oops
* info during argument parsing. Noone should access us, since
* strong_try_module_get() will fail. */
- stop_machine_run(__link_module, mod, NR_CPUS);
+ stop_machine_run(__link_module, mod, NULL);

/* Size of section 0 is 0, so this works well if no params */
err = parse_args(mod->name, mod->args,
@@ -2223,7 +2223,7 @@ static struct module *load_module(void _
return mod;

unlink:
- stop_machine_run(__unlink_module, mod, NR_CPUS);
+ stop_machine_run(__unlink_module, mod, NULL);
module_arch_cleanup(mod);
cleanup:
kobject_del(&mod->mkobj.kobj);
diff -r 277c5fb41d25 kernel/stop_machine.c
--- a/kernel/stop_machine.c Tue Jul 08 12:57:33 2008 +1000
+++ b/kernel/stop_machine.c Tue Jul 08 17:31:41 2008 +1000
@@ -100,7 +100,7 @@ static int chill(void *unused)
return 0;
}

-int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int __stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus)
{
int i, err;
struct stop_machine_data active, idle;
@@ -111,10 +111,6 @@ int __stop_machine_run(int (*fn)(void *)
active.fnret = 0;
idle.fn = chill;
idle.data = NULL;
-
- /* If they don't care which cpu fn runs on, just pick one. */
- if (cpu == NR_CPUS)
- cpu = any_online_cpu(cpu_online_map);

/* This could be too big for stack on large machines. */
threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
@@ -128,13 +124,16 @@ int __stop_machine_run(int (*fn)(void *)
set_state(STOPMACHINE_PREPARE);

for_each_online_cpu(i) {
- struct stop_machine_data *smdata;
+ struct stop_machine_data *smdata = &idle;
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

- if (cpu == ALL_CPUS || i == cpu)
- smdata = &active;
- else
- smdata = &idle;
+ if (!cpus) {
+ if (i == first_cpu(cpu_online_map))
+ smdata = &active;
+ } else {
+ if (cpu_isset(i, *cpus))
+ smdata = &active;
+ }

threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
if (IS_ERR(threads[i])) {
@@ -153,7 +152,7 @@ int __stop_machine_run(int (*fn)(void *)

/* We've created all the threads. Wake them all: hold this CPU so one
* doesn't hit this CPU until we're ready. */
- cpu = get_cpu();
+ get_cpu();
for_each_online_cpu(i)
wake_up_process(threads[i]);

@@ -176,13 +175,13 @@ kill_threads:
return err;
}

-int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
+int stop_machine_run(int (*fn)(void *), void *data, const cpumask_t *cpus)
{
int ret;

/* No CPUs can come up or down during this. */
get_online_cpus();
- ret = __stop_machine_run(fn, data, cpu);
+ ret = __stop_machine_run(fn, data, cpus);
put_online_cpus();

return ret;
diff -r 277c5fb41d25 mm/page_alloc.c
--- a/mm/page_alloc.c Tue Jul 08 12:57:33 2008 +1000
+++ b/mm/page_alloc.c Tue Jul 08 17:31:41 2008 +1000
@@ -2356,7 +2356,7 @@ void build_all_zonelists(void)
} else {
/* we have to stop all cpus to guarantee there is no user
of zonelist */
- stop_machine_run(__build_all_zonelists, NULL, NR_CPUS);
+ stop_machine_run(__build_all_zonelists, NULL, NULL);
/* cpuset refresh routine should be here */
}
vm_total_pages = nr_free_pagecache_pages();

2008-07-08 11:44:49

by Akinobu Mita

Subject: Re: [PATCH 2/3] stop_machine: simplify

I found a small possible cleanup in this patch.

> diff --git a/kernel/cpu.c b/kernel/cpu.c
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int
> static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
> {
> int err, nr_calls = 0;
> - struct task_struct *p;
> cpumask_t old_allowed, tmp;
> void *hcpu = (void *)(long)cpu;
> unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
> @@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int
> cpu_clear(cpu, tmp);
> set_cpus_allowed_ptr(current, &tmp);
>
> - p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
> + err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
>
> - if (IS_ERR(p) || cpu_online(cpu)) {
> + if (err || cpu_online(cpu)) {

I think the cpu_online() check is now made unnecessary by the
__stop_machine_run() change in this patch, i.e. this line can simply be:

if (err) {

Because __stop_machine_run(take_cpu_down, ...) returning zero means
take_cpu_down() has already finished and succeeded (returned zero).
It also means __cpu_disable() in take_cpu_down() has succeeded.
So you can remove the cpu_online() check.

> /* CPU didn't die: tell everyone. Can't complain. */
> if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
> hcpu) == NOTIFY_BAD)
> BUG();
>
> - if (IS_ERR(p)) {
> - err = PTR_ERR(p);
> - goto out_allowed;
> - }
> - goto out_thread;
> + goto out_allowed;
> }
>
> /* Wait for it to sleep (leaving idle task). */

2008-07-08 13:11:59

by Rusty Russell

Subject: Re: [PATCH 2/3] stop_machine: simplify

On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
> I found a small possible cleanup in this patch.

Well spotted! I think this cleanup is actually orthogonal to my patch,
so best served as a separate patch, how's this?

===
Hotplug CPU: don't check cpu_online after take_cpu_down

Akinobu points out that if take_cpu_down() succeeds, the cpu must be offline
(otherwise we're in deep trouble anyway).

Remove the cpu_online() check, and put a BUG_ON().

Signed-off-by: Rusty Russell <[email protected]>
Cc: "Akinobu Mita" <[email protected]>

diff -r 805a2e5e68dd kernel/cpu.c
--- a/kernel/cpu.c Tue Jul 08 23:04:48 2008 +1000
+++ b/kernel/cpu.c Tue Jul 08 23:07:43 2008 +1000
@@ -226,8 +226,7 @@ static int __ref _cpu_down(unsigned int
set_cpus_allowed_ptr(current, &tmp);

err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
-
- if (err || cpu_online(cpu)) {
+ if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
hcpu) == NOTIFY_BAD)
@@ -235,6 +234,7 @@ static int __ref _cpu_down(unsigned int

goto out_allowed;
}
+ BUG_ON(cpu_online(cpu));

/* Wait for it to sleep (leaving idle task). */
while (!idle_cpu(cpu))

2008-07-08 14:27:18

by Mathieu Desnoyers

Subject: Re: [PATCH 2/3] stop_machine: simplify

Hi Rusty,

* Rusty Russell ([email protected]) wrote:
> stop_machine creates a kthread which creates kernel threads. We can
> create those threads directly and simplify things a little. Some care
> must be taken with CPU hotunplug, which has special needs, but that code
> seems more robust than it was in the past.
>
> Signed-off-by: Rusty Russell <[email protected]>
> ---
> include/linux/stop_machine.h | 12 -
> kernel/cpu.c | 13 -
> kernel/stop_machine.c | 299 ++++++++++++++++++-------------------------
> 3 files changed, 135 insertions(+), 189 deletions(-)
>
> diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
> --- a/include/linux/stop_machine.h
> +++ b/include/linux/stop_machine.h
> @@ -17,13 +17,12 @@
> * @data: the data ptr for the @fn()
> * @cpu: if @cpu == n, run @fn() on cpu n
> * if @cpu == NR_CPUS, run @fn() on any cpu
> - * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
> - * concurrently on all the other cpus
> + * if @cpu == ALL_CPUS, run @fn() on every online CPU.
> *

I agree with this change if it makes things simpler. However, callers
must be aware of this important change :

"run @fn() first on the calling cpu, and then concurrently on all the
other cpus" becomes "run @fn() on every online CPU".

There were assumptions done in @fn() where a simple non atomic increment
was used on a static variable to detect that it was the first thread to
execute. It will have to be changed into an atomic inc/dec and test.
Given that the other threads have tasks to perform _after_ the first
thread has executed, they will have to busy-wait (spin) there waiting
for the first thread to finish its execution.
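
Something like this (just a sketch of the pattern, untested; names are
made up):

	static atomic_t fn_first;	/* caller does atomic_set(&fn_first, 1) */
	static int fn_first_done;

	static int fn(void *data)
	{
		if (atomic_dec_and_test(&fn_first)) {
			/* Exactly one thread gets here: do the one-time work. */
			do_one_time_work(data);	/* hypothetical */
			smp_wmb();
			fn_first_done = 1;
		} else {
			/* The others busy-wait until the first thread is done. */
			while (!fn_first_done)
				smp_rmb();
		}
		/* ... per-cpu work that must follow the one-time step ... */
		return 0;
	}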

Mathieu

> - * Description: This causes a thread to be scheduled on every other cpu,
> - * each of which disables interrupts, and finally interrupts are disabled
> - * on the current CPU. The result is that noone is holding a spinlock
> - * or inside any other preempt-disabled region when @fn() runs.
> + * Description: This causes a thread to be scheduled on every cpu,
> + * each of which disables interrupts. The result is that noone is
> + * holding a spinlock or inside any other preempt-disabled region when
> + * @fn() runs.
> *
> * This can be thought of as a very heavy write lock, equivalent to
> * grabbing every spinlock in the kernel. */
> @@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *),
> * @data: the data ptr for the @fn
> * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
> *
> - * Description: This is a special version of the above, which returns the
> - * thread which has run @fn(): kthread_stop will return the return value
> - * of @fn(). Used by hotplug cpu.
> + * Description: This is a special version of the above, which assumes cpus
> + * won't come or go while it's being called. Used by hotplug cpu.
> */
> -struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
> - unsigned int cpu);
> -
> +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
> #else
>
> static inline int stop_machine_run(int (*fn)(void *), void *data,
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int
> static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
> {
> int err, nr_calls = 0;
> - struct task_struct *p;
> cpumask_t old_allowed, tmp;
> void *hcpu = (void *)(long)cpu;
> unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
> @@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int
> cpu_clear(cpu, tmp);
> set_cpus_allowed_ptr(current, &tmp);
>
> - p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
> + err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
>
> - if (IS_ERR(p) || cpu_online(cpu)) {
> + if (err || cpu_online(cpu)) {
> /* CPU didn't die: tell everyone. Can't complain. */
> if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
> hcpu) == NOTIFY_BAD)
> BUG();
>
> - if (IS_ERR(p)) {
> - err = PTR_ERR(p);
> - goto out_allowed;
> - }
> - goto out_thread;
> + goto out_allowed;
> }
>
> /* Wait for it to sleep (leaving idle task). */
> @@ -255,8 +250,6 @@ static int __ref _cpu_down(unsigned int
>
> check_for_tasks(cpu);
>
> -out_thread:
> - err = kthread_stop(p);
> out_allowed:
> set_cpus_allowed_ptr(current, &old_allowed);
> out_release:
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2005 Rusty Russell [email protected] IBM Corporation.
> +/* Copyright 2008, 2005 Rusty Russell [email protected] IBM Corporation.
> * GPL v2 and any later version.
> */
> #include <linux/cpu.h>
> @@ -13,219 +13,176 @@
> #include <asm/atomic.h>
> #include <asm/uaccess.h>
>
> -/* Since we effect priority and affinity (both of which are visible
> - * to, and settable by outside processes) we do indirection via a
> - * kthread. */
> -
> -/* Thread to stop each CPU in user context. */
> +/* This controls the threads on each CPU. */
> enum stopmachine_state {
> - STOPMACHINE_WAIT,
> + /* Dummy starting state for thread. */
> + STOPMACHINE_NONE,
> + /* Awaiting everyone to be scheduled. */
> STOPMACHINE_PREPARE,
> + /* Disable interrupts. */
> STOPMACHINE_DISABLE_IRQ,
> + /* Run the function */
> STOPMACHINE_RUN,
> + /* Exit */
> STOPMACHINE_EXIT,
> };
> +static enum stopmachine_state state;
>
> struct stop_machine_data {
> int (*fn)(void *);
> void *data;
> - struct completion done;
> - int run_all;
> -} smdata;
> + int fnret;
> +};
>
> -static enum stopmachine_state stopmachine_state;
> -static unsigned int stopmachine_num_threads;
> -static atomic_t stopmachine_thread_ack;
> +/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
> +static unsigned int num_threads;
> +static atomic_t thread_ack;
> +static struct completion finished;
> +static DEFINE_MUTEX(lock);
>
> -static int stopmachine(void *cpu)
> +static void set_state(enum stopmachine_state newstate)
> {
> - int irqs_disabled = 0;
> - int prepared = 0;
> - int ran = 0;
> + /* Reset ack counter. */
> + atomic_set(&thread_ack, num_threads);
> + smp_wmb();
> + state = newstate;
> +}
>
> - set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
> +/* Last one to ack a state moves to the next state. */
> +static void ack_state(void)
> +{
> + if (atomic_dec_and_test(&thread_ack)) {
> + /* If we're the last one to ack the EXIT, we're finished. */
> + if (state == STOPMACHINE_EXIT)
> + complete(&finished);
> + else
> + set_state(state + 1);
> + }
> +}
>
> - /* Ack: we are alive */
> - smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
> - atomic_inc(&stopmachine_thread_ack);
> +/* This is the actual thread which stops the CPU. It exits by itself rather
> + * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
> +static int stop_cpu(struct stop_machine_data *smdata)
> +{
> + enum stopmachine_state curstate = STOPMACHINE_NONE;
> + int uninitialized_var(ret);
>
> /* Simple state machine */
> - while (stopmachine_state != STOPMACHINE_EXIT) {
> - if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
> - && !irqs_disabled) {
> - local_irq_disable();
> - hard_irq_disable();
> - irqs_disabled = 1;
> - /* Ack: irqs disabled. */
> - smp_mb(); /* Must read state first. */
> - atomic_inc(&stopmachine_thread_ack);
> - } else if (stopmachine_state == STOPMACHINE_PREPARE
> - && !prepared) {
> - /* Everyone is in place, hold CPU. */
> - preempt_disable();
> - prepared = 1;
> - smp_mb(); /* Must read state first. */
> - atomic_inc(&stopmachine_thread_ack);
> - } else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
> - smdata.fn(smdata.data);
> - ran = 1;
> - smp_mb(); /* Must read state first. */
> - atomic_inc(&stopmachine_thread_ack);
> + do {
> + /* Chill out and ensure we re-read stopmachine_state. */
> + cpu_relax();
> + if (state != curstate) {
> + curstate = state;
> + switch (curstate) {
> + case STOPMACHINE_DISABLE_IRQ:
> + local_irq_disable();
> + hard_irq_disable();
> + break;
> + case STOPMACHINE_RUN:
> + /* |= allows error detection if functions on
> + * multiple CPUs. */
> + smdata->fnret |= smdata->fn(smdata->data);
> + break;
> + default:
> + break;
> + }
> + ack_state();
> }
> - /* Yield in first stage: migration threads need to
> - * help our sisters onto their CPUs. */
> - if (!prepared && !irqs_disabled)
> - yield();
> - cpu_relax();
> - }
> + } while (curstate != STOPMACHINE_EXIT);
>
> - /* Ack: we are exiting. */
> - smp_mb(); /* Must read state first. */
> - atomic_inc(&stopmachine_thread_ack);
> + local_irq_enable();
> + do_exit(0);
> +}
>
> - if (irqs_disabled)
> - local_irq_enable();
> - if (prepared)
> - preempt_enable();
> -
> +/* Callback for CPUs which aren't supposed to do anything. */
> +static int chill(void *unused)
> +{
> return 0;
> }
>
> -/* Change the thread state */
> -static void stopmachine_set_state(enum stopmachine_state state)
> +int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
> {
> - atomic_set(&stopmachine_thread_ack, 0);
> - smp_wmb();
> - stopmachine_state = state;
> - while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
> - cpu_relax();
> -}
> + int i, err;
> + struct stop_machine_data active, idle;
> + struct task_struct **threads;
>
> -static int stop_machine(void)
> -{
> - int i, ret = 0;
> + active.fn = fn;
> + active.data = data;
> + active.fnret = 0;
> + idle.fn = chill;
> + idle.data = NULL;
>
> - atomic_set(&stopmachine_thread_ack, 0);
> - stopmachine_num_threads = 0;
> - stopmachine_state = STOPMACHINE_WAIT;
> + /* If they don't care which cpu fn runs on, just pick one. */
> + if (cpu == NR_CPUS)
> + cpu = any_online_cpu(cpu_online_map);
> +
> + /* This could be too big for stack on large machines. */
> + threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
> + if (!threads)
> + return -ENOMEM;
> +
> + /* Set up initial state. */
> + mutex_lock(&lock);
> + init_completion(&finished);
> + num_threads = num_online_cpus();
> + set_state(STOPMACHINE_PREPARE);
>
> for_each_online_cpu(i) {
> - if (i == raw_smp_processor_id())
> - continue;
> - ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
> - if (ret < 0)
> - break;
> - stopmachine_num_threads++;
> + struct stop_machine_data *smdata;
> + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
> +
> + if (cpu == ALL_CPUS || i == cpu)
> + smdata = &active;
> + else
> + smdata = &idle;
> +
> + threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
> + if (IS_ERR(threads[i])) {
> + err = PTR_ERR(threads[i]);
> + threads[i] = NULL;
> + goto kill_threads;
> + }
> +
> + /* Place it onto correct cpu. */
> + kthread_bind(threads[i], i);
> +
> + /* Make it highest prio. */
> + if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
> + BUG();
> }
>
> - /* Wait for them all to come to life. */
> - while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
> - yield();
> - cpu_relax();
> - }
> + /* We've created all the threads. Wake them all: hold this CPU so one
> + * doesn't hit this CPU until we're ready. */
> + cpu = get_cpu();
> + for_each_online_cpu(i)
> + wake_up_process(threads[i]);
>
> - /* If some failed, kill them all. */
> - if (ret < 0) {
> - stopmachine_set_state(STOPMACHINE_EXIT);
> - return ret;
> - }
> + /* This will release the thread on our CPU. */
> + put_cpu();
> + wait_for_completion(&finished);
> + mutex_unlock(&lock);
>
> - /* Now they are all started, make them hold the CPUs, ready. */
> - preempt_disable();
> - stopmachine_set_state(STOPMACHINE_PREPARE);
> + kfree(threads);
>
> - /* Make them disable irqs. */
> - local_irq_disable();
> - hard_irq_disable();
> - stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
> + return active.fnret;
>
> - return 0;
> -}
> +kill_threads:
> + for_each_online_cpu(i)
> + if (threads[i])
> + kthread_stop(threads[i]);
> + mutex_unlock(&lock);
>
> -static void restart_machine(void)
> -{
> - stopmachine_set_state(STOPMACHINE_EXIT);
> - local_irq_enable();
> - preempt_enable_no_resched();
> -}
> -
> -static void run_other_cpus(void)
> -{
> - stopmachine_set_state(STOPMACHINE_RUN);
> -}
> -
> -static int do_stop(void *_smdata)
> -{
> - struct stop_machine_data *smdata = _smdata;
> - int ret;
> -
> - ret = stop_machine();
> - if (ret == 0) {
> - ret = smdata->fn(smdata->data);
> - if (smdata->run_all)
> - run_other_cpus();
> - restart_machine();
> - }
> -
> - /* We're done: you can kthread_stop us now */
> - complete(&smdata->done);
> -
> - /* Wait for kthread_stop */
> - set_current_state(TASK_INTERRUPTIBLE);
> - while (!kthread_should_stop()) {
> - schedule();
> - set_current_state(TASK_INTERRUPTIBLE);
> - }
> - __set_current_state(TASK_RUNNING);
> - return ret;
> -}
> -
> -struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
> - unsigned int cpu)
> -{
> - static DEFINE_MUTEX(stopmachine_mutex);
> - struct stop_machine_data smdata;
> - struct task_struct *p;
> -
> - mutex_lock(&stopmachine_mutex);
> -
> - smdata.fn = fn;
> - smdata.data = data;
> - smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
> - init_completion(&smdata.done);
> -
> - smp_wmb(); /* make sure other cpus see smdata updates */
> -
> - /* If they don't care which CPU fn runs on, bind to any online one. */
> - if (cpu == NR_CPUS || cpu == ALL_CPUS)
> - cpu = raw_smp_processor_id();
> -
> - p = kthread_create(do_stop, &smdata, "kstopmachine");
> - if (!IS_ERR(p)) {
> - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
> -
> - /* One high-prio thread per cpu. We'll do this one. */
> - sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
> - kthread_bind(p, cpu);
> - wake_up_process(p);
> - wait_for_completion(&smdata.done);
> - }
> - mutex_unlock(&stopmachine_mutex);
> - return p;
> + kfree(threads);
> + return err;
> }
>
> int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
> {
> - struct task_struct *p;
> int ret;
>
> /* No CPUs can come up or down during this. */
> get_online_cpus();
> - p = __stop_machine_run(fn, data, cpu);
> - if (!IS_ERR(p))
> - ret = kthread_stop(p);
> - else
> - ret = PTR_ERR(p);
> + ret = __stop_machine_run(fn, data, cpu);
> put_online_cpus();
>
> return ret;

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-07-08 15:02:22

by Akinobu Mita

Subject: Re: [PATCH 2/3] stop_machine: simplify

2008/7/8 Rusty Russell <[email protected]>:
> On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
>> I found a small possible cleanup in this patch.
>
> Well spotted! I think this cleanup is actually orthogonal to my patch,
> so best served as a separate patch, how's this?

Actually the cpu_online() check was necessary before applying this
stop_machine: simplify patch.

With the old __stop_machine_run(), the call could succeed (return a
!IS_ERR(p) value) even if take_cpu_down() returned a non-zero value.
The return value of take_cpu_down() was only obtained later through
kthread_stop().
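
That is, with the old code the error was only visible after the fact
(sketch of the old flow):

	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
	/* !IS_ERR(p) only means the kstopmachine thread was created;
	 * take_cpu_down() itself may still have failed. */
	...
	err = kthread_stop(p);	/* take_cpu_down()'s return value shows up here */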

> ===
> Hotplug CPU: don't check cpu_online after take_cpu_down

So it seems that folding this patch into the stop_machine: simplify patch
is more appropriate.

> Akinobu points out that if take_cpu_down() succeeds, the cpu must be offline
> (otherwise we're in deep trouble anyway).
>
> Remove the cpu_online() check, and put a BUG_ON().
>
> Signed-off-by: Rusty Russell <[email protected]>
> Cc: "Akinobu Mita" <[email protected]>
>
> diff -r 805a2e5e68dd kernel/cpu.c
> --- a/kernel/cpu.c Tue Jul 08 23:04:48 2008 +1000
> +++ b/kernel/cpu.c Tue Jul 08 23:07:43 2008 +1000
> @@ -226,8 +226,7 @@ static int __ref _cpu_down(unsigned int
> set_cpus_allowed_ptr(current, &tmp);
>
> err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
> -
> - if (err || cpu_online(cpu)) {
> + if (err) {
> /* CPU didn't die: tell everyone. Can't complain. */
> if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
> hcpu) == NOTIFY_BAD)
> @@ -235,6 +234,7 @@ static int __ref _cpu_down(unsigned int
>
> goto out_allowed;
> }
> + BUG_ON(cpu_online(cpu));
>
> /* Wait for it to sleep (leaving idle task). */
> while (!idle_cpu(cpu))
>

2008-07-08 16:21:45

by Christian Borntraeger

Subject: Re: [PATCH 0/3] stop_machine enhancements and simplifications

Am Dienstag, 8. Juli 2008 schrieb Rusty Russell:
> Hi all,
>
> Here are the three stop_machine patches I've queued for the next merge
> window. The first two are pretty well tested via linux-next, the last is
> recent but fairly straightforward.

Hmm, these patches don't fit on kvm.git, so I tested the version from
linux-next.

When using linux-next in a kvm guest on a linux-next host this patch set
really improves the situation. On a guest with 64 cpus on a 1 cpu host
stop_machine_run seems to finish immediately.

I still have some strange problems with 25 guests * 64 cpus on one host cpu,
but this requires further analysis.

IMHO this patch set is really an improvement to the earlier version:

Acked-by: Christian Borntraeger <[email protected]>

2008-07-08 20:10:51

by Jason Baron

Subject: Re: [PATCH 0/3] stop_machine enhancements and simplifications

On Tue, Jul 08, 2008 at 05:50:55PM +1000, Rusty Russell wrote:
> Hi all,
>
> Here are the three stop_machine patches I've queued for the next merge
> window. The first two are pretty well tested via linux-next, the last is
> recent but fairly straightforward.
>
> I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
> but it can be removed by the last of these patches (left in to reduce
> transition breakage).

hi,

I added the 'ALL_CPUS' feature to support the architecture independent immediate
code that Mathieu wrote. So, if Mathieu needs it great, otherwise I have no
other code depending on it.

thanks,

-Jason

2008-07-09 03:29:55

by Mathieu Desnoyers

Subject: Re: [PATCH 0/3] stop_machine enhancements and simplifications

* Jason Baron ([email protected]) wrote:
> On Tue, Jul 08, 2008 at 05:50:55PM +1000, Rusty Russell wrote:
> > Hi all,
> >
> > Here are the three stop_machine patches I've queued for the next merge
> > window. The first two are pretty well tested via linux-next, the last is
> > recent but fairly straightforward.
> >
> > I'm assuming that Jason and/or Mathieu have need for the ALL_CPUS mod,
> > but it can be removed by the last of these patches (left in to reduce
> > transition breakage).
>
> hi,
>
> I added the 'ALL_CPUS' feature to support the architecture independent immediate
> code that Mathieu wrote. So, if Mathieu needs it great, otherwise I have no
> other code depending on it.
>
> thanks,
>
> -Jason

Hi Rusty,

The immediate values implementation (the 'simple' version) uses this
patch. If we include your simplification, I'll have to modify the
immediate values patch a little bit so we deal with concurrent execution
of all the threads, as I pointed out in my earlier email.

Mathieu


--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-07-09 04:50:46

by Rusty Russell

Subject: Re: [PATCH 2/3] stop_machine: simplify

On Wednesday 09 July 2008 00:27:03 Mathieu Desnoyers wrote:
> Hi Rusty,
>
> * Rusty Russell ([email protected]) wrote:
> > stop_machine creates a kthread which creates kernel threads. We can
> > create those threads directly and simplify things a little. Some care
> > must be taken with CPU hotunplug, which has special needs, but that code
> > seems more robust than it was in the past.
> >
> > Signed-off-by: Rusty Russell <[email protected]>
> > ---
> > include/linux/stop_machine.h | 12 -
> > kernel/cpu.c | 13 -
> > kernel/stop_machine.c | 299
> > ++++++++++++++++++------------------------- 3 files changed, 135
> > insertions(+), 189 deletions(-)
> >
> > diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
> > --- a/include/linux/stop_machine.h
> > +++ b/include/linux/stop_machine.h
> > @@ -17,13 +17,12 @@
> > * @data: the data ptr for the @fn()
> > * @cpu: if @cpu == n, run @fn() on cpu n
> > * if @cpu == NR_CPUS, run @fn() on any cpu
> > - * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and
> > then - * concurrently on all the other cpus
> > + * if @cpu == ALL_CPUS, run @fn() on every online CPU.
> > *
>
> I agree with this change if it makes things simpler. However, callers
> must be aware of this important change :
>
> "run @fn() first on the calling cpu, and then concurrently on all the
> other cpus" becomes "run @fn() on every online CPU".

OK. Since that was never in mainline, I think you're the only one who needs
to be aware of the semantic change?

The new symmetric implementation breaks it; hope that isn't a showstopper for
you?

> There were assumptions done in @fn() where a simple non atomic increment
> was used on a static variable to detect that it was the first thread to
> execute. It will have to be changed into an atomic inc/dec and test.
> Given that the other threads have tasks to perform _after_ the first
> thread has executed, they will have to busy-wait (spin) there waiting
> for the first thread to finish its execution.

I assume you can't do that step then call stop_machine.

Thanks,
Rusty.

2008-07-09 04:51:00

by Rusty Russell

Subject: Re: [PATCH 2/3] stop_machine: simplify

On Wednesday 09 July 2008 01:02:02 Akinobu Mita wrote:
> 2008/7/8 Rusty Russell <[email protected]>:
> > On Tuesday 08 July 2008 21:44:40 Akinobu Mita wrote:
> >> I found a small possible cleanup in this patch.
> >
> > Well spotted! I think this cleanup is actually orthogonal to my patch,
> > so best served as a separate patch, how's this?
>
> Actually the cpu_online() check was necessary before appling this
> stop_machine: simplify patch.
>
> With old __stop_machine_run(), __stop_machine_run() could succeed
> (return !IS_ERR(p) value) even if take_cpu_down() returned non-zero value.
> The return value of take_cpu_down() was obtained through kthread_stop().

Ah, thanks for the analysis!

It's a little non-obvious, so I've left it as a separate patch (it doesn't
hurt to have the check there), but included your excellent explanation within
it.

Thanks!
Rusty.

2008-07-09 12:42:22

by Mathieu Desnoyers

Subject: Re: [PATCH 2/3] stop_machine: simplify

* Rusty Russell ([email protected]) wrote:
> On Wednesday 09 July 2008 00:27:03 Mathieu Desnoyers wrote:
> > Hi Rusty,
> >
> > * Rusty Russell ([email protected]) wrote:
> > > stop_machine creates a kthread which creates kernel threads. We can
> > > create those threads directly and simplify things a little. Some care
> > > must be taken with CPU hotunplug, which has special needs, but that code
> > > seems more robust than it was in the past.
> > >
> > > Signed-off-by: Rusty Russell <[email protected]>
> > > ---
> > > include/linux/stop_machine.h | 12 -
> > > kernel/cpu.c | 13 -
> > > kernel/stop_machine.c | 299
> > > ++++++++++++++++++------------------------- 3 files changed, 135
> > > insertions(+), 189 deletions(-)
> > >
> > > diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
> > > --- a/include/linux/stop_machine.h
> > > +++ b/include/linux/stop_machine.h
> > > @@ -17,13 +17,12 @@
> > > * @data: the data ptr for the @fn()
> > > * @cpu: if @cpu == n, run @fn() on cpu n
> > > * if @cpu == NR_CPUS, run @fn() on any cpu
> > > - * if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and
> > > then - * concurrently on all the other cpus
> > > + * if @cpu == ALL_CPUS, run @fn() on every online CPU.
> > > *
> >
> > I agree with this change if it makes things simpler. However, callers
> > must be aware of this important change :
> >
> > "run @fn() first on the calling cpu, and then concurrently on all the
> > other cpus" becomes "run @fn() on every online CPU".
>
> OK. Since that was never in mainline, I think you're the only one who needs
> to be aware of the semantic change?
>
> The new symmetric implementation breaks it; hope that isn't a showstopper for
> you?
>

Nope, that should be ok with something like:

...
	/* Let exactly one of the stopped threads do the text_poke(). */
	atomic_set(&stop_machine_first, 1);
	wrote_text = 0;
	stop_machine_run(stop_machine_imv_update, (void *)imv, ALL_CPUS);
...

static int stop_machine_imv_update(void *imv_ptr)
{
	struct __imv *imv = imv_ptr;

	if (atomic_dec_and_test(&stop_machine_first)) {
		text_poke((void *)imv->imv, (void *)imv->var, imv->size);
		smp_wmb();	/* make sure other cpus see that this has run */
		wrote_text = 1;
	} else {
		while (!wrote_text)
			smp_rmb();
		sync_core();
	}

	flush_icache_range(imv->imv, imv->imv + imv->size);

	return 0;
}

> > There were assumptions done in @fn() where a simple non atomic increment
> > was used on a static variable to detect that it was the first thread to
> > execute. It will have to be changed into an atomic inc/dec and test.
> > Given that the other threads have tasks to perform _after_ the first
> > thread has executed, they will have to busy-wait (spin) there waiting
> > for the first thread to finish its execution.
>
> I assume you can't do that step then call stop_machine.
>

Indeed, I can't, because I need to have all other CPUs busy looping with
interrupts disabled while I do the text_poke.

Mathieu


> Thanks,
> Rusty.

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-07-10 00:30:48

by Max Krasnyansky

Subject: Re: [PATCH 2/3] stop_machine: simplify

Rusty Russell wrote:
> stop_machine creates a kthread which creates kernel threads. We can
> create those threads directly and simplify things a little. Some care
> must be taken with CPU hotunplug, which has special needs, but that code
> seems more robust than it was in the past.
>
> Signed-off-by: Rusty Russell <[email protected]>

Rusty,

You mentioned (in private conversation) that you were going to add some
logic that checks whether CPU is running user-space code and not holding
any locks to avoid scheduling stop_machine thread on it. Was it supposed
to be part of this patch ?

Max

2008-07-10 21:40:26

by Milton Miller

Subject: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace



Hi Rusty, Ingo.

Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic numbers
didn't find kernel/trace/ftrace.c in -next, causing an immediate almost NULL
pointer dereference in ftrace_dynamic_init.


Signed-off-by: Milton Miller <[email protected]>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 0f271c4..c29acb5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -587,7 +587,7 @@ static int __ftrace_modify_code(void *data)

static void ftrace_run_update_code(int command)
{
- stop_machine_run(__ftrace_modify_code, &command, NR_CPUS);
+ stop_machine_run(__ftrace_modify_code, &command, NULL);
}

void ftrace_disable_daemon(void)
@@ -787,7 +787,7 @@ static int ftrace_update_code(void)
!ftrace_enabled || !ftraced_trigger)
return 0;

- stop_machine_run(__ftrace_update_code, NULL, NR_CPUS);
+ stop_machine_run(__ftrace_update_code, NULL, NULL);

return 1;
}
@@ -1564,7 +1564,7 @@ static int __init ftrace_dynamic_init(void)

addr = (unsigned long)ftrace_record_ip;

- stop_machine_run(ftrace_dyn_arch_init, &addr, NR_CPUS);
+ stop_machine_run(ftrace_dyn_arch_init, &addr, NULL);

/* ftrace_dyn_arch_init places the return code in addr */
if (addr) {

2008-07-11 06:43:44

by Rusty Russell

Subject: Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace

On Friday 11 July 2008 07:07:57 Milton Miller wrote:
> Hi Rusty, Ingo.
>
> Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic
> numbers didn't find kernel/trace/ftrace.c in -next, causing an immediate
> almost NULL pointer dereference in ftrace_dynamic_init.

Yes, I'm switching the patches around so the transition is done correctly:
it introduces a new stop_machine() fn with the new interface and deprecates
the old stop_machine_run(). We can remove stop_machine_run() once everyone
has switched.
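
Roughly like this (a sketch of the transition, not the final code; the exact
names and placement may differ):

	/* New interface: takes a cpumask, NULL meaning "any online cpu". */
	int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus);

	/* Old interface kept as a deprecated wrapper until callers move over. */
	static inline int stop_machine_run(int (*fn)(void *), void *data,
					   unsigned int cpu)
	{
		cpumask_t mask;

		if (cpu == NR_CPUS)
			return stop_machine(fn, data, NULL);
		mask = cpumask_of_cpu(cpu);
		return stop_machine(fn, data, &mask);
	}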

Thanks,
Rusty.

2008-07-11 07:46:51

by Ingo Molnar

Subject: Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace


* Milton Miller <[email protected]> wrote:

> Hi Rusty, Ingo.
>
> Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic
> numbers didn't find kernel/trace/ftrace.c in -next, causing an
> immediate almost NULL pointer dereference in ftrace_dynamic_init.

Rusty - what's going on here? Please do not change APIs like that, which
cause code to crash. Either do a compatible API change, or change it
over in a way that causes clear build failures, not crashes.

Ingo

2008-07-11 07:55:48

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 2/3] stop_machine: simplify

On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
> Rusty Russell wrote:
> > stop_machine creates a kthread which creates kernel threads. We can
> > create those threads directly and simplify things a little. Some care
> > must be taken with CPU hotunplug, which has special needs, but that code
> > seems more robust than it was in the past.
> >
> > Signed-off-by: Rusty Russell <[email protected]>
>
> Rusty,
>
> You mentioned (in private conversation) that you were going to add some
> logic that checks whether CPU is running user-space code and not holding
> any locks to avoid scheduling stop_machine thread on it. Was it supposed
> to be part of this patch ?
>
> Max

No... I tried it, and it killed my machine. I didn't chase it for the moment,
but it's on the "experimental" end of my patch queue.

Will play with it again and report,
Rusty.



2008-07-11 08:56:20

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace


* Ingo Molnar <[email protected]> wrote:

>
> * Milton Miller <[email protected]> wrote:
>
> > Hi Rusty, Ingo.
> >
> > Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic
> > numbers didn't find kernel/trace/ftrace.c in -next, causing an
> > immediate almost NULL pointer dereference in ftrace_dynamic_init.
>
> Rusty - what's going on here? Please do not change APIs like that,
> which cause code to crash. Either do a compatible API change, or
> change it over in a way that causes clear build failures, not crashes.

ah, i see it from Rusty's other reply that there's going to be another
version of this. Good :-)

Ingo

2008-07-11 12:34:24

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH -next-20080709] fixup stop_machine use cpu mask vs ftrace

On Friday 11 July 2008 17:46:03 Ingo Molnar wrote:
> * Milton Miller <[email protected]> wrote:
> > Hi Rusty, Ingo.
> >
> > Rusty's patch [PATCH 3/3] stop_machine: use cpu mask rather than magic
> > numbers didn't find kernel/trace/ftrace.c in -next, causing an
> > immediate almost NULL pointer dereference in ftrace_dynamic_init.
>
> Rusty - what's going on here? Please do not change APIs like that, which
> cause code to crash. Either do a compatible API change, or change it
> over in a way that causes clear build failures, not crashes.

To be fair, I did. Unfortunately GCC only warns about passing an int to a
pointer arg, and boom.

But compatible is even better. Given the number of stop_machine_run users I
thought it unlikely that a new one would be introduced during the change. I
was wrong, so I'll do it the Right Way.
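
As an aside, the failure mode is easy to reproduce in miniature
(illustration only, not from any of the patches): C treats an integer
passed where a pointer is expected as a warning, not an error, so an
unconverted caller still builds and then dereferences a bogus near-NULL
pointer at runtime.

	/* Hypothetical stand-in for the new cpumask-taking interface. */
	int takes_mask(const cpumask_t *cpus);

	void unconverted_caller(void)
	{
		/* gcc: "passing argument 1 of 'takes_mask' makes pointer
		 * from integer without a cast" -- but it still compiles. */
		takes_mask(NR_CPUS);
	}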

Cheers,
Rusty.

2008-07-11 13:12:36

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [PATCH 2/3] stop_machine: simplify

* Rusty Russell ([email protected]) wrote:
> On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
> > Rusty Russell wrote:
> > > stop_machine creates a kthread which creates kernel threads. We can
> > > create those threads directly and simplify things a little. Some care
> > > must be taken with CPU hotunplug, which has special needs, but that code
> > > seems more robust than it was in the past.
> > >
> > > Signed-off-by: Rusty Russell <[email protected]>
> >
> > Rusty,
> >
> > You mentioned (in private conversation) that you were going to add some
> > logic that checks whether CPU is running user-space code and not holding
> > any locks to avoid scheduling stop_machine thread on it. Was it supposed
> > to be part of this patch ?
> >
> > Max
>
> No... I tried it, and it killed my machine. I didn't chase it for the moment,
> but it's on the "experimental" end of my patch queue.
>
> Will play with it again and report,
> Rusty.
>

Hrm, I must be missing something, but using the fact that other CPUs are
running in userspace as a guarantee that they are not running within
critical kernel sections seems kind of... racy? I'd like to have a look
at this experimental patch: does it inhibit interrupts somehow, and/or
does it take control of userspace processes doing system calls at that
precise moment?

Mathieu

>
>
>

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2008-07-12 05:07:54

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH 2/3] stop_machine: simplify

On Friday 11 July 2008 23:12:21 Mathieu Desnoyers wrote:
> * Rusty Russell ([email protected]) wrote:
> > On Thursday 10 July 2008 10:30:37 Max Krasnyansky wrote:
> > > You mentioned (in private conversation) that you were going to add some
> > > logic that checks whether CPU is running user-space code and not
> > > holding any locks to avoid scheduling stop_machine thread on it.
> >
> > Will play with it again and report,
> > Rusty.
>
> Hrm, I must be missing something, but using the fact that other CPUs are
> running in userspace as a guarantee that they are not running within
> critical kernel sections seems kind of.. racy ? I'd like to have a look
> at this experimental patch : does it inhibit interrupts somehow and/or
> does it take control of userspace processes doing system calls at that
> precise moment ?

The idea was to try this ipi-to-all-cpus approach and fall back to the
current kthread-based method if it doesn't work. I suspect it will succeed
in the vast majority of cases (with CONFIG_PREEMPT, we can also let the
function execute if we're in-kernel but preemptible). Something like this:

+struct ipi_data {
+ atomic_t acked;
+ atomic_t failed;
+ unsigned int cpu;
+ int fnret;
+ int (*fn)(void *data);
+ void *data;
+};
+
+static void ipi_func(void *info)
+{
+ struct ipi_data *ipi = info;
+ bool ok = false;
+
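+	/* Debug leftovers in this experimental version: dump the
+	 * interrupted registers and unconditionally take the failure
+	 * path, so the user_mode/preemptible check below never runs. */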
+ printk("get_irq_regs(%i) = %p\n", smp_processor_id(), get_irq_regs());
+ goto fail;
+
+ if (user_mode(get_irq_regs()))
+ ok = true;
+ else {
+#ifdef CONFIG_PREEMPT
+ /* We're in an interrupt, ok, but were we preemptible
+ * before that? */
+ if ((hardirq_count() >> HARDIRQ_SHIFT) == 1) {
+ int prev = preempt_count() & ~HARDIRQ_MASK;
+ if ((prev & ~PREEMPT_ACTIVE) == PREEMPT_INATOMIC_BASE)
+ ok = true;
+ }
+#endif
+ }
+
+fail:
+ if (!ok) {
+ /* Mark our failure before acking. */
+ atomic_inc(&ipi->failed);
+ wmb();
+ }
+
+ if (smp_processor_id() != ipi->cpu) {
+ /* Wait for cpu to call function (last to ack). */
+ atomic_inc(&ipi->acked);
+ while (atomic_read(&ipi->acked) != num_online_cpus())
+ cpu_relax();
+ } else {
+ while (atomic_read(&ipi->acked) != num_online_cpus() - 1)
+ cpu_relax();
+ /* Must read acked before failed. */
+ rmb();
+
+		/* Call function if no one failed. */
+ if (atomic_read(&ipi->failed) == 0)
+ ipi->fnret = ipi->fn(ipi->data);
+ atomic_inc(&ipi->acked);
+ }
+}
+
+static bool try_ipi_stop(int (*fn)(void *), void *data, unsigned int cpu,
+ int *ret)
+{
+ struct ipi_data ipi;
+
+ /* If they don't care which cpu fn runs on, just pick one. */
+ if (cpu == NR_CPUS)
+ ipi.cpu = any_online_cpu(cpu_online_map);
+ else
+ ipi.cpu = cpu;
+
+ atomic_set(&ipi.acked, 0);
+ atomic_set(&ipi.failed, 0);
+ ipi.fn = fn;
+ ipi.data = data;
+ ipi.fnret = 0;
+
+ smp_call_function(ipi_func, &ipi, 0, 1);
+
+ printk("stop_machine: ipi acked %u failed %u\n",
+ atomic_read(&ipi.acked), atomic_read(&ipi.failed));
+ *ret = ipi.fnret;
+ return (atomic_read(&ipi.failed) == 0);
+}
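
The surrounding glue would then be something like this (hypothetical wiring,
not part of the snippet above): try the IPI fast path first, and only fall
back to the existing kthread-based slow path when some CPU reports it was
caught inside a non-preemptible kernel section.

	int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
	{
		int ret;

		/* Fast path: every CPU was in userspace (or in preemptible
		 * kernel code with CONFIG_PREEMPT), so fn has already run. */
		if (try_ipi_stop(fn, data, cpu, &ret))
			return ret;

		/* Slow path: fall back to the per-cpu stopmachine threads
		 * (do_stop_machine_threads() is a stand-in for the existing
		 * implementation). */
		return do_stop_machine_threads(fn, data, cpu);
	}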

Hope that clarifies!
Rusty.