From: Rusty Russell
To: linux-kernel@vger.kernel.org
Cc: Jason Baron, Mathieu Desnoyers, Max Krasnyansky, Hidetoshi Seto
Subject: [PATCH 2/3] stop_machine: simplify
Date: Tue, 8 Jul 2008 17:56:46 +1000
Message-Id: <200807081756.47140.rusty@rustcorp.com.au>
In-Reply-To: <200807081756.02838.rusty@rustcorp.com.au>
References: <200807081750.55536.rusty@rustcorp.com.au>
	<200807081756.02838.rusty@rustcorp.com.au>

stop_machine creates a kthread which creates kernel threads.  We can
create those threads directly and simplify things a little.  Some care
must be taken with CPU hotunplug, which has special needs, but that code
seems more robust than it was in the past.

(Two illustrative sketches, one of the new rendezvous pattern and one of
the changed caller API, follow the patch.)

Signed-off-by: Rusty Russell
---
 include/linux/stop_machine.h |   12 -
 kernel/cpu.c                 |   13 -
 kernel/stop_machine.c        |  299 ++++++++++++++++++-------------------
 3 files changed, 135 insertions(+), 189 deletions(-)

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -17,13 +17,12 @@
  * @data: the data ptr for the @fn()
  * @cpu: if @cpu == n, run @fn() on cpu n
  *       if @cpu == NR_CPUS, run @fn() on any cpu
- *       if @cpu == ALL_CPUS, run @fn() first on the calling cpu, and then
- *       concurrently on all the other cpus
+ *       if @cpu == ALL_CPUS, run @fn() on every online CPU.
  *
- * Description: This causes a thread to be scheduled on every other cpu,
- * each of which disables interrupts, and finally interrupts are disabled
- * on the current CPU.  The result is that noone is holding a spinlock
- * or inside any other preempt-disabled region when @fn() runs.
+ * Description: This causes a thread to be scheduled on every cpu,
+ * each of which disables interrupts.  The result is that no one is
+ * holding a spinlock or inside any other preempt-disabled region when
+ * @fn() runs.
  *
  * This can be thought of as a very heavy write lock, equivalent to
  * grabbing every spinlock in the kernel. */
@@ -35,13 +34,10 @@ int stop_machine_run(int (*fn)(void *),
  * @data: the data ptr for the @fn
  * @cpu: the cpu to run @fn on (or any, if @cpu == NR_CPUS.
  *
- * Description: This is a special version of the above, which returns the
- * thread which has run @fn(): kthread_stop will return the return value
- * of @fn().  Used by hotplug cpu.
+ * Description: This is a special version of the above, which assumes cpus
+ * won't come or go while it's being called.  Used by hotplug cpu.
  */
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-				       unsigned int cpu);
-
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
 
 #else
 
 static inline int stop_machine_run(int (*fn)(void *), void *data,
diff --git a/kernel/cpu.c b/kernel/cpu.c
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -192,7 +192,6 @@ static int __ref _cpu_down(unsigned int
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	struct task_struct *p;
 	cpumask_t old_allowed, tmp;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
@@ -226,19 +225,15 @@ static int __ref _cpu_down(unsigned int
 	cpu_clear(cpu, tmp);
 	set_cpus_allowed_ptr(current, &tmp);
 
-	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
+	err = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
 
-	if (IS_ERR(p) || cpu_online(cpu)) {
+	if (err || cpu_online(cpu)) {
 		/* CPU didn't die: tell everyone.  Can't complain. */
 		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		if (IS_ERR(p)) {
-			err = PTR_ERR(p);
-			goto out_allowed;
-		}
-		goto out_thread;
+		goto out_allowed;
 	}
 
 	/* Wait for it to sleep (leaving idle task). */
@@ -255,8 +250,6 @@ static int __ref _cpu_down(unsigned int
 	check_for_tasks(cpu);
 
-out_thread:
-	err = kthread_stop(p);
 out_allowed:
 	set_cpus_allowed_ptr(current, &old_allowed);
 out_release:
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,4 +1,4 @@
-/* Copyright 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
+/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
  * GPL v2 and any later version.
  */
 #include
@@ -13,219 +13,176 @@
 #include
 #include
 
-/* Since we effect priority and affinity (both of which are visible
- * to, and settable by outside processes) we do indirection via a
- * kthread. */
-
-/* Thread to stop each CPU in user context. */
+/* This controls the threads on each CPU. */
 enum stopmachine_state {
-	STOPMACHINE_WAIT,
+	/* Dummy starting state for thread. */
+	STOPMACHINE_NONE,
+	/* Awaiting everyone to be scheduled. */
 	STOPMACHINE_PREPARE,
+	/* Disable interrupts. */
 	STOPMACHINE_DISABLE_IRQ,
+	/* Run the function */
 	STOPMACHINE_RUN,
+	/* Exit */
 	STOPMACHINE_EXIT,
 };
+static enum stopmachine_state state;
 
 struct stop_machine_data {
 	int (*fn)(void *);
 	void *data;
-	struct completion done;
-	int run_all;
-} smdata;
+	int fnret;
+};
 
-static enum stopmachine_state stopmachine_state;
-static unsigned int stopmachine_num_threads;
-static atomic_t stopmachine_thread_ack;
+/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+static unsigned int num_threads;
+static atomic_t thread_ack;
+static struct completion finished;
+static DEFINE_MUTEX(lock);
 
-static int stopmachine(void *cpu)
+static void set_state(enum stopmachine_state newstate)
 {
-	int irqs_disabled = 0;
-	int prepared = 0;
-	int ran = 0;
+	/* Reset ack counter. */
+	atomic_set(&thread_ack, num_threads);
+	smp_wmb();
+	state = newstate;
+}
 
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu((int)(long)cpu));
+/* Last one to ack a state moves to the next state. */
+static void ack_state(void)
+{
+	if (atomic_dec_and_test(&thread_ack)) {
+		/* If we're the last one to ack the EXIT, we're finished. */
+		if (state == STOPMACHINE_EXIT)
+			complete(&finished);
+		else
+			set_state(state + 1);
+	}
+}
 
-	/* Ack: we are alive */
-	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
-	atomic_inc(&stopmachine_thread_ack);
+/* This is the actual thread which stops the CPU.  It exits by itself rather
+ * than waiting for kthread_stop(), because it's easier for hotplug CPU. */
+static int stop_cpu(struct stop_machine_data *smdata)
+{
+	enum stopmachine_state curstate = STOPMACHINE_NONE;
+	int uninitialized_var(ret);
 
 	/* Simple state machine */
-	while (stopmachine_state != STOPMACHINE_EXIT) {
-		if (stopmachine_state == STOPMACHINE_DISABLE_IRQ
-		    && !irqs_disabled) {
-			local_irq_disable();
-			hard_irq_disable();
-			irqs_disabled = 1;
-			/* Ack: irqs disabled. */
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
-		} else if (stopmachine_state == STOPMACHINE_PREPARE
-			   && !prepared) {
-			/* Everyone is in place, hold CPU. */
-			preempt_disable();
-			prepared = 1;
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
-		} else if (stopmachine_state == STOPMACHINE_RUN && !ran) {
-			smdata.fn(smdata.data);
-			ran = 1;
-			smp_mb(); /* Must read state first. */
-			atomic_inc(&stopmachine_thread_ack);
+	do {
+		/* Chill out and ensure we re-read stopmachine_state. */
+		cpu_relax();
+		if (state != curstate) {
+			curstate = state;
+			switch (curstate) {
+			case STOPMACHINE_DISABLE_IRQ:
+				local_irq_disable();
+				hard_irq_disable();
+				break;
+			case STOPMACHINE_RUN:
+				/* |= allows error detection if functions on
+				 * multiple CPUs. */
+				smdata->fnret |= smdata->fn(smdata->data);
+				break;
+			default:
+				break;
+			}
+			ack_state();
 		}
-		/* Yield in first stage: migration threads need to
-		 * help our sisters onto their CPUs. */
-		if (!prepared && !irqs_disabled)
-			yield();
-		cpu_relax();
-	}
+	} while (curstate != STOPMACHINE_EXIT);
 
-	/* Ack: we are exiting. */
-	smp_mb(); /* Must read state first. */
-	atomic_inc(&stopmachine_thread_ack);
+	local_irq_enable();
+	do_exit(0);
+}
 
-	if (irqs_disabled)
-		local_irq_enable();
-	if (prepared)
-		preempt_enable();
-
+/* Callback for CPUs which aren't supposed to do anything. */
+static int chill(void *unused)
+{
 	return 0;
 }
 
-/* Change the thread state */
-static void stopmachine_set_state(enum stopmachine_state state)
+int __stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
-	atomic_set(&stopmachine_thread_ack, 0);
-	smp_wmb();
-	stopmachine_state = state;
-	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
-		cpu_relax();
-}
+	int i, err;
+	struct stop_machine_data active, idle;
+	struct task_struct **threads;
 
-static int stop_machine(void)
-{
-	int i, ret = 0;
+	active.fn = fn;
+	active.data = data;
+	active.fnret = 0;
+	idle.fn = chill;
+	idle.data = NULL;
 
-	atomic_set(&stopmachine_thread_ack, 0);
-	stopmachine_num_threads = 0;
-	stopmachine_state = STOPMACHINE_WAIT;
+	/* If they don't care which cpu fn runs on, just pick one. */
+	if (cpu == NR_CPUS)
+		cpu = any_online_cpu(cpu_online_map);
+
+	/* This could be too big for stack on large machines. */
+	threads = kcalloc(NR_CPUS, sizeof(threads[0]), GFP_KERNEL);
+	if (!threads)
+		return -ENOMEM;
+
+	/* Set up initial state. */
+	mutex_lock(&lock);
+	init_completion(&finished);
+	num_threads = num_online_cpus();
+	set_state(STOPMACHINE_PREPARE);
 
 	for_each_online_cpu(i) {
-		if (i == raw_smp_processor_id())
-			continue;
-		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
-		if (ret < 0)
-			break;
-		stopmachine_num_threads++;
+		struct stop_machine_data *smdata;
+		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+
+		if (cpu == ALL_CPUS || i == cpu)
+			smdata = &active;
+		else
+			smdata = &idle;
+
+		threads[i] = kthread_create(stop_cpu, smdata, "kstop%u", i);
+		if (IS_ERR(threads[i])) {
+			err = PTR_ERR(threads[i]);
+			threads[i] = NULL;
+			goto kill_threads;
+		}
+
+		/* Place it onto correct cpu. */
+		kthread_bind(threads[i], i);
+
+		/* Make it highest prio. */
+		if (sched_setscheduler_nocheck(threads[i], SCHED_FIFO, &param))
+			BUG();
 	}
 
-	/* Wait for them all to come to life. */
-	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) {
-		yield();
-		cpu_relax();
-	}
+	/* We've created all the threads.  Wake them all: hold this CPU so one
+	 * doesn't hit this CPU until we're ready. */
+	cpu = get_cpu();
+	for_each_online_cpu(i)
+		wake_up_process(threads[i]);
 
-	/* If some failed, kill them all. */
-	if (ret < 0) {
-		stopmachine_set_state(STOPMACHINE_EXIT);
-		return ret;
-	}
+	/* This will release the thread on our CPU. */
+	put_cpu();
+	wait_for_completion(&finished);
+	mutex_unlock(&lock);
 
-	/* Now they are all started, make them hold the CPUs, ready. */
-	preempt_disable();
-	stopmachine_set_state(STOPMACHINE_PREPARE);
+	kfree(threads);
 
-	/* Make them disable irqs. */
-	local_irq_disable();
-	hard_irq_disable();
-	stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
+	return active.fnret;
 
-	return 0;
-}
+kill_threads:
+	for_each_online_cpu(i)
+		if (threads[i])
+			kthread_stop(threads[i]);
+	mutex_unlock(&lock);
 
-static void restart_machine(void)
-{
-	stopmachine_set_state(STOPMACHINE_EXIT);
-	local_irq_enable();
-	preempt_enable_no_resched();
-}
-
-static void run_other_cpus(void)
-{
-	stopmachine_set_state(STOPMACHINE_RUN);
-}
-
-static int do_stop(void *_smdata)
-{
-	struct stop_machine_data *smdata = _smdata;
-	int ret;
-
-	ret = stop_machine();
-	if (ret == 0) {
-		ret = smdata->fn(smdata->data);
-		if (smdata->run_all)
-			run_other_cpus();
-		restart_machine();
-	}
-
-	/* We're done: you can kthread_stop us now */
-	complete(&smdata->done);
-
-	/* Wait for kthread_stop */
-	set_current_state(TASK_INTERRUPTIBLE);
-	while (!kthread_should_stop()) {
-		schedule();
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	__set_current_state(TASK_RUNNING);
-	return ret;
-}
-
-struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
-				       unsigned int cpu)
-{
-	static DEFINE_MUTEX(stopmachine_mutex);
-	struct stop_machine_data smdata;
-	struct task_struct *p;
-
-	mutex_lock(&stopmachine_mutex);
-
-	smdata.fn = fn;
-	smdata.data = data;
-	smdata.run_all = (cpu == ALL_CPUS) ? 1 : 0;
-	init_completion(&smdata.done);
-
-	smp_wmb(); /* make sure other cpus see smdata updates */
-
-	/* If they don't care which CPU fn runs on, bind to any online one. */
-	if (cpu == NR_CPUS || cpu == ALL_CPUS)
-		cpu = raw_smp_processor_id();
-
-	p = kthread_create(do_stop, &smdata, "kstopmachine");
-	if (!IS_ERR(p)) {
-		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
-		/* One high-prio thread per cpu.  We'll do this one. */
-		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-		kthread_bind(p, cpu);
-		wake_up_process(p);
-		wait_for_completion(&smdata.done);
-	}
-	mutex_unlock(&stopmachine_mutex);
-	return p;
+	kfree(threads);
+	return err;
 }
 
 int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
 {
-	struct task_struct *p;
 	int ret;
 
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
-	p = __stop_machine_run(fn, data, cpu);
-	if (!IS_ERR(p))
-		ret = kthread_stop(p);
-	else
-		ret = PTR_ERR(p);
+	ret = __stop_machine_run(fn, data, cpu);
 	put_online_cpus();
 	return ret;
 }
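
The heart of the rewrite is the lock-step handshake in set_state() and
ack_state(): every thread acknowledges the current state, and the last
one to ack bumps everyone to the next state.  Here is a minimal
user-space sketch of that same pattern; it is an illustration under
stated assumptions, not kernel code.  Pthreads and C11 atomics stand in
for kthreads, atomic_t and cpu_relax(), the state list is cut down, and
all names here (sketch.c, NTHREADS, EXIT_STATE) are invented for the
example:

/* sketch.c -- build with: cc -pthread sketch.c
 * Illustrative only: mirrors the patch's set_state()/ack_state() idea. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NTHREADS 4

enum { NONE, PREPARE, RUN, EXIT_STATE };	/* cut-down state list */

static atomic_int state = NONE;
static atomic_int ack;

static void set_state(int newstate)
{
	/* Reset the ack counter, then publish the new state. */
	atomic_store(&ack, NTHREADS);
	atomic_store(&state, newstate);
}

/* Last thread to ack the current state moves everyone to the next one.
 * (The kernel version instead completes a completion on EXIT.) */
static void ack_state(void)
{
	if (atomic_fetch_sub(&ack, 1) == 1 &&
	    atomic_load(&state) != EXIT_STATE)
		set_state(atomic_load(&state) + 1);
}

static void *stop_cpu(void *arg)
{
	long id = (long)arg;
	int curstate = NONE;

	do {	/* spin until the shared state changes, like the
		 * cpu_relax() loop in the patch */
		if (atomic_load(&state) != curstate) {
			curstate = atomic_load(&state);
			if (curstate == RUN)
				printf("thread %ld: fn() runs here\n", id);
			ack_state();
		}
	} while (curstate != EXIT_STATE);
	return NULL;
}

int main(void)
{
	pthread_t t[NTHREADS];
	long i;

	set_state(PREPARE);	/* threads ack this as they start up */
	for (i = 0; i < NTHREADS; i++)
		pthread_create(&t[i], NULL, stop_cpu, (void *)i);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(t[i], NULL);
	return 0;
}

Because no thread can advance until all of them have acked, each state
transition is a barrier for the whole group; in the kernel the EXIT ack
completes &finished instead of advancing, which is why
__stop_machine_run() can simply wait_for_completion() rather than
joining each thread.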
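
For callers, the visible change is the return convention.  A sketch of a
hypothetical user (set_flag() and example() are made up for illustration,
not taken from the tree):

#include <linux/stop_machine.h>

/* Hypothetical: runs while every other online CPU spins, irqs off. */
static int set_flag(void *arg)
{
	*(int *)arg = 1;
	return 0;
}

static int example(void)
{
	int flag = 0;

	/* NR_CPUS: we don't care which CPU runs set_flag().  The return
	 * value is set_flag()'s own return value: no task_struct, no
	 * IS_ERR()/PTR_ERR(), no kthread_stop(). */
	return stop_machine_run(set_flag, &flag, NR_CPUS);
}

Previously a caller of __stop_machine_run() got a task_struct back and
had to kthread_stop() it to collect fn's return value; now both entry
points return the value directly, which is what lets _cpu_down() drop
its out_thread label above.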