We found a problem. When a cpu is brought down using _cpu_down(),
the corresponding cpu bit in the cpus_allowed of the current task is
cleared. But this bit will not be set when the same cpu is online again.
Then, the current task and its child processes will not be allowed to
run on this cpu.
To fix this problem, some related code in the commit cc4088a (hotplug:
Lightweight get online cpus) is removed.
Signed-off-by: Zhao Chenhui <[email protected]>
---
kernel/cpu.c | 15 +--------------
1 files changed, 1 insertions(+), 14 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bfeeb00..e25b05f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -524,14 +524,13 @@ static int __ref take_cpu_down(void *_param)
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
- int mycpu, err, nr_calls = 0;
+ int err, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
.mod = mod,
.hcpu = hcpu,
};
- cpumask_var_t cpumask;
if (num_online_cpus() == 1)
return -EBUSY;
@@ -539,19 +538,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
if (!cpu_online(cpu))
return -EINVAL;
- /* Move the downtaker off the unplug cpu */
- if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
- return -ENOMEM;
- cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
- set_cpus_allowed_ptr(current, cpumask);
- free_cpumask_var(cpumask);
migrate_disable();
- mycpu = smp_processor_id();
- if (mycpu == cpu) {
- printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
- migrate_enable();
- return -EBUSY;
- }
cpu_hotplug_begin();
err = cpu_unplug_begin(cpu);
--
1.7.3
* Zhao Chenhui | 2013-05-16 18:17:19 [+0800]:
>We found a problem. When a cpu is brought down using _cpu_down(),
>the corresponding cpu bit in the cpus_allowed of the current task is
>cleared. But this bit will not be set when the same cpu is online again.
>Then, the current task and its child processes will not be allowed to
>run on this cpu.
Isn't this what should happen? This also happens on mainline if you
leave the RT bits out, right? You should be able to put the task back on
the CPU once the CPU is up again.
Sebastian
On Fri, Jun 07, 2013 at 11:05:43AM +0200, Sebastian Andrzej Siewior wrote:
> * Zhao Chenhui | 2013-05-16 18:17:19 [+0800]:
>
> >We found a problem. When a cpu is brought down using _cpu_down(),
> >the corresponding cpu bit in the cpus_allowed of the current task is
> >cleared. But this bit will not be set when the same cpu is online again.
> >Then, the current task and its child processes will not be allowed to
> >run on this cpu.
>
> Isn't this what should happen? This also happens on mainline if you
> leave the RT bits out, right? You should be able to put the task back on
> the CPU once the CPU is up again.
>
> Sebastian
>
No. _cpu_down() on mainline do not change the cpus_allowed.
The problem is that the task which turned off cpu2 (for instance)
can not run on cpu2 again after cpu2 is turned on, because cpu2 has been
removed from the cpus_allowed of the task.
The task can put himself back on cpu2 throuhg the system call,
but I think applications should not do this work and do not care which cpu
it is running on in most time.
-Chenhui
* Zhao Chenhui | 2013-06-09 17:59:42 [+0800]:
>No. _cpu_down() on mainline do not change the cpus_allowed.
My bad.
>The problem is that the task which turned off cpu2 (for instance)
>can not run on cpu2 again after cpu2 is turned on, because cpu2 has been
>removed from the cpus_allowed of the task.
>
>The task can put himself back on cpu2 throuhg the system call,
>but I think applications should not do this work and do not care which cpu
>it is running on in most time.
The mask needs to be changed because you may not be on the CPU while it
is going down. What do you think about:
Subject: [PATCH] kernel/hotplug: restore original cpu mask oncpu/down
If a task which is allowed to run only on CPU X puts CPU Y down then it
will be allowed on all CPUs but the on CPU Y after it comes back from
kernel. This patch ensures that we don't lose the initial setting unless
the CPU the task is running is going down.
Cc: [email protected]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
kernel/cpu.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3acf17d..f5ad8e1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -545,6 +545,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
.hcpu = hcpu,
};
cpumask_var_t cpumask;
+ cpumask_var_t cpumask_org;
if (num_online_cpus() == 1)
return -EBUSY;
@@ -555,6 +556,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
/* Move the downtaker off the unplug cpu */
if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
return -ENOMEM;
+ if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
+ free_cpumask_var(cpumask);
+ return -ENOMEM;
+ }
+
+ cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
set_cpus_allowed_ptr(current, cpumask);
free_cpumask_var(cpumask);
@@ -563,7 +570,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
if (mycpu == cpu) {
printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
migrate_enable();
- return -EBUSY;
+ err = -EBUSY;
+ goto restore_cpus;
}
cpu_hotplug_begin();
@@ -622,6 +630,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
cpu_hotplug_done();
if (!err)
cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
+restore_cpus:
+ set_cpus_allowed_ptr(current, cpumask_org);
+ free_cpumask_var(cpumask_org);
return err;
}
--
1.7.10.4
>
>-Chenhui
Sebastian
On Fri, Jun 14, 2013 at 05:29:17PM +0200, Sebastian Andrzej Siewior wrote:
> * Zhao Chenhui | 2013-06-09 17:59:42 [+0800]:
>
> >No. _cpu_down() on mainline do not change the cpus_allowed.
> My bad.
>
> >The problem is that the task which turned off cpu2 (for instance)
> >can not run on cpu2 again after cpu2 is turned on, because cpu2 has been
> >removed from the cpus_allowed of the task.
> >
> >The task can put himself back on cpu2 throuhg the system call,
> >but I think applications should not do this work and do not care which cpu
> >it is running on in most time.
>
> The mask needs to be changed because you may not be on the CPU while it
> is going down. What do you think about:
>
I don't think it is necessary to change the mask. migration_call() invoked by
the cpu notify "CPU_DYING" will remove all running tasks from the dying cpu.
Even if the current task is running on the dying cpu, it will be transfered
to another online cpu.
I guess that changing the mask benefits the latency of the system.
Please correct me.
-Chenhui
> Subject: [PATCH] kernel/hotplug: restore original cpu mask oncpu/down
>
> If a task which is allowed to run only on CPU X puts CPU Y down then it
> will be allowed on all CPUs but the on CPU Y after it comes back from
> kernel. This patch ensures that we don't lose the initial setting unless
> the CPU the task is running is going down.
>
> Cc: [email protected]
> Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
> ---
> kernel/cpu.c | 13 ++++++++++++-
> 1 file changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/cpu.c b/kernel/cpu.c
> index 3acf17d..f5ad8e1 100644
> --- a/kernel/cpu.c
> +++ b/kernel/cpu.c
> @@ -545,6 +545,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
> .hcpu = hcpu,
> };
> cpumask_var_t cpumask;
> + cpumask_var_t cpumask_org;
>
> if (num_online_cpus() == 1)
> return -EBUSY;
> @@ -555,6 +556,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
> /* Move the downtaker off the unplug cpu */
> if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
> return -ENOMEM;
> + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) {
> + free_cpumask_var(cpumask);
> + return -ENOMEM;
> + }
> +
> + cpumask_copy(cpumask_org, tsk_cpus_allowed(current));
> cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu));
> set_cpus_allowed_ptr(current, cpumask);
> free_cpumask_var(cpumask);
> @@ -563,7 +570,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
> if (mycpu == cpu) {
> printk(KERN_ERR "Yuck! Still on unplug CPU\n!");
> migrate_enable();
> - return -EBUSY;
> + err = -EBUSY;
> + goto restore_cpus;
> }
>
> cpu_hotplug_begin();
> @@ -622,6 +630,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
> cpu_hotplug_done();
> if (!err)
> cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
> +restore_cpus:
> + set_cpus_allowed_ptr(current, cpumask_org);
> + free_cpumask_var(cpumask_org);
> return err;
> }
>
On 06/17/2013 12:48 PM, Zhao Chenhui wrote:
> I don't think it is necessary to change the mask. migration_call() invoked by
> the cpu notify "CPU_DYING" will remove all running tasks from the dying cpu.
> Even if the current task is running on the dying cpu, it will be transfered
> to another online cpu.
I had here hiccups if the task was running on the CPU which should go
down.
> I guess that changing the mask benefits the latency of the system.
> Please correct me.
I don't get this. Lets say your system has CPUs 0-15 and you pin your
application to CPU0. After this application brings CPU15 down it is
allowed to run on CPUs 0-14. This is wrong and has been corrected. The
CPU down mechanism should not affected CPU mask of the application.
>
> -Chenhui
>
Sebastian