2014-02-17 17:19:09

by Oleg Nesterov

[permalink] [raw]
Subject: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

On 02/13, Jiri Olsa wrote:
>
> hi,
> not sure you'd be interested nor if I can reproduce it,
> but got another workqueue warning
>
> jirka
>
> [ 4324.514324] ------------[ cut here ]------------
> [ 4324.514547] WARNING: CPU: 0 PID: 12 at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()
> [ 4324.514933] Modules linked in:
> [ 4324.515076] CPU: 0 PID: 12 Comm: migration/0 Tainted: G W 3.13.0+ #213
> [ 4324.515411] Hardware name: Intel Corporation Montevina platform/To be filled by O.E.M., BIOS AMVACRB1.86C.0066.B00.0805070703 05/07/2008
> [ 4324.515966] 0000000000000009 ffff8800001c9b78 ffffffff81643b6a 0000000000000004
> [ 4324.516320] 0000000000000000 ffff8800001c9bb8 ffffffff81045a7c ffff88007a9d35c0
> [ 4324.516674] 0000000000000001 ffff88007a9d35c0 ffff8800751828f8 0000000000000086
> [ 4324.517027] Call Trace:
> [ 4324.517141] [<ffffffff81643b6a>] dump_stack+0x4f/0x7c
> [ 4324.517374] [<ffffffff81045a7c>] warn_slowpath_common+0x8c/0xc0
> [ 4324.517647] [<ffffffff81045aca>] warn_slowpath_null+0x1a/0x20
> [ 4324.517912] [<ffffffff810663f3>] wq_worker_waking_up+0x53/0x70
> [ 4324.518181] [<ffffffff81079f09>] ttwu_do_activate.constprop.98+0x59/0x70
> [ 4324.518489] [<ffffffff8107d99f>] try_to_wake_up+0x1cf/0x2e0
> [ 4324.518745] [<ffffffff8107dac2>] default_wake_function+0x12/0x20
> [ 4324.519022] [<ffffffff8108cad5>] __wake_up_common+0x55/0x90
> [ 4324.519279] [<ffffffff8107a3f0>] ? __migrate_task+0x1a0/0x1a0
> [ 4324.519543] [<ffffffff8108cb23>] __wake_up_locked+0x13/0x20
> [ 4324.519799] [<ffffffff8108d4d2>] complete+0x42/0x60
> [ 4324.520024] [<ffffffff8107a424>] ? migration_cpu_stop+0x34/0x40
> [ 4324.520297] [<ffffffff810d207d>] cpu_stop_signal_done+0x2d/0x30
> [ 4324.520570] [<ffffffff810d226f>] cpu_stopper_thread+0xaf/0x130
> [ 4324.520838] [<ffffffff81092f0e>] ? put_lock_stats.isra.18+0xe/0x30
> [ 4324.521122] [<ffffffff8164dded>] ? _raw_spin_unlock_irqrestore+0x6d/0x80
> [ 4324.521430] [<ffffffff8107af61>] ? get_parent_ip+0x11/0x50
> [ 4324.521684] [<ffffffff81074e91>] smpboot_thread_fn+0x1a1/0x2b0
> [ 4324.521952] [<ffffffff81074cf0>] ? SyS_setgroups+0x150/0x150
> [ 4324.522213] [<ffffffff8106db04>] kthread+0xe4/0x100
> [ 4324.522439] [<ffffffff816498a8>] ? wait_for_common+0xd8/0x160
> [ 4324.522703] [<ffffffff8106da20>] ? __init_kthread_worker+0x70/0x70
> [ 4324.522988] [<ffffffff8165682c>] ret_from_fork+0x7c/0xb0
> [ 4324.523233] [<ffffffff8106da20>] ? __init_kthread_worker+0x70/0x70
> [ 4324.523517] ---[ end trace 8659853860e530bc ]---

And with this debugging patch

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2649,6 +2649,9 @@ pick_next_task(struct rq *rq)
* - return from syscall or exception to user-space
* - return from interrupt-handler to user-space
*/
+
+void ret_from_sched(void);
+
static void __sched __schedule(void)
{
struct task_struct *prev, *next;
@@ -2733,6 +2736,9 @@ need_resched:
sched_preempt_enable_no_resched();
if (need_resched())
goto need_resched;
+
+ if (current->flags & PF_WQ_WORKER)
+ ret_from_sched();
}

static inline void sched_submit_work(struct task_struct *tsk)
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -108,6 +108,8 @@ enum {
WQ_NAME_LEN = 24,
};

+#define WQ_MARK (1 << 10)
+
/*
* Structure fields follow one of the following exclusion rules.
*
@@ -826,11 +828,22 @@ void wq_worker_waking_up(struct task_struct *task, int cpu)
struct worker *worker = kthread_data(task);

if (!(worker->flags & WORKER_NOT_RUNNING)) {
- WARN_ON_ONCE(worker->pool->cpu != cpu);
+ if (worker->pool->cpu != cpu) {
+ pr_crit("WARN: %d %s\n", worker->task->pid, worker->task->comm);
+ worker->flags |= WQ_MARK;
+ }
atomic_inc(&worker->pool->nr_running);
}
}

+void ret_from_sched(void)
+{
+ struct worker *worker = kthread_data(current);
+
+ if (WARN_ON(worker->flags & WQ_MARK))
+ worker->flags &= ~WQ_MARK;
+}
+
/**
* wq_worker_sleeping - a worker is going to sleep
* @task: task going to sleep

Jiri got the following:

[ 3438.192852] WARN: 29668 kworker/0:2
[ 3438.193022] ------------[ cut here ]------------
[ 3438.193391] ------------[ cut here ]------------
[ 3438.193606] WARNING: CPU: 1 PID: 29668 at kernel/workqueue.c:843 ret_from_sched+0x38/0x50()
[ 3438.193988] Modules linked in:
[ 3438.194132] CPU: 1 PID: 29668 Comm: kworker/0:2 Tainted: G W 3.13.0+ #238
[ 3438.194491] Hardware name: Intel Corporation Montevina platform/To be filled by O.E.M., BIOS AMVACRB1.86C.0066.B00.0805070703 05/07/2008
[ 3438.195055] Workqueue: events_freezable thermal_zone_device_check
[ 3438.195350] 0000000000000009 ffff88007b5b94c8 ffffffff81642f0a 00000000000015c0
[ 3438.195707] 0000000000000000 ffff88007b5b9508 ffffffff81045a7c ffff88007b5b94f8
[ 3438.196064] ffff88007a9d35c0 0000000000000000 ffff8800746b5a00 0000000000000001
[ 3438.196426] Call Trace:
[ 3438.196540] [<ffffffff81642f0a>] dump_stack+0x4f/0x7c
[ 3438.196775] [<ffffffff81045a7c>] warn_slowpath_common+0x8c/0xc0
[ 3438.197049] [<ffffffff81045aca>] warn_slowpath_null+0x1a/0x20
[ 3438.197327] [<ffffffff81066448>] ret_from_sched+0x38/0x50
[ 3438.197578] [<ffffffff81647a9f>] __schedule+0x37f/0xb00
[ 3438.197823] [<ffffffff810951f5>] ? mark_held_locks+0x95/0x140
[ 3438.198091] [<ffffffff8164c795>] ? _raw_spin_lock_irqsave+0x25/0x90
[ 3438.198389] [<ffffffff816486de>] ? preempt_schedule_irq+0x3e/0x70
[ 3438.198671] [<ffffffff816486e4>] preempt_schedule_irq+0x44/0x70
[ 3438.198943] [<ffffffff8164da70>] retint_kernel+0x20/0x30
[ 3438.199191] [<ffffffff810a28eb>] ? vprintk_emit+0x18b/0x510
[ 3438.199459] [<ffffffff81066448>] ? ret_from_sched+0x38/0x50
[ 3438.199718] [<ffffffff816412a6>] printk+0x4d/0x4f
[ 3438.199936] [<ffffffff81066448>] ? ret_from_sched+0x38/0x50
[ 3438.200199] [<ffffffff81045a33>] warn_slowpath_common+0x43/0xc0
[ 3438.200475] [<ffffffff81045aca>] warn_slowpath_null+0x1a/0x20
[ 3438.200740] [<ffffffff81066448>] ret_from_sched+0x38/0x50
[ 3438.200989] [<ffffffff81647a9f>] __schedule+0x37f/0xb00
[ 3438.201240] [<ffffffff81097749>] ? __lock_acquire+0x479/0x21d0
[ 3438.201510] [<ffffffff8106642b>] ? ret_from_sched+0x1b/0x50
[ 3438.201768] [<ffffffff8100a9b5>] ? native_sched_clock+0x85/0xd0
[ 3438.202042] [<ffffffff816482e9>] schedule+0x29/0x70
[ 3438.202273] [<ffffffff8164715c>] schedule_timeout+0x19c/0x290
[ 3438.202538] [<ffffffff81092f5e>] ? put_lock_stats.isra.18+0xe/0x30
[ 3438.202824] [<ffffffff8164d1e0>] ? _raw_spin_unlock_irq+0x30/0x60
[ 3438.203106] [<ffffffff8107afb1>] ? get_parent_ip+0x11/0x50
[ 3438.203368] [<ffffffff8165172b>] ? preempt_count_sub+0x7b/0x100
[ 3438.203642] [<ffffffff81648c4d>] wait_for_common+0xcd/0x160
[ 3438.203900] [<ffffffff8107db00>] ? try_to_wake_up+0x2e0/0x2e0
[ 3438.204165] [<ffffffff81648cfd>] wait_for_completion+0x1d/0x20
[ 3438.204441] [<ffffffff810d27ca>] stop_one_cpu+0x6a/0x90
[ 3438.204684] [<ffffffff8107a440>] ? __migrate_task+0x1a0/0x1a0
[ 3438.204949] [<ffffffff8108d508>] ? complete+0x28/0x60
[ 3438.205182] [<ffffffff8107e159>] set_cpus_allowed_ptr+0x109/0x110
[ 3438.205474] [<ffffffff81303333>] acpi_processor_set_throttling+0x1b1/0x276
[ 3438.205792] [<ffffffff81304d45>] processor_set_cur_state+0x55/0x60
[ 3438.206078] [<ffffffff8147039d>] thermal_cdev_update+0x9d/0xc0
[ 3438.206354] [<ffffffff81471f21>] step_wise_throttle+0x61/0xa0
[ 3438.206619] [<ffffffff81470da3>] handle_thermal_trip+0x53/0x150
[ 3438.218006] [<ffffffff81470f25>] thermal_zone_device_update+0x75/0xb0
[ 3438.229503] [<ffffffff81471a55>] thermal_zone_device_check+0x15/0x20
[ 3438.240967] [<ffffffff810648ac>] process_one_work+0x1dc/0x660
[ 3438.252429] [<ffffffff81064842>] ? process_one_work+0x172/0x660
[ 3438.263823] [<ffffffff81065881>] worker_thread+0x121/0x380
[ 3438.275116] [<ffffffff8108d52d>] ? complete+0x4d/0x60
[ 3438.286413] [<ffffffff8164d17b>] ? _raw_spin_unlock_irqrestore+0x4b/0x80
[ 3438.297786] [<ffffffff81065760>] ? manage_workers.isra.25+0x2b0/0x2b0
[ 3438.309004] [<ffffffff8106db54>] kthread+0xe4/0x100
[ 3438.319983] [<ffffffff8165172b>] ? preempt_count_sub+0x7b/0x100
[ 3438.330842] [<ffffffff8106da70>] ? __init_kthread_worker+0x70/0x70
[ 3438.341534] [<ffffffff81655bac>] ret_from_fork+0x7c/0xb0
[ 3438.352137] [<ffffffff8106da70>] ? __init_kthread_worker+0x70/0x70
[ 3438.362782] ---[ end trace 579ce178e4febac3 ]---

acpi_processor_set_throttling() plays with set_cpus_allowed_ptr(current),
this is obviously wrong, and the worker is bound.

Oleg.


2014-02-18 22:49:16

by Tejun Heo

[permalink] [raw]
Subject: Re: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

Hello,

On Mon, Feb 17, 2014 at 06:19:00PM +0100, Oleg Nesterov wrote:
> acpi_processor_set_throttling() plays with set_cpus_allowed_ptr(current),
> this is obviously wrong, and the worker is bound.

Umm... yeah, anything running on workqueues shouldn't be diddling with
cpu affinity. The function even has /* FIXME: use work_on_cpu() */ in
it. I suppose it's about time to actually implement that?

Thanks.

--
tejun

2014-02-20 13:28:29

by Lan Tianyu

[permalink] [raw]
Subject: Re: Re: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

On 02/19/2014 06:49 AM, Tejun Heo wrote:
> Hello,
>
> On Mon, Feb 17, 2014 at 06:19:00PM +0100, Oleg Nesterov wrote:
>> acpi_processor_set_throttling() plays with set_cpus_allowed_ptr(current),
>> this is obviously wrong, and the worker is bound.
>
> Umm... yeah, anything running on workqueues shouldn't be diddling with
> cpu affinity. The function even has /* FIXME: use work_on_cpu() */ in
> it. I suppose it's about time to actually implement that?
>
> Thanks.
>

Hi Jiri:
Could you try this patch which reworks ACPI processor throttling with
work_on_cpu()?


diff --git a/drivers/acpi/processor_throttling.c
b/drivers/acpi/processor_throttling.c
index 28baa05..9122d64 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -1060,6 +1060,23 @@ static int
acpi_processor_set_throttling_ptc(struct acpi_processor *pr,
return 0;
}

+struct acpi_processor_throttling_arg {
+ struct acpi_processor *pr;
+ int target_state;
+ bool force;
+};
+
+static long acpi_processor_throttling_fn(void *data)
+{
+ struct acpi_processor_throttling_arg *arg = data;
+ struct acpi_processor *pr = arg->pr;
+ struct acpi_processor_throttling *p_throttling = &pr->throttling;
+
+ return p_throttling->acpi_processor_set_throttling(pr,
+ arg->target_state, arg->force);
+}
+
+
int acpi_processor_set_throttling(struct acpi_processor *pr,
int state, bool force)
{
@@ -1068,7 +1085,8 @@ int acpi_processor_set_throttling(struct
acpi_processor *pr,
unsigned int i;
struct acpi_processor *match_pr;
struct acpi_processor_throttling *p_throttling;
- struct throttling_tstate t_state;
+ struct acpi_processor_throttling_arg arg;
+ struct throttling_tstate t_state;
cpumask_var_t online_throttling_cpus;

if (!pr)
@@ -1083,10 +1101,8 @@ int acpi_processor_set_throttling(struct
acpi_processor *pr,
if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL))
return -ENOMEM;

- if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) {
- free_cpumask_var(saved_mask);
+ if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL))
return -ENOMEM;
- }

if (cpu_is_offline(pr->id)) {
/*
@@ -1096,7 +1112,6 @@ int acpi_processor_set_throttling(struct
acpi_processor *pr,
return -ENODEV;
}

- cpumask_copy(saved_mask, &current->cpus_allowed);
t_state.target_state = state;
p_throttling = &(pr->throttling);
cpumask_and(online_throttling_cpus, cpu_online_mask,
@@ -1118,14 +1133,10 @@ int acpi_processor_set_throttling(struct
acpi_processor *pr,
* it can be called only for the cpu pointed by pr.
*/
if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
- /* FIXME: use work_on_cpu() */
- if (set_cpus_allowed_ptr(current, cpumask_of(pr->id))) {
- /* Can't migrate to the pr->id CPU. Exit */
- ret = -ENODEV;
- goto exit;
- }
- ret = p_throttling->acpi_processor_set_throttling(pr,
- t_state.target_state, force);
+ arg.pr = pr;
+ arg.target_state = state;
+ arg.force = force;
+ ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, &arg);
} else {
/*
* When the T-state coordination is SW_ALL or HW_ALL,
@@ -1153,13 +1164,11 @@ int acpi_processor_set_throttling(struct
acpi_processor *pr,
"on CPU %d\n", i));
continue;
}
- t_state.cpu = i;
- /* FIXME: use work_on_cpu() */
- if (set_cpus_allowed_ptr(current, cpumask_of(i)))
- continue;
- ret = match_pr->throttling.
- acpi_processor_set_throttling(
- match_pr, t_state.target_state, force);
+
+ arg.pr = match_pr;
+ arg.target_state = state;
+ arg.force = force;
+ ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, &arg);
}
}
/*
@@ -1173,12 +1182,8 @@ int acpi_processor_set_throttling(struct
acpi_processor *pr,
acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE,
&t_state);
}
- /* restore the previous state */
- /* FIXME: use work_on_cpu() */
- set_cpus_allowed_ptr(current, saved_mask);
-exit:
+
free_cpumask_var(online_throttling_cpus);
- free_cpumask_var(saved_mask);
return ret;
}


--
Best Regards
Tianyu Lan


Attachments:
pt.patch (3.38 kB)

2014-02-20 14:31:11

by Jiri Olsa

[permalink] [raw]
Subject: Re: Re: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

On Thu, Feb 20, 2014 at 09:28:26PM +0800, Lan Tianyu wrote:
> On 02/19/2014 06:49 AM, Tejun Heo wrote:
> >Hello,
> >
> >On Mon, Feb 17, 2014 at 06:19:00PM +0100, Oleg Nesterov wrote:
> >>acpi_processor_set_throttling() plays with set_cpus_allowed_ptr(current),
> >>this is obviously wrong, and the worker is bound.
> >
> >Umm... yeah, anything running on workqueues shouldn't be diddling with
> >cpu affinity. The function even has /* FIXME: use work_on_cpu() */ in
> >it. I suppose it's about time to actually implement that?
> >
> >Thanks.
> >
>
> Hi Jiri:
> Could you try this patch which reworks ACPI processor throttling
> with work_on_cpu()?

hum, I've got difficulties to apply it..

[jolsa@krava2 linux-perf]$ git am /tmp/wq/
Applying: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()
fatal: corrupt patch at line 8
Patch failed at 0001 WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

jirka

2014-02-20 14:46:06

by Lan Tianyu

[permalink] [raw]
Subject: Re: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

On 02/20/2014 10:31 PM, Jiri Olsa wrote:
> On Thu, Feb 20, 2014 at 09:28:26PM +0800, Lan Tianyu wrote:
>> On 02/19/2014 06:49 AM, Tejun Heo wrote:
>>> Hello,
>>>
>>> On Mon, Feb 17, 2014 at 06:19:00PM +0100, Oleg Nesterov wrote:
>>>> acpi_processor_set_throttling() plays with set_cpus_allowed_ptr(current),
>>>> this is obviously wrong, and the worker is bound.
>>>
>>> Umm... yeah, anything running on workqueues shouldn't be diddling with
>>> cpu affinity. The function even has /* FIXME: use work_on_cpu() */ in
>>> it. I suppose it's about time to actually implement that?
>>>
>>> Thanks.
>>>
>>
>> Hi Jiri:
>> Could you try this patch which reworks ACPI processor throttling
>> with work_on_cpu()?
>
> hum, I've got difficulties to apply it..
>
> [jolsa@krava2 linux-perf]$ git am /tmp/wq/
> Applying: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()
> fatal: corrupt patch at line 8
> Patch failed at 0001 WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()
>
> jirka
>
Sorry. Please try the attachment again.

--
Best Regards
Tianyu Lan


Attachments:
0001-ACPI-Processor-Rework-processor-throttling-with-work.patch (3.71 kB)

2014-02-20 14:53:18

by Jiri Olsa

[permalink] [raw]
Subject: Re: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

On Thu, Feb 20, 2014 at 10:45:32PM +0800, Lan Tianyu wrote:
> On 02/20/2014 10:31 PM, Jiri Olsa wrote:
> >On Thu, Feb 20, 2014 at 09:28:26PM +0800, Lan Tianyu wrote:
> >>On 02/19/2014 06:49 AM, Tejun Heo wrote:
> >>>Hello,
> >>>
> >>>On Mon, Feb 17, 2014 at 06:19:00PM +0100, Oleg Nesterov wrote:
> >>>>acpi_processor_set_throttling() plays with set_cpus_allowed_ptr(current),
> >>>>this is obviously wrong, and the worker is bound.
> >>>
> >>>Umm... yeah, anything running on workqueues shouldn't be diddling with
> >>>cpu affinity. The function even has /* FIXME: use work_on_cpu() */ in
> >>>it. I suppose it's about time to actually implement that?
> >>>
> >>>Thanks.
> >>>
> >>
> >>Hi Jiri:
> >> Could you try this patch which reworks ACPI processor throttling
> >>with work_on_cpu()?
> >
> >hum, I've got difficulties to apply it..
> >
> >[jolsa@krava2 linux-perf]$ git am /tmp/wq/
> >Applying: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()
> >fatal: corrupt patch at line 8
> >Patch failed at 0001 WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()
> >
> >jirka
> >
> Sorry. Please try the attachment again.

np, this one got applied cleanly ;-)

I started the workload.. it took several hours to hit
it before, I'll let you know

jirka

2014-02-20 15:13:26

by Oleg Nesterov

[permalink] [raw]
Subject: Re: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

On 02/20, Lan Tianyu wrote:
>
> @@ -1096,7 +1112,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
> return -ENODEV;
> }
>
> - cpumask_copy(saved_mask, &current->cpus_allowed);

Yes, but the patch forgets to kill alloc_cpumask_var(&saved_mask)
and saved_mask itself ;)

Oleg.

2014-02-20 15:17:29

by Oleg Nesterov

[permalink] [raw]
Subject: Re: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

On 02/20, Oleg Nesterov wrote:
>
> On 02/20, Lan Tianyu wrote:
> >
> > @@ -1096,7 +1112,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
> > return -ENODEV;
> > }
> >
> > - cpumask_copy(saved_mask, &current->cpus_allowed);
>
> Yes, but the patch forgets to kill alloc_cpumask_var(&saved_mask)
> and saved_mask itself ;)

And it seems that acpi_processor_set_throttling() doesn't need
online_throttling_cpus at all. It could use
for_each_cpu_and(cpu_online_mask, p_throttling->shared_cpu_map).

Oleg.

2014-02-21 02:34:20

by Lan Tianyu

[permalink] [raw]
Subject: Re: WARNING at kernel/workqueue.c:829 wq_worker_waking_up+0x53/0x70()

On 02/20/2014 11:17 PM, Oleg Nesterov wrote:
> On 02/20, Oleg Nesterov wrote:
>>
>> On 02/20, Lan Tianyu wrote:
>>>
>>> @@ -1096,7 +1112,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
>>> return -ENODEV;
>>> }
>>>
>>> - cpumask_copy(saved_mask, &current->cpus_allowed);
>>
>> Yes, but the patch forgets to kill alloc_cpumask_var(&saved_mask)
>> and saved_mask itself ;
>
> And it seems that acpi_processor_set_throttling() doesn't need
> online_throttling_cpus at all. It could use
> for_each_cpu_and(cpu_online_mask, p_throttling->shared_cpu_map).
>


Yes, oops and will update soon. Thanks.


> Oleg.
>


--
Best Regards
Tianyu Lan

2014-02-21 05:36:03

by Lan Tianyu

[permalink] [raw]
Subject: [PATCH] ACPI/Processor: Rework processor throttling with work_on_cpu()

acpi_processor_set_throttling() uses set_cpus_allowed_ptr() to make
sure struct acpi_processor->acpi_processor_set_throttling() callback
run on associated cpu. But the function maybe called in a worker which
has been bound to a cpu. The patch is to replace set_cpus_allowed_ptr()
with work_on_cpu().

Signed-off-by: Lan Tianyu <[email protected]>
---
drivers/acpi/processor_throttling.c | 70 +++++++++++++++++--------------------
1 file changed, 33 insertions(+), 37 deletions(-)

diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index 28baa05..2db105a 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -56,6 +56,12 @@ struct throttling_tstate {
int target_state; /* target T-state */
};

+struct acpi_processor_throttling_arg {
+ struct acpi_processor *pr;
+ int target_state;
+ bool force;
+};
+
#define THROTTLING_PRECHANGE (1)
#define THROTTLING_POSTCHANGE (2)

@@ -1060,16 +1066,25 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr,
return 0;
}

+static long acpi_processor_throttling_fn(void *data)
+{
+ struct acpi_processor_throttling_arg *arg = data;
+ struct acpi_processor *pr = arg->pr;
+ struct acpi_processor_throttling *p_throttling = &pr->throttling;
+
+ return p_throttling->acpi_processor_set_throttling(pr,
+ arg->target_state, arg->force);
+}
+
int acpi_processor_set_throttling(struct acpi_processor *pr,
int state, bool force)
{
- cpumask_var_t saved_mask;
int ret = 0;
unsigned int i;
struct acpi_processor *match_pr;
struct acpi_processor_throttling *p_throttling;
+ struct acpi_processor_throttling_arg arg;
struct throttling_tstate t_state;
- cpumask_var_t online_throttling_cpus;

if (!pr)
return -EINVAL;
@@ -1080,14 +1095,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
if ((state < 0) || (state > (pr->throttling.state_count - 1)))
return -EINVAL;

- if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL))
- return -ENOMEM;
-
- if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) {
- free_cpumask_var(saved_mask);
- return -ENOMEM;
- }
-
if (cpu_is_offline(pr->id)) {
/*
* the cpu pointed by pr->id is offline. Unnecessary to change
@@ -1096,17 +1103,15 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
return -ENODEV;
}

- cpumask_copy(saved_mask, &current->cpus_allowed);
t_state.target_state = state;
p_throttling = &(pr->throttling);
- cpumask_and(online_throttling_cpus, cpu_online_mask,
- p_throttling->shared_cpu_map);
+
/*
* The throttling notifier will be called for every
* affected cpu in order to get one proper T-state.
* The notifier event is THROTTLING_PRECHANGE.
*/
- for_each_cpu(i, online_throttling_cpus) {
+ for_each_cpu_and(i, cpu_online_mask, p_throttling->shared_cpu_map) {
t_state.cpu = i;
acpi_processor_throttling_notifier(THROTTLING_PRECHANGE,
&t_state);
@@ -1118,21 +1123,18 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
* it can be called only for the cpu pointed by pr.
*/
if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
- /* FIXME: use work_on_cpu() */
- if (set_cpus_allowed_ptr(current, cpumask_of(pr->id))) {
- /* Can't migrate to the pr->id CPU. Exit */
- ret = -ENODEV;
- goto exit;
- }
- ret = p_throttling->acpi_processor_set_throttling(pr,
- t_state.target_state, force);
+ arg.pr = pr;
+ arg.target_state = state;
+ arg.force = force;
+ ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, &arg);
} else {
/*
* When the T-state coordination is SW_ALL or HW_ALL,
* it is necessary to set T-state for every affected
* cpus.
*/
- for_each_cpu(i, online_throttling_cpus) {
+ for_each_cpu_and(i, cpu_online_mask,
+ p_throttling->shared_cpu_map) {
match_pr = per_cpu(processors, i);
/*
* If the pointer is invalid, we will report the
@@ -1153,13 +1155,12 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
"on CPU %d\n", i));
continue;
}
- t_state.cpu = i;
- /* FIXME: use work_on_cpu() */
- if (set_cpus_allowed_ptr(current, cpumask_of(i)))
- continue;
- ret = match_pr->throttling.
- acpi_processor_set_throttling(
- match_pr, t_state.target_state, force);
+
+ arg.pr = match_pr;
+ arg.target_state = state;
+ arg.force = force;
+ ret = work_on_cpu(pr->id, acpi_processor_throttling_fn,
+ &arg);
}
}
/*
@@ -1168,17 +1169,12 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
* affected cpu to update the T-states.
* The notifier event is THROTTLING_POSTCHANGE
*/
- for_each_cpu(i, online_throttling_cpus) {
+ for_each_cpu_and(i, cpu_online_mask, p_throttling->shared_cpu_map) {
t_state.cpu = i;
acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE,
&t_state);
}
- /* restore the previous state */
- /* FIXME: use work_on_cpu() */
- set_cpus_allowed_ptr(current, saved_mask);
-exit:
- free_cpumask_var(online_throttling_cpus);
- free_cpumask_var(saved_mask);
+
return ret;
}

--
1.8.2.1

2014-02-21 10:06:40

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH] ACPI/Processor: Rework processor throttling with work_on_cpu()

On Fri, Feb 21, 2014 at 01:35:45PM +0800, Lan Tianyu wrote:
> acpi_processor_set_throttling() uses set_cpus_allowed_ptr() to make
> sure struct acpi_processor->acpi_processor_set_throttling() callback
> run on associated cpu. But the function maybe called in a worker which
> has been bound to a cpu. The patch is to replace set_cpus_allowed_ptr()
> with work_on_cpu().

testing the new patch.. so far so good ;-)

jirka

2014-02-21 17:07:20

by Jiri Olsa

[permalink] [raw]
Subject: Re: [PATCH] ACPI/Processor: Rework processor throttling with work_on_cpu()

On Fri, Feb 21, 2014 at 11:06:30AM +0100, Jiri Olsa wrote:
> On Fri, Feb 21, 2014 at 01:35:45PM +0800, Lan Tianyu wrote:
> > acpi_processor_set_throttling() uses set_cpus_allowed_ptr() to make
> > sure struct acpi_processor->acpi_processor_set_throttling() callback
> > run on associated cpu. But the function maybe called in a worker which
> > has been bound to a cpu. The patch is to replace set_cpus_allowed_ptr()
> > with work_on_cpu().
>
> testing the new patch.. so far so good ;-)

ook.. survived whole day under the test workload without the warning ;-)

jirka

2014-02-24 09:33:54

by Lan Tianyu

[permalink] [raw]
Subject: Re: [PATCH] ACPI/Processor: Rework processor throttling with work_on_cpu()

On 2014年02月22日 01:07, Jiri Olsa wrote:
> On Fri, Feb 21, 2014 at 11:06:30AM +0100, Jiri Olsa wrote:
>> On Fri, Feb 21, 2014 at 01:35:45PM +0800, Lan Tianyu wrote:
>>> acpi_processor_set_throttling() uses set_cpus_allowed_ptr() to make
>>> sure struct acpi_processor->acpi_processor_set_throttling() callback
>>> run on associated cpu. But the function maybe called in a worker which
>>> has been bound to a cpu. The patch is to replace set_cpus_allowed_ptr()
>>> with work_on_cpu().
>>
>> testing the new patch.. so far so good ;-)
>
> ook.. survived whole day under the test workload without the warning ;-)
>
> jirka
>

Hi Jirka:
Great. Thanks for test:).


--
Best regards
Tianyu Lan

2014-02-26 01:08:38

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH] ACPI/Processor: Rework processor throttling with work_on_cpu()

On Friday, February 21, 2014 01:35:45 PM Lan Tianyu wrote:
> acpi_processor_set_throttling() uses set_cpus_allowed_ptr() to make
> sure struct acpi_processor->acpi_processor_set_throttling() callback
> run on associated cpu. But the function maybe called in a worker which
> has been bound to a cpu. The patch is to replace set_cpus_allowed_ptr()
> with work_on_cpu().
>
> Signed-off-by: Lan Tianyu <[email protected]>
> ---
> drivers/acpi/processor_throttling.c | 70 +++++++++++++++++--------------------
> 1 file changed, 33 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
> index 28baa05..2db105a 100644
> --- a/drivers/acpi/processor_throttling.c
> +++ b/drivers/acpi/processor_throttling.c
> @@ -56,6 +56,12 @@ struct throttling_tstate {
> int target_state; /* target T-state */
> };
>
> +struct acpi_processor_throttling_arg {
> + struct acpi_processor *pr;
> + int target_state;
> + bool force;
> +};
> +
> #define THROTTLING_PRECHANGE (1)
> #define THROTTLING_POSTCHANGE (2)
>
> @@ -1060,16 +1066,25 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr,
> return 0;
> }
>
> +static long acpi_processor_throttling_fn(void *data)
> +{
> + struct acpi_processor_throttling_arg *arg = data;
> + struct acpi_processor *pr = arg->pr;
> + struct acpi_processor_throttling *p_throttling = &pr->throttling;
> +
> + return p_throttling->acpi_processor_set_throttling(pr,
> + arg->target_state, arg->force);

What about doing

return pr->throttling.acpi_processor_set_throttling(...);

directly without using the extra p_throttling pointer?

> +}
> +
> int acpi_processor_set_throttling(struct acpi_processor *pr,
> int state, bool force)
> {
> - cpumask_var_t saved_mask;
> int ret = 0;
> unsigned int i;
> struct acpi_processor *match_pr;
> struct acpi_processor_throttling *p_throttling;
> + struct acpi_processor_throttling_arg arg;
> struct throttling_tstate t_state;
> - cpumask_var_t online_throttling_cpus;
>
> if (!pr)
> return -EINVAL;
> @@ -1080,14 +1095,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
> if ((state < 0) || (state > (pr->throttling.state_count - 1)))
> return -EINVAL;
>
> - if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL))
> - return -ENOMEM;
> -
> - if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) {
> - free_cpumask_var(saved_mask);
> - return -ENOMEM;
> - }
> -
> if (cpu_is_offline(pr->id)) {
> /*
> * the cpu pointed by pr->id is offline. Unnecessary to change
> @@ -1096,17 +1103,15 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
> return -ENODEV;
> }
>
> - cpumask_copy(saved_mask, &current->cpus_allowed);
> t_state.target_state = state;
> p_throttling = &(pr->throttling);
> - cpumask_and(online_throttling_cpus, cpu_online_mask,
> - p_throttling->shared_cpu_map);
> +
> /*
> * The throttling notifier will be called for every
> * affected cpu in order to get one proper T-state.
> * The notifier event is THROTTLING_PRECHANGE.
> */
> - for_each_cpu(i, online_throttling_cpus) {
> + for_each_cpu_and(i, cpu_online_mask, p_throttling->shared_cpu_map) {
> t_state.cpu = i;
> acpi_processor_throttling_notifier(THROTTLING_PRECHANGE,
> &t_state);
> @@ -1118,21 +1123,18 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
> * it can be called only for the cpu pointed by pr.
> */
> if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
> - /* FIXME: use work_on_cpu() */
> - if (set_cpus_allowed_ptr(current, cpumask_of(pr->id))) {
> - /* Can't migrate to the pr->id CPU. Exit */
> - ret = -ENODEV;
> - goto exit;
> - }
> - ret = p_throttling->acpi_processor_set_throttling(pr,
> - t_state.target_state, force);
> + arg.pr = pr;
> + arg.target_state = state;
> + arg.force = force;
> + ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, &arg);
> } else {
> /*
> * When the T-state coordination is SW_ALL or HW_ALL,
> * it is necessary to set T-state for every affected
> * cpus.
> */
> - for_each_cpu(i, online_throttling_cpus) {
> + for_each_cpu_and(i, cpu_online_mask,
> + p_throttling->shared_cpu_map) {
> match_pr = per_cpu(processors, i);
> /*
> * If the pointer is invalid, we will report the
> @@ -1153,13 +1155,12 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
> "on CPU %d\n", i));
> continue;
> }
> - t_state.cpu = i;
> - /* FIXME: use work_on_cpu() */
> - if (set_cpus_allowed_ptr(current, cpumask_of(i)))
> - continue;
> - ret = match_pr->throttling.
> - acpi_processor_set_throttling(
> - match_pr, t_state.target_state, force);
> +
> + arg.pr = match_pr;
> + arg.target_state = state;
> + arg.force = force;
> + ret = work_on_cpu(pr->id, acpi_processor_throttling_fn,
> + &arg);
> }
> }
> /*
> @@ -1168,17 +1169,12 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
> * affected cpu to update the T-states.
> * The notifier event is THROTTLING_POSTCHANGE
> */
> - for_each_cpu(i, online_throttling_cpus) {
> + for_each_cpu_and(i, cpu_online_mask, p_throttling->shared_cpu_map) {
> t_state.cpu = i;
> acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE,
> &t_state);
> }
> - /* restore the previous state */
> - /* FIXME: use work_on_cpu() */
> - set_cpus_allowed_ptr(current, saved_mask);
> -exit:
> - free_cpumask_var(online_throttling_cpus);
> - free_cpumask_var(saved_mask);
> +
> return ret;
> }
>
>

--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

2014-02-26 01:14:53

by Lan Tianyu

[permalink] [raw]
Subject: Re: [PATCH] ACPI/Processor: Rework processor throttling with work_on_cpu()

On 2014年02月26日 09:23, Rafael J. Wysocki wrote:
> On Friday, February 21, 2014 01:35:45 PM Lan Tianyu wrote:
>> acpi_processor_set_throttling() uses set_cpus_allowed_ptr() to make
>> sure struct acpi_processor->acpi_processor_set_throttling() callback
>> run on associated cpu. But the function maybe called in a worker which
>> has been bound to a cpu. The patch is to replace set_cpus_allowed_ptr()
>> with work_on_cpu().
>>
>> Signed-off-by: Lan Tianyu <[email protected]>
>> ---
>> drivers/acpi/processor_throttling.c | 70 +++++++++++++++++--------------------
>> 1 file changed, 33 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
>> index 28baa05..2db105a 100644
>> --- a/drivers/acpi/processor_throttling.c
>> +++ b/drivers/acpi/processor_throttling.c
>> @@ -56,6 +56,12 @@ struct throttling_tstate {
>> int target_state; /* target T-state */
>> };
>>
>> +struct acpi_processor_throttling_arg {
>> + struct acpi_processor *pr;
>> + int target_state;
>> + bool force;
>> +};
>> +
>> #define THROTTLING_PRECHANGE (1)
>> #define THROTTLING_POSTCHANGE (2)
>>
>> @@ -1060,16 +1066,25 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr,
>> return 0;
>> }
>>
>> +static long acpi_processor_throttling_fn(void *data)
>> +{
>> + struct acpi_processor_throttling_arg *arg = data;
>> + struct acpi_processor *pr = arg->pr;
>> + struct acpi_processor_throttling *p_throttling = &pr->throttling;
>> +
>> + return p_throttling->acpi_processor_set_throttling(pr,
>> + arg->target_state, arg->force);
>
> What about doing
>
> return pr->throttling.acpi_processor_set_throttling(...);
>
> directly without using the extra p_throttling pointer?

This is better. I will update soon.

>
>> +}
>> +
>> int acpi_processor_set_throttling(struct acpi_processor *pr,
>> int state, bool force)
>> {
>> - cpumask_var_t saved_mask;
>> int ret = 0;
>> unsigned int i;
>> struct acpi_processor *match_pr;
>> struct acpi_processor_throttling *p_throttling;
>> + struct acpi_processor_throttling_arg arg;
>> struct throttling_tstate t_state;
>> - cpumask_var_t online_throttling_cpus;
>>
>> if (!pr)
>> return -EINVAL;
>> @@ -1080,14 +1095,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
>> if ((state < 0) || (state > (pr->throttling.state_count - 1)))
>> return -EINVAL;
>>
>> - if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL))
>> - return -ENOMEM;
>> -
>> - if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) {
>> - free_cpumask_var(saved_mask);
>> - return -ENOMEM;
>> - }
>> -
>> if (cpu_is_offline(pr->id)) {
>> /*
>> * the cpu pointed by pr->id is offline. Unnecessary to change
>> @@ -1096,17 +1103,15 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
>> return -ENODEV;
>> }
>>
>> - cpumask_copy(saved_mask, &current->cpus_allowed);
>> t_state.target_state = state;
>> p_throttling = &(pr->throttling);
>> - cpumask_and(online_throttling_cpus, cpu_online_mask,
>> - p_throttling->shared_cpu_map);
>> +
>> /*
>> * The throttling notifier will be called for every
>> * affected cpu in order to get one proper T-state.
>> * The notifier event is THROTTLING_PRECHANGE.
>> */
>> - for_each_cpu(i, online_throttling_cpus) {
>> + for_each_cpu_and(i, cpu_online_mask, p_throttling->shared_cpu_map) {
>> t_state.cpu = i;
>> acpi_processor_throttling_notifier(THROTTLING_PRECHANGE,
>> &t_state);
>> @@ -1118,21 +1123,18 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
>> * it can be called only for the cpu pointed by pr.
>> */
>> if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) {
>> - /* FIXME: use work_on_cpu() */
>> - if (set_cpus_allowed_ptr(current, cpumask_of(pr->id))) {
>> - /* Can't migrate to the pr->id CPU. Exit */
>> - ret = -ENODEV;
>> - goto exit;
>> - }
>> - ret = p_throttling->acpi_processor_set_throttling(pr,
>> - t_state.target_state, force);
>> + arg.pr = pr;
>> + arg.target_state = state;
>> + arg.force = force;
>> + ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, &arg);
>> } else {
>> /*
>> * When the T-state coordination is SW_ALL or HW_ALL,
>> * it is necessary to set T-state for every affected
>> * cpus.
>> */
>> - for_each_cpu(i, online_throttling_cpus) {
>> + for_each_cpu_and(i, cpu_online_mask,
>> + p_throttling->shared_cpu_map) {
>> match_pr = per_cpu(processors, i);
>> /*
>> * If the pointer is invalid, we will report the
>> @@ -1153,13 +1155,12 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
>> "on CPU %d\n", i));
>> continue;
>> }
>> - t_state.cpu = i;
>> - /* FIXME: use work_on_cpu() */
>> - if (set_cpus_allowed_ptr(current, cpumask_of(i)))
>> - continue;
>> - ret = match_pr->throttling.
>> - acpi_processor_set_throttling(
>> - match_pr, t_state.target_state, force);
>> +
>> + arg.pr = match_pr;
>> + arg.target_state = state;
>> + arg.force = force;
>> + ret = work_on_cpu(pr->id, acpi_processor_throttling_fn,
>> + &arg);
>> }
>> }
>> /*
>> @@ -1168,17 +1169,12 @@ int acpi_processor_set_throttling(struct acpi_processor *pr,
>> * affected cpu to update the T-states.
>> * The notifier event is THROTTLING_POSTCHANGE
>> */
>> - for_each_cpu(i, online_throttling_cpus) {
>> + for_each_cpu_and(i, cpu_online_mask, p_throttling->shared_cpu_map) {
>> t_state.cpu = i;
>> acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE,
>> &t_state);
>> }
>> - /* restore the previous state */
>> - /* FIXME: use work_on_cpu() */
>> - set_cpus_allowed_ptr(current, saved_mask);
>> -exit:
>> - free_cpumask_var(online_throttling_cpus);
>> - free_cpumask_var(saved_mask);
>> +
>> return ret;
>> }
>>
>>
>


--
Best regards
Tianyu Lan