While booting secondary CPUs, the online CPU mask is transient and can lead to
the call trace below. Using the same cpumask for both scheduling and flushing
the work items resolves the issue.
[ 0.377823] CPU3: Booted secondary processor 0x0000000003 [0x410fd083]
[ 0.379040] ------------[ cut here ]------------
[ 0.383662] WARNING: CPU: 0 PID: 10 at kernel/workqueue.c:3084 __flush_work+0x12c/0x138
[ 0.384850] Modules linked in:
[ 0.385403] CPU: 0 PID: 10 Comm: rcu_tasks_rude_ Not tainted 5.17.0-rc3-v8+ #13
[ 0.386473] Hardware name: Raspberry Pi 4 Model B Rev 1.4 (DT)
[ 0.387289] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 0.388308] pc : __flush_work+0x12c/0x138
[ 0.388970] lr : __flush_work+0x80/0x138
[ 0.389620] sp : ffffffc00aaf3c60
[ 0.390139] x29: ffffffc00aaf3d20 x28: ffffffc009c16af0 x27: ffffff80f761df48
[ 0.391316] x26: 0000000000000004 x25: 0000000000000003 x24: 0000000000000100
[ 0.392493] x23: ffffffffffffffff x22: ffffffc009c16b10 x21: ffffffc009c16b28
[ 0.393668] x20: ffffffc009e53861 x19: ffffff80f77fbf40 x18: 00000000d744fcc9
[ 0.394842] x17: 000000000000000b x16: 00000000000001c2 x15: ffffffc009e57550
[ 0.396016] x14: 0000000000000000 x13: ffffffffffffffff x12: 0000000100000000
[ 0.397190] x11: 0000000000000462 x10: ffffff8040258008 x9 : 0000000100000000
[ 0.398364] x8 : 0000000000000000 x7 : ffffffc0093c8bf4 x6 : 0000000000000000
[ 0.399538] x5 : 0000000000000000 x4 : ffffffc00a976e40 x3 : ffffffc00810444c
[ 0.400711] x2 : 0000000000000004 x1 : 0000000000000000 x0 : 0000000000000000
[ 0.401886] Call trace:
[ 0.402309] __flush_work+0x12c/0x138
[ 0.402941] schedule_on_each_cpu+0x228/0x278
[ 0.403693] rcu_tasks_rude_wait_gp+0x130/0x144
[ 0.404502] rcu_tasks_kthread+0x220/0x254
[ 0.405264] kthread+0x174/0x1ac
[ 0.405837] ret_from_fork+0x10/0x20
[ 0.406456] irq event stamp: 102
[ 0.406966] hardirqs last enabled at (101): [<ffffffc0093c8468>] _raw_spin_unlock_irq+0x78/0xb4
[ 0.408304] hardirqs last disabled at (102): [<ffffffc0093b8270>] el1_dbg+0x24/0x5c
[ 0.409410] softirqs last enabled at (54): [<ffffffc0081b80c8>] local_bh_enable+0xc/0x2c
[ 0.410645] softirqs last disabled at (50): [<ffffffc0081b809c>] local_bh_disable+0xc/0x2c
[ 0.411890] ---[ end trace 0000000000000000 ]---
[ 0.413000] smp: Brought up 1 node, 4 CPUs
[ 0.413762] SMP: Total of 4 processors activated.
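For context (not part of the patch itself), the sketch below annotates the existing
loops in schedule_on_each_cpu() that the trace above corresponds to; the comments
describe the presumed failure mode rather than verified analysis:

	/*
	 * Unpatched code path, annotated. alloc_percpu() returns zeroed
	 * memory, so if a secondary CPU comes online between the two passes
	 * over cpu_online_mask, the flush pass can reach a work item that was
	 * never INIT_WORK'd or queued, presumably tripping the
	 * WARN_ON(!work->func) check in __flush_work().
	 */
	for_each_online_cpu(cpu) {		/* first read of the mask */
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		schedule_work_on(cpu, work);
	}

	for_each_online_cpu(cpu)		/* mask may have grown by now */
		flush_work(per_cpu_ptr(works, cpu));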
Signed-off-by: Padmanabha Srinivasaiah <[email protected]>
---
kernel/workqueue.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 33f1106b4f99..a3f53f859e9d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3326,28 +3326,38 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
 */
 int schedule_on_each_cpu(work_func_t func)
 {
-	int cpu;
 	struct work_struct __percpu *works;
+	cpumask_var_t sched_cpumask;
+	int cpu, ret = 0;
 
-	works = alloc_percpu(struct work_struct);
-	if (!works)
+	if (!alloc_cpumask_var(&sched_cpumask, GFP_KERNEL))
 		return -ENOMEM;
 
+	works = alloc_percpu(struct work_struct);
+	if (!works) {
+		ret = -ENOMEM;
+		goto free_cpumask;
+	}
+
 	cpus_read_lock();
 
-	for_each_online_cpu(cpu) {
+	cpumask_copy(sched_cpumask, cpu_online_mask);
+	for_each_cpu_and(cpu, sched_cpumask, cpu_online_mask) {
 		struct work_struct *work = per_cpu_ptr(works, cpu);
 
 		INIT_WORK(work, func);
 		schedule_work_on(cpu, work);
 	}
 
-	for_each_online_cpu(cpu)
+	for_each_cpu_and(cpu, sched_cpumask, cpu_online_mask)
 		flush_work(per_cpu_ptr(works, cpu));
 
 	cpus_read_unlock();
 	free_percpu(works);
-	return 0;
+
+free_cpumask:
+	free_cpumask_var(sched_cpumask);
+	return ret;
 }
 
 /**
--
2.17.1
Hello,
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 33f1106b4f99..a3f53f859e9d 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -3326,28 +3326,38 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
> */
> int schedule_on_each_cpu(work_func_t func)
> {
> - int cpu;
> struct work_struct __percpu *works;
> + cpumask_var_t sched_cpumask;
> + int cpu, ret = 0;
>
> - works = alloc_percpu(struct work_struct);
> - if (!works)
> + if (!alloc_cpumask_var(&sched_cpumask, GFP_KERNEL))
> return -ENOMEM;
>
> + works = alloc_percpu(struct work_struct);
> + if (!works) {
> + ret = -ENOMEM;
> + goto free_cpumask;
> + }
> +
> cpus_read_lock();
>
> - for_each_online_cpu(cpu) {
> + cpumask_copy(sched_cpumask, cpu_online_mask);
> + for_each_cpu_and(cpu, sched_cpumask, cpu_online_mask) {
This definitely would need a comment explaining what's going on cuz it looks
weird to be copying the cpumask which is supposed to stay stable due to the
cpus_read_lock(). Given that it can only happen during early boot and the
online cpus can only be expanding, maybe just add sth like:
	if (early_during_boot) {
		for_each_possible_cpu(cpu)
			INIT_WORK(per_cpu_ptr(works, cpu), func);
	}
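A minimal sketch of how that could slot into schedule_on_each_cpu(), purely
illustrative: early_during_boot is a placeholder predicate rather than an existing
symbol, and the assumption is that flush_work() on an initialized but never-queued
work item is a harmless no-op:

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	/*
	 * Hypothetical early-boot branch: initialize every possible CPU's
	 * work item up front so that a CPU coming online after the
	 * scheduling loop below still has work->func set, and flush_work()
	 * on it simply returns false instead of warning.
	 */
	if (early_during_boot)
		for_each_possible_cpu(cpu)
			INIT_WORK(per_cpu_ptr(works, cpu), func);

	cpus_read_lock();
	for_each_online_cpu(cpu) {
		INIT_WORK(per_cpu_ptr(works, cpu), func);
		schedule_work_on(cpu, per_cpu_ptr(works, cpu));
	}
	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));
	cpus_read_unlock();
	free_percpu(works);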
BTW, who's calling schedule_on_each_cpu() that early during boot. It makes
no sense to do this while the cpumasks can't be stabilized.
Thanks.
--
tejun
On Mon, Feb 14, 2022 at 09:43:52AM -1000, Tejun Heo wrote:
> Hello,
>
> > diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> > index 33f1106b4f99..a3f53f859e9d 100644
> > --- a/kernel/workqueue.c
> > +++ b/kernel/workqueue.c
> > @@ -3326,28 +3326,38 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
> > */
> > int schedule_on_each_cpu(work_func_t func)
> > {
> > - int cpu;
> > struct work_struct __percpu *works;
> > + cpumask_var_t sched_cpumask;
> > + int cpu, ret = 0;
> >
> > - works = alloc_percpu(struct work_struct);
> > - if (!works)
> > + if (!alloc_cpumask_var(&sched_cpumask, GFP_KERNEL))
> > return -ENOMEM;
> >
> > + works = alloc_percpu(struct work_struct);
> > + if (!works) {
> > + ret = -ENOMEM;
> > + goto free_cpumask;
> > + }
> > +
> > cpus_read_lock();
> >
> > - for_each_online_cpu(cpu) {
> > + cpumask_copy(sched_cpumask, cpu_online_mask);
> > + for_each_cpu_and(cpu, sched_cpumask, cpu_online_mask) {
>
> This definitely would need a comment explaining what's going on cuz it looks
> weird to be copying the cpumask which is supposed to stay stable due to the
> cpus_read_lock(). Given that it can only happen during early boot and the
> online cpus can only be expanding, maybe just add sth like:
>
> if (early_during_boot) {
> for_each_possible_cpu(cpu)
> INIT_WORK(per_cpu_ptr(works, cpu), func);
> }
>
Thanks Tejun for the reply and suggestions.
Yes, unfortunately cpus_read_lock() is not keeping the cpumask stable during
secondary boot. Not sure, maybe it only guarantees that a 'cpu' doesn't go
down under cpus_read_[lock/unlock].
As suggested, will try out something like:
	if (system_state != RUNNING) {
		:
	}
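(For reference, the global here is system_state, of type enum system_states, and
the mainline constant is SYSTEM_RUNNING rather than a bare RUNNING, so the check
would presumably look more like the sketch below; illustrative only:)

	/* Sketch only: compare against the enum system_states constant. */
	if (system_state < SYSTEM_RUNNING) {
		/* early-boot handling along the lines suggested above */
	}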
> BTW, who's calling schedule_on_each_cpu() that early during boot. It makes
> no sense to do this while the cpumasks can't be stabilized.
>
It is the implementation of CONFIG_TASKS_RUDE_RCU.
> Thanks.
>
> --
> tejun
On Wed, Feb 16, 2022 at 07:49:39PM +0100, Padmanabha Srinivasaiah wrote:
> On Mon, Feb 14, 2022 at 09:43:52AM -1000, Tejun Heo wrote:
> > Hello,
> >
> > > diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> > > index 33f1106b4f99..a3f53f859e9d 100644
> > > --- a/kernel/workqueue.c
> > > +++ b/kernel/workqueue.c
> > > @@ -3326,28 +3326,38 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
> > > */
> > > int schedule_on_each_cpu(work_func_t func)
> > > {
> > > - int cpu;
> > > struct work_struct __percpu *works;
> > > + cpumask_var_t sched_cpumask;
> > > + int cpu, ret = 0;
> > >
> > > - works = alloc_percpu(struct work_struct);
> > > - if (!works)
> > > + if (!alloc_cpumask_var(&sched_cpumask, GFP_KERNEL))
> > > return -ENOMEM;
> > >
> > > + works = alloc_percpu(struct work_struct);
> > > + if (!works) {
> > > + ret = -ENOMEM;
> > > + goto free_cpumask;
> > > + }
> > > +
> > > cpus_read_lock();
> > >
> > > - for_each_online_cpu(cpu) {
> > > + cpumask_copy(sched_cpumask, cpu_online_mask);
> > > + for_each_cpu_and(cpu, sched_cpumask, cpu_online_mask) {
> >
> > This definitely would need a comment explaining what's going on cuz it looks
> > weird to be copying the cpumask which is supposed to stay stable due to the
> > cpus_read_lock(). Given that it can only happen during early boot and the
> > online cpus can only be expanding, maybe just add sth like:
> >
> > if (early_during_boot) {
> > for_each_possible_cpu(cpu)
> > INIT_WORK(per_cpu_ptr(works, cpu), func);
> > }
> >
>
> Thanks Tejun for the reply and suggestions.
>
> Yes, unfortunately cpus_read_lock() is not keeping the cpumask stable during
> secondary boot. Not sure, maybe it only guarantees that a 'cpu' doesn't go
> down under cpus_read_[lock/unlock].
>
> As suggested, will try out something like:
> 	if (system_state != RUNNING) {
> 		:
> 	}
> > BTW, who's calling schedule_on_each_cpu() that early during boot. It makes
> > no sense to do this while the cpumasks can't be stabilized.
> >
> It is the implementation of CONFIG_TASKS_RUDE_RCU.
Another option would be to adjust CONFIG_TASKS_RUDE_RCU based on where
things are in the boot process. For example:
// Wait for one rude RCU-tasks grace period.
static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
{
	if (num_online_cpus() <= 1)
		return;	// Fastpath for only one CPU.

	rtp->n_ipis += cpumask_weight(cpu_online_mask);
	schedule_on_each_cpu(rcu_tasks_be_rude);
}
Easy enough either way!
Thanx, Paul
On Wed, Feb 16, 2022 at 11:07:00AM -0800, Paul E. McKenney wrote:
> On Wed, Feb 16, 2022 at 07:49:39PM +0100, Padmanabha Srinivasaiah wrote:
> > On Mon, Feb 14, 2022 at 09:43:52AM -1000, Tejun Heo wrote:
> > > Hello,
> > >
> > > > diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> > > > index 33f1106b4f99..a3f53f859e9d 100644
> > > > --- a/kernel/workqueue.c
> > > > +++ b/kernel/workqueue.c
> > > > @@ -3326,28 +3326,38 @@ EXPORT_SYMBOL(cancel_delayed_work_sync);
> > > > */
> > > > int schedule_on_each_cpu(work_func_t func)
> > > > {
> > > > - int cpu;
> > > > struct work_struct __percpu *works;
> > > > + cpumask_var_t sched_cpumask;
> > > > + int cpu, ret = 0;
> > > >
> > > > - works = alloc_percpu(struct work_struct);
> > > > - if (!works)
> > > > + if (!alloc_cpumask_var(&sched_cpumask, GFP_KERNEL))
> > > > return -ENOMEM;
> > > >
> > > > + works = alloc_percpu(struct work_struct);
> > > > + if (!works) {
> > > > + ret = -ENOMEM;
> > > > + goto free_cpumask;
> > > > + }
> > > > +
> > > > cpus_read_lock();
> > > >
> > > > - for_each_online_cpu(cpu) {
> > > > + cpumask_copy(sched_cpumask, cpu_online_mask);
> > > > + for_each_cpu_and(cpu, sched_cpumask, cpu_online_mask) {
> > >
> > > This definitely would need a comment explaining what's going on cuz it looks
> > > weird to be copying the cpumask which is supposed to stay stable due to the
> > > cpus_read_lock(). Given that it can only happen during early boot and the
> > > online cpus can only be expanding, maybe just add sth like:
> > >
> > > if (early_during_boot) {
> > > for_each_possible_cpu(cpu)
> > > INIT_WORK(per_cpu_ptr(works, cpu), func);
> > > }
> > >
> >
> > Thanks Tejun for the reply and suggestions.
> >
> > Yes, unfortunately cpus_read_lock() is not keeping the cpumask stable during
> > secondary boot. Not sure, maybe it only guarantees that a 'cpu' doesn't go
> > down under cpus_read_[lock/unlock].
> >
> > As suggested, will try out something like:
> > 	if (system_state != RUNNING) {
> > 		:
> > 	}
> > > BTW, who's calling schedule_on_each_cpu() that early during boot. It makes
> > > no sense to do this while the cpumasks can't be stabilized.
> > >
> > It is the implementation of CONFIG_TASKS_RUDE_RCU.
>
> Another option would be to adjust CONFIG_TASKS_RUDE_RCU based on where
> things are in the boot process. For example:
>
> // Wait for one rude RCU-tasks grace period.
> static void rcu_tasks_rude_wait_gp(struct rcu_tasks *rtp)
> {
> if (num_online_cpus() <= 1)
> return; // Fastpath for only one CPU.
> rtp->n_ipis += cpumask_weight(cpu_online_mask);
> schedule_on_each_cpu(rcu_tasks_be_rude);
> }
>
> Easy enough either way!
>
> Thanx, Paul
Thank you, Paul, for the suggestion; I tried it and it fixes the issue.
I have submitted it with a Suggested-by tag for Paul.
Link:
https://lore.kernel.org/all/[email protected]/T/#t