On RCU tasks trace stall, inspect the RCU-tasks-trace specific
states of stalled task in locked down state, using try_invoke_
on_locked_down_task(), to get reliable trc state of a non-running
stalled task.
Signed-off-by: Neeraj Upadhyay <[email protected]>
---
kernel/rcu/tasks.h | 43 ++++++++++++++++++++++++++++++++++---------
1 file changed, 34 insertions(+), 9 deletions(-)
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index e4a32db9f712..4e49b847971b 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1073,25 +1073,50 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
// Any tasks that exit after this point will set ->trc_reader_checked.
}
+/* Communicate task state back to the RCU tasks trace stall warning request. */
+struct trc_stall_chk_rdr {
+ int nesting;
+ int ipi_to_cpu;
+ u8 needqs;
+};
+
+static bool trc_check_slow_task(struct task_struct *t, void *arg)
+{
+ struct trc_stall_chk_rdr *trc_rdrp = arg;
+
+ if (task_curr(t))
+ return false; // It is running, so decline to inspect it.
+ trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting);
+ trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu);
+ trc_rdrp->needqs = READ_ONCE(t->trc_reader_special.b.need_qs);
+ return true;
+}
+
/* Show the state of a task stalling the current RCU tasks trace GP. */
static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
{
int cpu;
+ struct trc_stall_chk_rdr trc_rdr;
+ bool is_idle_tsk = is_idle_task(t);
if (*firstreport) {
pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
*firstreport = false;
}
- // FIXME: This should attempt to use try_invoke_on_nonrunning_task().
cpu = task_cpu(t);
- pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
- t->pid,
- ".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0],
- ".i"[is_idle_task(t)],
- ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
- READ_ONCE(t->trc_reader_nesting),
- " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
- cpu);
+ if (!try_invoke_on_locked_down_task(t, trc_check_slow_task, &trc_rdr))
+ pr_alert("P%d: %c\n",
+ t->pid,
+ ".i"[is_idle_tsk]);
+ else
+ pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
+ t->pid,
+ ".I"[trc_rdr.ipi_to_cpu >= 0],
+ ".i"[is_idle_tsk],
+ ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
+ trc_rdr.nesting,
+ " N"[!!trc_rdr.needqs],
+ cpu);
sched_show_task(t);
}
--
2.17.1
On Tue, Nov 09, 2021 at 04:52:14PM +0530, Neeraj Upadhyay wrote:
> On RCU tasks trace stall, inspect the RCU-tasks-trace specific
> states of stalled task in locked down state, using try_invoke_
> on_locked_down_task(), to get reliable trc state of a non-running
> stalled task.
>
> Signed-off-by: Neeraj Upadhyay <[email protected]>
Queued for further review and testing, thank you!
Along those lines, what did you do to test this? I would like to
add that to the commit message. (The usual approach is to use the
rcutorture.stall_cpu module parameter, in case I have not yet passed
that along.)
Thanx, Paul
> ---
> kernel/rcu/tasks.h | 43 ++++++++++++++++++++++++++++++++++---------
> 1 file changed, 34 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> index e4a32db9f712..4e49b847971b 100644
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -1073,25 +1073,50 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
> // Any tasks that exit after this point will set ->trc_reader_checked.
> }
>
> +/* Communicate task state back to the RCU tasks trace stall warning request. */
> +struct trc_stall_chk_rdr {
> + int nesting;
> + int ipi_to_cpu;
> + u8 needqs;
> +};
> +
> +static bool trc_check_slow_task(struct task_struct *t, void *arg)
> +{
> + struct trc_stall_chk_rdr *trc_rdrp = arg;
> +
> + if (task_curr(t))
> + return false; // It is running, so decline to inspect it.
> + trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting);
> + trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu);
> + trc_rdrp->needqs = READ_ONCE(t->trc_reader_special.b.need_qs);
> + return true;
> +}
> +
> /* Show the state of a task stalling the current RCU tasks trace GP. */
> static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
> {
> int cpu;
> + struct trc_stall_chk_rdr trc_rdr;
> + bool is_idle_tsk = is_idle_task(t);
>
> if (*firstreport) {
> pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
> *firstreport = false;
> }
> - // FIXME: This should attempt to use try_invoke_on_nonrunning_task().
> cpu = task_cpu(t);
> - pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
> - t->pid,
> - ".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0],
> - ".i"[is_idle_task(t)],
> - ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
> - READ_ONCE(t->trc_reader_nesting),
> - " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
> - cpu);
> + if (!try_invoke_on_locked_down_task(t, trc_check_slow_task, &trc_rdr))
> + pr_alert("P%d: %c\n",
> + t->pid,
> + ".i"[is_idle_tsk]);
> + else
> + pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
> + t->pid,
> + ".I"[trc_rdr.ipi_to_cpu >= 0],
> + ".i"[is_idle_tsk],
> + ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
> + trc_rdr.nesting,
> + " N"[!!trc_rdr.needqs],
> + cpu);
> sched_show_task(t);
> }
>
> --
> 2.17.1
>
Hi Paul,
On 11/9/2021 8:22 PM, Paul E. McKenney wrote:
> On Tue, Nov 09, 2021 at 04:52:14PM +0530, Neeraj Upadhyay wrote:
>> On RCU tasks trace stall, inspect the RCU-tasks-trace specific
>> states of stalled task in locked down state, using try_invoke_
>> on_locked_down_task(), to get reliable trc state of a non-running
>> stalled task.
>>
>> Signed-off-by: Neeraj Upadhyay <[email protected]>
>
> Queued for further review and testing, thank you!
>
> Along those lines, what did you do to test this? I would like to
> add that to the commit message. (The usual approach is to use the
> rcutorture.stall_cpu module parameter, in case I have not yet passed
> that along.)
>
Before sending the patch, I had tested with TRACE01 and adding long
delay before rcu_read_unlock_trace() in tasks_tracing_torture_read_unlock()
static void tasks_tracing_torture_read_unlock(int idx)
{
rcu_read_unlock_trace();
}
I retested with below commandline (without adding delay in
tasks_tracing_torture_read_unlock()):
tools/testing/selftests/rcutorture/bin/kvm.sh --cpus 8 --configs TRACE01
--bootargs "rcutorture.torture_type=tasks-tracing
rcutorture.stall_cpu=10 rcutorture.stall_cpu_block=1
rcupdate.rcu_task_stall_timeout=100" --trust-make
With this, I can see stalls for both RUNNING and WAITING state task:
[ 21.520291] INFO: rcu_tasks_trace detected stalls on tasks:
[ 21.521292] P85: ... nesting: 1N cpu: 2
[ 21.521966] task:rcu_torture_sta state:D stack:15080 pid: 85 ppid:
2 flags:0x00004000
[ 21.523384] Call Trace:
[ 21.523808] __schedule+0x273/0x6e0
[ 21.524428] schedule+0x35/0xa0
[ 21.524971] schedule_timeout+0x1ed/0x270
[ 21.525690] ? del_timer_sync+0x30/0x30
[ 21.526371] ? rcu_torture_writer+0x720/0x720
[ 21.527106] rcu_torture_stall+0x24a/0x270
[ 21.527816] kthread+0x115/0x140
[ 21.528401] ? set_kthread_struct+0x40/0x40
[ 21.529136] ret_from_fork+0x22/0x30
[ 21.529766] 1 holdouts
[ 21.632300] INFO: rcu_tasks_trace detected stalls on tasks:
[ 21.632345] rcu_torture_stall end.
[ 21.633293] P85: .
[ 21.633294] task:rcu_torture_sta state:R running task
stack:15080 pid: 85 ppid: 2 flags:0x00004000
[ 21.633299] Call Trace:
[ 21.633301] ? vprintk_emit+0xab/0x180
[ 21.633306] ? vprintk_emit+0x11a/0x180
[ 21.633308] ? _printk+0x4d/0x69
[ 21.633311] ? __default_send_IPI_shortcut+0x1f/0x40
Thanks
Neeraj
> Thanx, Paul
>
>> ---
>> kernel/rcu/tasks.h | 43 ++++++++++++++++++++++++++++++++++---------
>> 1 file changed, 34 insertions(+), 9 deletions(-)
>>
>> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
>> index e4a32db9f712..4e49b847971b 100644
>> --- a/kernel/rcu/tasks.h
>> +++ b/kernel/rcu/tasks.h
>> @@ -1073,25 +1073,50 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
>> // Any tasks that exit after this point will set ->trc_reader_checked.
>> }
>>
>> +/* Communicate task state back to the RCU tasks trace stall warning request. */
>> +struct trc_stall_chk_rdr {
>> + int nesting;
>> + int ipi_to_cpu;
>> + u8 needqs;
>> +};
>> +
>> +static bool trc_check_slow_task(struct task_struct *t, void *arg)
>> +{
>> + struct trc_stall_chk_rdr *trc_rdrp = arg;
>> +
>> + if (task_curr(t))
>> + return false; // It is running, so decline to inspect it.
>> + trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting);
>> + trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu);
>> + trc_rdrp->needqs = READ_ONCE(t->trc_reader_special.b.need_qs);
>> + return true;
>> +}
>> +
>> /* Show the state of a task stalling the current RCU tasks trace GP. */
>> static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
>> {
>> int cpu;
>> + struct trc_stall_chk_rdr trc_rdr;
>> + bool is_idle_tsk = is_idle_task(t);
>>
>> if (*firstreport) {
>> pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
>> *firstreport = false;
>> }
>> - // FIXME: This should attempt to use try_invoke_on_nonrunning_task().
>> cpu = task_cpu(t);
>> - pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
>> - t->pid,
>> - ".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0],
>> - ".i"[is_idle_task(t)],
>> - ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
>> - READ_ONCE(t->trc_reader_nesting),
>> - " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
>> - cpu);
>> + if (!try_invoke_on_locked_down_task(t, trc_check_slow_task, &trc_rdr))
>> + pr_alert("P%d: %c\n",
>> + t->pid,
>> + ".i"[is_idle_tsk]);
>> + else
>> + pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
>> + t->pid,
>> + ".I"[trc_rdr.ipi_to_cpu >= 0],
>> + ".i"[is_idle_tsk],
>> + ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
>> + trc_rdr.nesting,
>> + " N"[!!trc_rdr.needqs],
>> + cpu);
>> sched_show_task(t);
>> }
>>
>> --
>> 2.17.1
>>
On Wed, Nov 10, 2021 at 10:10:27AM +0530, quic_neeraju wrote:
> Hi Paul,
>
>
> On 11/9/2021 8:22 PM, Paul E. McKenney wrote:
> > On Tue, Nov 09, 2021 at 04:52:14PM +0530, Neeraj Upadhyay wrote:
> > > On RCU tasks trace stall, inspect the RCU-tasks-trace specific
> > > states of stalled task in locked down state, using try_invoke_
> > > on_locked_down_task(), to get reliable trc state of a non-running
> > > stalled task.
> > >
> > > Signed-off-by: Neeraj Upadhyay <[email protected]>
> >
> > Queued for further review and testing, thank you!
> >
> > Along those lines, what did you do to test this? I would like to
> > add that to the commit message. (The usual approach is to use the
> > rcutorture.stall_cpu module parameter, in case I have not yet passed
> > that along.)
> >
>
> Before sending the patch, I had tested with TRACE01 and adding long delay
> before rcu_read_unlock_trace() in tasks_tracing_torture_read_unlock()
>
> static void tasks_tracing_torture_read_unlock(int idx)
> {
> rcu_read_unlock_trace();
> }
>
> I retested with below commandline (without adding delay in
> tasks_tracing_torture_read_unlock()):
>
> tools/testing/selftests/rcutorture/bin/kvm.sh --cpus 8 --configs TRACE01
> --bootargs "rcutorture.torture_type=tasks-tracing rcutorture.stall_cpu=10
> rcutorture.stall_cpu_block=1 rcupdate.rcu_task_stall_timeout=100"
> --trust-make
>
> With this, I can see stalls for both RUNNING and WAITING state task:
Applied, thank you!
Thanx, Paul
> [ 21.520291] INFO: rcu_tasks_trace detected stalls on tasks:
> [ 21.521292] P85: ... nesting: 1N cpu: 2
> [ 21.521966] task:rcu_torture_sta state:D stack:15080 pid: 85 ppid:
> 2 flags:0x00004000
> [ 21.523384] Call Trace:
> [ 21.523808] __schedule+0x273/0x6e0
> [ 21.524428] schedule+0x35/0xa0
> [ 21.524971] schedule_timeout+0x1ed/0x270
> [ 21.525690] ? del_timer_sync+0x30/0x30
> [ 21.526371] ? rcu_torture_writer+0x720/0x720
> [ 21.527106] rcu_torture_stall+0x24a/0x270
> [ 21.527816] kthread+0x115/0x140
> [ 21.528401] ? set_kthread_struct+0x40/0x40
> [ 21.529136] ret_from_fork+0x22/0x30
> [ 21.529766] 1 holdouts
> [ 21.632300] INFO: rcu_tasks_trace detected stalls on tasks:
> [ 21.632345] rcu_torture_stall end.
> [ 21.633293] P85: .
> [ 21.633294] task:rcu_torture_sta state:R running task stack:15080 pid:
> 85 ppid: 2 flags:0x00004000
> [ 21.633299] Call Trace:
> [ 21.633301] ? vprintk_emit+0xab/0x180
> [ 21.633306] ? vprintk_emit+0x11a/0x180
> [ 21.633308] ? _printk+0x4d/0x69
> [ 21.633311] ? __default_send_IPI_shortcut+0x1f/0x40
>
>
>
> Thanks
> Neeraj
>
> > Thanx, Paul
> >
> > > ---
> > > kernel/rcu/tasks.h | 43 ++++++++++++++++++++++++++++++++++---------
> > > 1 file changed, 34 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> > > index e4a32db9f712..4e49b847971b 100644
> > > --- a/kernel/rcu/tasks.h
> > > +++ b/kernel/rcu/tasks.h
> > > @@ -1073,25 +1073,50 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
> > > // Any tasks that exit after this point will set ->trc_reader_checked.
> > > }
> > > +/* Communicate task state back to the RCU tasks trace stall warning request. */
> > > +struct trc_stall_chk_rdr {
> > > + int nesting;
> > > + int ipi_to_cpu;
> > > + u8 needqs;
> > > +};
> > > +
> > > +static bool trc_check_slow_task(struct task_struct *t, void *arg)
> > > +{
> > > + struct trc_stall_chk_rdr *trc_rdrp = arg;
> > > +
> > > + if (task_curr(t))
> > > + return false; // It is running, so decline to inspect it.
> > > + trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting);
> > > + trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu);
> > > + trc_rdrp->needqs = READ_ONCE(t->trc_reader_special.b.need_qs);
> > > + return true;
> > > +}
> > > +
> > > /* Show the state of a task stalling the current RCU tasks trace GP. */
> > > static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
> > > {
> > > int cpu;
> > > + struct trc_stall_chk_rdr trc_rdr;
> > > + bool is_idle_tsk = is_idle_task(t);
> > > if (*firstreport) {
> > > pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
> > > *firstreport = false;
> > > }
> > > - // FIXME: This should attempt to use try_invoke_on_nonrunning_task().
> > > cpu = task_cpu(t);
> > > - pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
> > > - t->pid,
> > > - ".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0],
> > > - ".i"[is_idle_task(t)],
> > > - ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
> > > - READ_ONCE(t->trc_reader_nesting),
> > > - " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
> > > - cpu);
> > > + if (!try_invoke_on_locked_down_task(t, trc_check_slow_task, &trc_rdr))
> > > + pr_alert("P%d: %c\n",
> > > + t->pid,
> > > + ".i"[is_idle_tsk]);
> > > + else
> > > + pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
> > > + t->pid,
> > > + ".I"[trc_rdr.ipi_to_cpu >= 0],
> > > + ".i"[is_idle_tsk],
> > > + ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
> > > + trc_rdr.nesting,
> > > + " N"[!!trc_rdr.needqs],
> > > + cpu);
> > > sched_show_task(t);
> > > }
> > > --
> > > 2.17.1
> > >