For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario
can result in a system oops.
           CPU1                                      CPU2
rcu_preempt_deferred_qs_irqrestore           rcu_print_task_exp_stall
  if (special.b.blocked)                       READ_ONCE(rnp->exp_tasks) != NULL
    raw_spin_lock_rcu_node
    np = rcu_next_node_entry(t, rnp)
    if (&t->rcu_node_entry == rnp->exp_tasks)
      WRITE_ONCE(rnp->exp_tasks, np)
      ....
      raw_spin_unlock_irqrestore_rcu_node
                                               raw_spin_lock_irqsave_rcu_node
                                               t = list_entry(rnp->exp_tasks->prev,
                                                   struct task_struct, rcu_node_entry)
                                               (if rnp->exp_tasks is NULL, this
                                                  will trigger an oops)
The problem is that CPU2 checks the rcu_node structure's ->exp_tasks
field without holding that structure's ->lock, so its NULL check can be
stale by the time it acquires the lock and dereferences the pointer.
If CPU1 sets the rcu_node structure's ->exp_tasks to NULL in between,
CPU2's subsequent access to members of ->exp_tasks will trigger an oops.
This commit therefore accesses the rcu_node structure's ->exp_tasks
field only while holding the rcu_node structure's ->lock.
Signed-off-by: Zqiang <[email protected]>
---
kernel/rcu/tree_exp.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 7cc4856da081..902e7c8709c7 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -803,9 +803,11 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
int ndetected = 0;
struct task_struct *t;
- if (!READ_ONCE(rnp->exp_tasks))
- return 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (!READ_ONCE(rnp->exp_tasks)) {
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ return 0;
+ }
t = list_entry(rnp->exp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
--
2.25.1
>For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario
>can result system oops.
>
> CPU1 CPU2
>rcu_preempt_deferred_qs_irqrestore rcu_print_task_exp_stall
> if (special.b.blocked) READ_ONCE(rnp->exp_tasks) != NULL
> raw_spin_lock_rcu_node
> np = rcu_next_node_entry(t, rnp)
> if (&t->rcu_node_entry == rnp->exp_tasks)
> WRITE_ONCE(rnp->exp_tasks, np)
> ....
> raw_spin_unlock_irqrestore_rcu_node
> raw_spin_lock_irqsave_rcu_node
> t = list_entry(rnp->exp_tasks->prev,
> struct task_struct, rcu_node_entry)
> (if rnp->exp_tasks is NULL
> will trigger oops)
>
>This problem is that CPU2 accesses rcu_node structure's->exp_tasks
>without holding the rcu_node structure's ->lock and CPU2 did not
>observe CPU1's change to rcu_node structure's->exp_tasks in time,
>if rcu_node structure's->exp_tasks is set null pointer by CPU1, after
>that CPU2 accesses members of rcu_node structure's->exp_tasks will
>trigger oops.
>
>This commit therefore allows rcu_node structure's->exp_tasks to be
>accessed while holding rcu_node structure's ->lock.
>
>Signed-off-by: Zqiang <[email protected]>
>
Hi Paul, do you have any suggestions for this modification?
Thanks
Zqiang
>---
> kernel/rcu/tree_exp.h | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
>diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
>index 7cc4856da081..902e7c8709c7 100644
>--- a/kernel/rcu/tree_exp.h
>+++ b/kernel/rcu/tree_exp.h
>@@ -803,9 +803,11 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
> int ndetected = 0;
> struct task_struct *t;
>
>- if (!READ_ONCE(rnp->exp_tasks))
>- return 0;
> raw_spin_lock_irqsave_rcu_node(rnp, flags);
>+ if (!READ_ONCE(rnp->exp_tasks)) {
>+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>+ return 0;
>+ }
> t = list_entry(rnp->exp_tasks->prev,
> struct task_struct, rcu_node_entry);
> list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
>--
>2.25.1
>
On Sat, Dec 24, 2022 at 01:25:53PM +0800, Zqiang wrote:
> For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario
> can result system oops.
>
> CPU1 CPU2
> rcu_preempt_deferred_qs_irqrestore rcu_print_task_exp_stall
> if (special.b.blocked) READ_ONCE(rnp->exp_tasks) != NULL
> raw_spin_lock_rcu_node
> np = rcu_next_node_entry(t, rnp)
> if (&t->rcu_node_entry == rnp->exp_tasks)
> WRITE_ONCE(rnp->exp_tasks, np)
> ....
> raw_spin_unlock_irqrestore_rcu_node
> raw_spin_lock_irqsave_rcu_node
> t = list_entry(rnp->exp_tasks->prev,
> struct task_struct, rcu_node_entry)
> (if rnp->exp_tasks is NULL
> will trigger oops)
>
> This problem is that CPU2 accesses rcu_node structure's->exp_tasks
> without holding the rcu_node structure's ->lock and CPU2 did not
> observe CPU1's change to rcu_node structure's->exp_tasks in time,
> if rcu_node structure's->exp_tasks is set null pointer by CPU1, after
> that CPU2 accesses members of rcu_node structure's->exp_tasks will
> trigger oops.
>
> This commit therefore allows rcu_node structure's->exp_tasks to be
> accessed while holding rcu_node structure's ->lock.
>
> Signed-off-by: Zqiang <[email protected]>
Apologies for the delay and thank you for the reminder!
Please check the wordsmithed version below, which I have queued.
Thanx, Paul
------------------------------------------------------------------------
commit 389b0eafd72829fd63548f7ff4e8d6ac90fa1f98
Author: Zqiang <[email protected]>
Date: Sat Dec 24 13:25:53 2022 +0800
rcu: Protect rcu_print_task_exp_stall() ->exp_tasks access
For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario can
result in a NULL-pointer dereference:
           CPU1                                      CPU2
rcu_preempt_deferred_qs_irqrestore           rcu_print_task_exp_stall
  if (special.b.blocked)                       READ_ONCE(rnp->exp_tasks) != NULL
    raw_spin_lock_rcu_node
    np = rcu_next_node_entry(t, rnp)
    if (&t->rcu_node_entry == rnp->exp_tasks)
      WRITE_ONCE(rnp->exp_tasks, np)
      ....
      raw_spin_unlock_irqrestore_rcu_node
                                               raw_spin_lock_irqsave_rcu_node
                                               t = list_entry(rnp->exp_tasks->prev,
                                                   struct task_struct, rcu_node_entry)
                                               (if rnp->exp_tasks is NULL, this
                                                  will dereference a NULL pointer)
The problem is that CPU2 accesses the rcu_node structure's ->exp_tasks
field without holding the rcu_node structure's ->lock and CPU2 did
not observe CPU1's change to the rcu_node structure's ->exp_tasks in time.
Therefore, if CPU1 sets the rcu_node structure's ->exp_tasks pointer to NULL,
then CPU2 might dereference that NULL pointer.
This commit therefore holds the rcu_node structure's ->lock while
accessing that structure's ->exp_tasks field.
Signed-off-by: Zqiang <[email protected]>
Signed-off-by: Paul E. McKenney <[email protected]>
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 7cc4856da0817..902e7c8709c7e 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -803,9 +803,11 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
int ndetected = 0;
struct task_struct *t;
- if (!READ_ONCE(rnp->exp_tasks))
- return 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
+ if (!READ_ONCE(rnp->exp_tasks)) {
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ return 0;
+ }
t = list_entry(rnp->exp_tasks->prev,
struct task_struct, rcu_node_entry);
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
> For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario
> can result system oops.
>
> CPU1 CPU2
> rcu_preempt_deferred_qs_irqrestore rcu_print_task_exp_stall
> if (special.b.blocked) READ_ONCE(rnp->exp_tasks) != NULL
> raw_spin_lock_rcu_node
> np = rcu_next_node_entry(t, rnp)
> if (&t->rcu_node_entry == rnp->exp_tasks)
> WRITE_ONCE(rnp->exp_tasks, np)
> ....
> raw_spin_unlock_irqrestore_rcu_node
> raw_spin_lock_irqsave_rcu_node
> t = list_entry(rnp->exp_tasks->prev,
> struct task_struct, rcu_node_entry)
> (if rnp->exp_tasks is NULL
> will trigger oops)
>
> This problem is that CPU2 accesses rcu_node structure's->exp_tasks
> without holding the rcu_node structure's ->lock and CPU2 did not
> observe CPU1's change to rcu_node structure's->exp_tasks in time,
> if rcu_node structure's->exp_tasks is set null pointer by CPU1, after
> that CPU2 accesses members of rcu_node structure's->exp_tasks will
> trigger oops.
>
> This commit therefore allows rcu_node structure's->exp_tasks to be
> accessed while holding rcu_node structure's ->lock.
>
> Signed-off-by: Zqiang <[email protected]>
>
>Apologies for the delay and thank you for the reminder!
>
>Please check the wordsmithed version below, which I have queued.
Thanks for the wordsmithing; this description is clearer.
Thanks
Zqiang
>
> Thanx, Paul
>
>------------------------------------------------------------------------
>
>commit 389b0eafd72829fd63548f7ff4e8d6ac90fa1f98
>Author: Zqiang <[email protected]>
>Date: Sat Dec 24 13:25:53 2022 +0800
>
> rcu: Protect rcu_print_task_exp_stall() ->exp_tasks access
>
> For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario can
> result in a NULL-pointer dereference:
>
> CPU1 CPU2
> rcu_preempt_deferred_qs_irqrestore rcu_print_task_exp_stall
> if (special.b.blocked) READ_ONCE(rnp->exp_tasks) != NULL
> raw_spin_lock_rcu_node
> np = rcu_next_node_entry(t, rnp)
> if (&t->rcu_node_entry == rnp->exp_tasks)
> WRITE_ONCE(rnp->exp_tasks, np)
> ....
> raw_spin_unlock_irqrestore_rcu_node
> raw_spin_lock_irqsave_rcu_node
> t = list_entry(rnp->exp_tasks->prev,
> struct task_struct, rcu_node_entry)
> (if rnp->exp_tasks is NULL, this
> will dereference a NULL pointer)
>
> The problem is that CPU2 accesses the rcu_node structure's->exp_tasks
> field without holding the rcu_node structure's ->lock and CPU2 did
> not observe CPU1's change to rcu_node structure's ->exp_tasks in time.
> Therefore, if CPU1 sets rcu_node structure's->exp_tasks pointer to NULL,
> then CPU2 might dereference that NULL pointer.
>
> This commit therefore holds the rcu_node structure's ->lock while
> accessing that structure's->exp_tasks field.
>
> Signed-off-by: Zqiang <[email protected]>
> Signed-off-by: Paul E. McKenney <[email protected]>
>
>diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
>index 7cc4856da0817..902e7c8709c7e 100644
>--- a/kernel/rcu/tree_exp.h
>+++ b/kernel/rcu/tree_exp.h
>@@ -803,9 +803,11 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
> int ndetected = 0;
> struct task_struct *t;
>
>- if (!READ_ONCE(rnp->exp_tasks))
>- return 0;
> raw_spin_lock_irqsave_rcu_node(rnp, flags);
>+ if (!READ_ONCE(rnp->exp_tasks)) {
>+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>+ return 0;
>+ }
> t = list_entry(rnp->exp_tasks->prev,
> struct task_struct, rcu_node_entry);
> list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
On Thu, Jan 05, 2023 at 07:41:46PM -0800, Paul E. McKenney wrote:
> On Sat, Dec 24, 2022 at 01:25:53PM +0800, Zqiang wrote:
> > For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario
> > can result system oops.
> >
> > CPU1 CPU2
> > rcu_preempt_deferred_qs_irqrestore rcu_print_task_exp_stall
> > if (special.b.blocked) READ_ONCE(rnp->exp_tasks) != NULL
> > raw_spin_lock_rcu_node
> > np = rcu_next_node_entry(t, rnp)
> > if (&t->rcu_node_entry == rnp->exp_tasks)
> > WRITE_ONCE(rnp->exp_tasks, np)
> > ....
> > raw_spin_unlock_irqrestore_rcu_node
> > raw_spin_lock_irqsave_rcu_node
> > t = list_entry(rnp->exp_tasks->prev,
> > struct task_struct, rcu_node_entry)
> > (if rnp->exp_tasks is NULL
> > will trigger oops)
> >
> > This problem is that CPU2 accesses rcu_node structure's->exp_tasks
> > without holding the rcu_node structure's ->lock and CPU2 did not
> > observe CPU1's change to rcu_node structure's->exp_tasks in time,
> > if rcu_node structure's->exp_tasks is set null pointer by CPU1, after
> > that CPU2 accesses members of rcu_node structure's->exp_tasks will
> > trigger oops.
> >
> > This commit therefore allows rcu_node structure's->exp_tasks to be
> > accessed while holding rcu_node structure's ->lock.
> >
> > Signed-off-by: Zqiang <[email protected]>
>
> Apologies for the delay and thank you for the reminder!
>
> Please check the wordsmithed version below, which I have queued.
>
> Thanx, Paul
>
> ------------------------------------------------------------------------
>
> commit 389b0eafd72829fd63548f7ff4e8d6ac90fa1f98
> Author: Zqiang <[email protected]>
> Date: Sat Dec 24 13:25:53 2022 +0800
>
> rcu: Protect rcu_print_task_exp_stall() ->exp_tasks access
>
> For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario can
> result in a NULL-pointer dereference:
>
> CPU1 CPU2
> rcu_preempt_deferred_qs_irqrestore rcu_print_task_exp_stall
> if (special.b.blocked) READ_ONCE(rnp->exp_tasks) != NULL
> raw_spin_lock_rcu_node
> np = rcu_next_node_entry(t, rnp)
> if (&t->rcu_node_entry == rnp->exp_tasks)
> WRITE_ONCE(rnp->exp_tasks, np)
> ....
> raw_spin_unlock_irqrestore_rcu_node
> raw_spin_lock_irqsave_rcu_node
> t = list_entry(rnp->exp_tasks->prev,
> struct task_struct, rcu_node_entry)
> (if rnp->exp_tasks is NULL, this
> will dereference a NULL pointer)
>
> The problem is that CPU2 accesses the rcu_node structure's->exp_tasks
> field without holding the rcu_node structure's ->lock and CPU2 did
> not observe CPU1's change to rcu_node structure's ->exp_tasks in time.
> Therefore, if CPU1 sets rcu_node structure's->exp_tasks pointer to NULL,
> then CPU2 might dereference that NULL pointer.
>
> This commit therefore holds the rcu_node structure's ->lock while
> accessing that structure's->exp_tasks field.
>
> Signed-off-by: Zqiang <[email protected]>
> Signed-off-by: Paul E. McKenney <[email protected]>
>
> diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
> index 7cc4856da0817..902e7c8709c7e 100644
> --- a/kernel/rcu/tree_exp.h
> +++ b/kernel/rcu/tree_exp.h
> @@ -803,9 +803,11 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
> int ndetected = 0;
> struct task_struct *t;
>
> - if (!READ_ONCE(rnp->exp_tasks))
> - return 0;
> raw_spin_lock_irqsave_rcu_node(rnp, flags);
> + if (!READ_ONCE(rnp->exp_tasks)) {
Does it have to be READ_ONCE then?
Thanks.
> + raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> + return 0;
> + }
> t = list_entry(rnp->exp_tasks->prev,
> struct task_struct, rcu_node_entry);
> list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
On Mon, Jan 09, 2023 at 02:21:01PM +0100, Frederic Weisbecker wrote:
> On Thu, Jan 05, 2023 at 07:41:46PM -0800, Paul E. McKenney wrote:
> > On Sat, Dec 24, 2022 at 01:25:53PM +0800, Zqiang wrote:
> > > For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario
> > > can result system oops.
> > >
> > > CPU1 CPU2
> > > rcu_preempt_deferred_qs_irqrestore rcu_print_task_exp_stall
> > > if (special.b.blocked) READ_ONCE(rnp->exp_tasks) != NULL
> > > raw_spin_lock_rcu_node
> > > np = rcu_next_node_entry(t, rnp)
> > > if (&t->rcu_node_entry == rnp->exp_tasks)
> > > WRITE_ONCE(rnp->exp_tasks, np)
> > > ....
> > > raw_spin_unlock_irqrestore_rcu_node
> > > raw_spin_lock_irqsave_rcu_node
> > > t = list_entry(rnp->exp_tasks->prev,
> > > struct task_struct, rcu_node_entry)
> > > (if rnp->exp_tasks is NULL
> > > will trigger oops)
> > >
> > > This problem is that CPU2 accesses rcu_node structure's->exp_tasks
> > > without holding the rcu_node structure's ->lock and CPU2 did not
> > > observe CPU1's change to rcu_node structure's->exp_tasks in time,
> > > if rcu_node structure's->exp_tasks is set null pointer by CPU1, after
> > > that CPU2 accesses members of rcu_node structure's->exp_tasks will
> > > trigger oops.
> > >
> > > This commit therefore allows rcu_node structure's->exp_tasks to be
> > > accessed while holding rcu_node structure's ->lock.
> > >
> > > Signed-off-by: Zqiang <[email protected]>
> >
> > Apologies for the delay and thank you for the reminder!
> >
> > Please check the wordsmithed version below, which I have queued.
> >
> > Thanx, Paul
> >
> > ------------------------------------------------------------------------
> >
> > commit 389b0eafd72829fd63548f7ff4e8d6ac90fa1f98
> > Author: Zqiang <[email protected]>
> > Date: Sat Dec 24 13:25:53 2022 +0800
> >
> > rcu: Protect rcu_print_task_exp_stall() ->exp_tasks access
> >
> > For kernels built with CONFIG_PREEMPT_RCU=y, the following scenario can
> > result in a NULL-pointer dereference:
> >
> > CPU1 CPU2
> > rcu_preempt_deferred_qs_irqrestore rcu_print_task_exp_stall
> > if (special.b.blocked) READ_ONCE(rnp->exp_tasks) != NULL
> > raw_spin_lock_rcu_node
> > np = rcu_next_node_entry(t, rnp)
> > if (&t->rcu_node_entry == rnp->exp_tasks)
> > WRITE_ONCE(rnp->exp_tasks, np)
> > ....
> > raw_spin_unlock_irqrestore_rcu_node
> > raw_spin_lock_irqsave_rcu_node
> > t = list_entry(rnp->exp_tasks->prev,
> > struct task_struct, rcu_node_entry)
> > (if rnp->exp_tasks is NULL, this
> > will dereference a NULL pointer)
> >
> > The problem is that CPU2 accesses the rcu_node structure's->exp_tasks
> > field without holding the rcu_node structure's ->lock and CPU2 did
> > not observe CPU1's change to rcu_node structure's ->exp_tasks in time.
> > Therefore, if CPU1 sets rcu_node structure's->exp_tasks pointer to NULL,
> > then CPU2 might dereference that NULL pointer.
> >
> > This commit therefore holds the rcu_node structure's ->lock while
> > accessing that structure's->exp_tasks field.
> >
> > Signed-off-by: Zqiang <[email protected]>
> > Signed-off-by: Paul E. McKenney <[email protected]>
> >
> > diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
> > index 7cc4856da0817..902e7c8709c7e 100644
> > --- a/kernel/rcu/tree_exp.h
> > +++ b/kernel/rcu/tree_exp.h
> > @@ -803,9 +803,11 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
> > int ndetected = 0;
> > struct task_struct *t;
> >
> > - if (!READ_ONCE(rnp->exp_tasks))
> > - return 0;
> > raw_spin_lock_irqsave_rcu_node(rnp, flags);
> > + if (!READ_ONCE(rnp->exp_tasks)) {
>
> Does it have to be READ_ONCE then?
Good point, that should not be necessary. I will drop the READ_ONCE on
my next rebase. (Unless someone tells me there is something subtle that
I am missing.)
Thanx, Paul
> Thanks.
>
> > + raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> > + return 0;
> > + }
> > t = list_entry(rnp->exp_tasks->prev,
> > struct task_struct, rcu_node_entry);
> > list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {