2022-03-09 13:21:38

by Zqiang

[permalink] [raw]
Subject: [PATCH v2] rcu: Only boost rcu reader tasks with lower priority than boost kthreads

When RCU_BOOST is enabled, the boost kthreads will boosting readers
who are blocking a given grace period, if the current reader tasks
have a higher priority than boost kthreads(the boost kthreads priority
not always 1, if the kthread_prio is set), boosting is useless, skip
current task and select next task to boosting, reduce the time for a
given grace period.

Signed-off-by: Zqiang <[email protected]>
---
v1->v2:
rename label 'end' to 'skip_boost'.
add 'boost_exp_tasks' pointer to point 'rnp->exp_tasks'
do the similar thing as normal grace period.

kernel/rcu/tree.h | 2 ++
kernel/rcu/tree_plugin.h | 31 +++++++++++++++++++++++--------
2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8d07bf92d29..862ca09b56c7 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -103,6 +103,8 @@ struct rcu_node {
/* queued on this rcu_node structure that */
/* are blocking the current grace period, */
/* there can be no such task. */
+ struct list_head *boost_exp_tasks;
+
struct rt_mutex boost_mtx;
/* Used only for the priority-boosting */
/* side effect, not as a lock. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c3d212bc5338..22bf5a8040f5 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -12,6 +12,7 @@
*/

#include "../locking/rtmutex_common.h"
+#include <linux/sched/deadline.h>

static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
@@ -535,6 +536,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
if (&t->rcu_node_entry == rnp->boost_tasks)
WRITE_ONCE(rnp->boost_tasks, np);
+ if (&t->rcu_node_entry == rnp->boost_exp_tasks)
+ WRITE_ONCE(rnp->boost_exp_tasks, np);
}

/*
@@ -1022,7 +1025,7 @@ static int rcu_boost(struct rcu_node *rnp)
struct task_struct *t;
struct list_head *tb;

- if (READ_ONCE(rnp->exp_tasks) == NULL &&
+ if (READ_ONCE(rnp->boost_exp_tasks) == NULL &&
READ_ONCE(rnp->boost_tasks) == NULL)
return 0; /* Nothing left to boost. */

@@ -1032,7 +1035,7 @@ static int rcu_boost(struct rcu_node *rnp)
* Recheck under the lock: all tasks in need of boosting
* might exit their RCU read-side critical sections on their own.
*/
- if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
+ if (rnp->boost_exp_tasks == NULL && rnp->boost_tasks == NULL) {
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return 0;
}
@@ -1043,8 +1046,8 @@ static int rcu_boost(struct rcu_node *rnp)
* expedited grace period must boost all blocked tasks, including
* those blocking the pre-existing normal grace period.
*/
- if (rnp->exp_tasks != NULL)
- tb = rnp->exp_tasks;
+ if (rnp->boost_exp_tasks != NULL)
+ tb = rnp->boost_exp_tasks;
else
tb = rnp->boost_tasks;

@@ -1065,14 +1068,24 @@ static int rcu_boost(struct rcu_node *rnp)
* section.
*/
t = container_of(tb, struct task_struct, rcu_node_entry);
+ if (dl_task(t) || t->prio <= current->prio) {
+ tb = rcu_next_node_entry(t, rnp);
+ if (rnp->boost_exp_tasks)
+ WRITE_ONCE(rnp->boost_exp_tasks, tb);
+ else
+ WRITE_ONCE(rnp->boost_tasks, tb);
+ raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+ goto skip_boost;
+ }
+
rt_mutex_init_proxy_locked(&rnp->boost_mtx.rtmutex, t);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
/* Lock only for side effect: boosts task t's priority. */
rt_mutex_lock(&rnp->boost_mtx);
rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
rnp->n_boosts++;
-
- return READ_ONCE(rnp->exp_tasks) != NULL ||
+skip_boost:
+ return READ_ONCE(rnp->boost_exp_tasks) != NULL ||
READ_ONCE(rnp->boost_tasks) != NULL;
}

@@ -1090,7 +1103,7 @@ static int rcu_boost_kthread(void *arg)
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_WAITING);
trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
rcu_wait(READ_ONCE(rnp->boost_tasks) ||
- READ_ONCE(rnp->exp_tasks));
+ READ_ONCE(rnp->boost_exp_tasks));
trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
WRITE_ONCE(rnp->boost_kthread_status, RCU_KTHREAD_RUNNING);
more2boost = rcu_boost(rnp);
@@ -1129,13 +1142,15 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return;
}
- if (rnp->exp_tasks != NULL ||
+ if ((rnp->exp_tasks != NULL && rnp->boost_exp_tasks == NULL) ||
(rnp->gp_tasks != NULL &&
rnp->boost_tasks == NULL &&
rnp->qsmask == 0 &&
(!time_after(rnp->boost_time, jiffies) || rcu_state.cbovld))) {
if (rnp->exp_tasks == NULL)
WRITE_ONCE(rnp->boost_tasks, rnp->gp_tasks);
+ else
+ WRITE_ONCE(rnp->boost_exp_tasks, rnp->exp_tasks);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
rcu_wake_cond(rnp->boost_kthread_task,
READ_ONCE(rnp->boost_kthread_status));
--
2.25.1


2022-03-11 06:19:19

by Uladzislau Rezki

[permalink] [raw]
Subject: Re: [PATCH v2] rcu: Only boost rcu reader tasks with lower priority than boost kthreads

> When RCU_BOOST is enabled, the boost kthreads will boosting readers
> who are blocking a given grace period, if the current reader tasks
> have a higher priority than boost kthreads(the boost kthreads priority
> not always 1, if the kthread_prio is set), boosting is useless, skip
> current task and select next task to boosting, reduce the time for a
> given grace period.
>
> Signed-off-by: Zqiang <[email protected]>
> ---
> v1->v2:
> rename label 'end' to 'skip_boost'.
> add 'boost_exp_tasks' pointer to point 'rnp->exp_tasks'
> do the similar thing as normal grace period.
>
> kernel/rcu/tree.h | 2 ++
> kernel/rcu/tree_plugin.h | 31 +++++++++++++++++++++++--------
> 2 files changed, 25 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index b8d07bf92d29..862ca09b56c7 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -103,6 +103,8 @@ struct rcu_node {
> /* queued on this rcu_node structure that */
> /* are blocking the current grace period, */
> /* there can be no such task. */
> + struct list_head *boost_exp_tasks;
> +
> struct rt_mutex boost_mtx;
> /* Used only for the priority-boosting */
> /* side effect, not as a lock. */
> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
> index c3d212bc5338..22bf5a8040f5 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -12,6 +12,7 @@
> */
>
> #include "../locking/rtmutex_common.h"
> +#include <linux/sched/deadline.h>
>
> static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
> {
> @@ -535,6 +536,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
> drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
> if (&t->rcu_node_entry == rnp->boost_tasks)
> WRITE_ONCE(rnp->boost_tasks, np);
> + if (&t->rcu_node_entry == rnp->boost_exp_tasks)
> + WRITE_ONCE(rnp->boost_exp_tasks, np);
> }
>
> /*
> @@ -1022,7 +1025,7 @@ static int rcu_boost(struct rcu_node *rnp)
> struct task_struct *t;
> struct list_head *tb;
>
> - if (READ_ONCE(rnp->exp_tasks) == NULL &&
> + if (READ_ONCE(rnp->boost_exp_tasks) == NULL &&
> READ_ONCE(rnp->boost_tasks) == NULL)
> return 0; /* Nothing left to boost. */
>
> @@ -1032,7 +1035,7 @@ static int rcu_boost(struct rcu_node *rnp)
> * Recheck under the lock: all tasks in need of boosting
> * might exit their RCU read-side critical sections on their own.
> */
> - if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
> + if (rnp->boost_exp_tasks == NULL && rnp->boost_tasks == NULL) {
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> return 0;
> }
> @@ -1043,8 +1046,8 @@ static int rcu_boost(struct rcu_node *rnp)
> * expedited grace period must boost all blocked tasks, including
> * those blocking the pre-existing normal grace period.
> */
> - if (rnp->exp_tasks != NULL)
> - tb = rnp->exp_tasks;
> + if (rnp->boost_exp_tasks != NULL)
> + tb = rnp->boost_exp_tasks;
> else
> tb = rnp->boost_tasks;
>
> @@ -1065,14 +1068,24 @@ static int rcu_boost(struct rcu_node *rnp)
> * section.
> */
> t = container_of(tb, struct task_struct, rcu_node_entry);
> + if (dl_task(t) || t->prio <= current->prio) {
This is a bit confusing to me. We know that "current" has SCHED_FIFO
class. In this scenario if "t->prio" has lower value(higher priority)
the task falls into all real-time categories anyway and is either:
- SCHED_FIFO
- SCHED_RR
- SCHED_DEADLINE

Checking whether the task is dl_task() sounds like unnecessary there.
Am i missing something? Could you please comment on it?

Thanks!

--
Vlad Rezki

2022-03-11 22:22:47

by Zqiang

[permalink] [raw]
Subject: RE: [PATCH v2] rcu: Only boost rcu reader tasks with lower priority than boost kthreads


> When RCU_BOOST is enabled, the boost kthreads will boosting readers
> who are blocking a given grace period, if the current reader tasks
> have a higher priority than boost kthreads(the boost kthreads priority
> not always 1, if the kthread_prio is set), boosting is useless, skip
> current task and select next task to boosting, reduce the time for a
> given grace period.
>
> Signed-off-by: Zqiang <[email protected]>
> ---
> v1->v2:
> rename label 'end' to 'skip_boost'.
> add 'boost_exp_tasks' pointer to point 'rnp->exp_tasks'
> do the similar thing as normal grace period.
>
> kernel/rcu/tree.h | 2 ++
> kernel/rcu/tree_plugin.h | 31 +++++++++++++++++++++++--------
> 2 files changed, 25 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index
> b8d07bf92d29..862ca09b56c7 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -103,6 +103,8 @@ struct rcu_node {
> /* queued on this rcu_node structure that */
> /* are blocking the current grace period, */
> /* there can be no such task. */
> + struct list_head *boost_exp_tasks;
> +
> struct rt_mutex boost_mtx;
> /* Used only for the priority-boosting */
> /* side effect, not as a lock. */ diff --git
> a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index
> c3d212bc5338..22bf5a8040f5 100644
> --- a/kernel/rcu/tree_plugin.h
> +++ b/kernel/rcu/tree_plugin.h
> @@ -12,6 +12,7 @@
> */
>
> #include "../locking/rtmutex_common.h"
> +#include <linux/sched/deadline.h>
>
> static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) { @@ -535,6
> +536,8 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
> drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx.rtmutex) == t;
> if (&t->rcu_node_entry == rnp->boost_tasks)
> WRITE_ONCE(rnp->boost_tasks, np);
> + if (&t->rcu_node_entry == rnp->boost_exp_tasks)
> + WRITE_ONCE(rnp->boost_exp_tasks, np);
> }
>
> /*
> @@ -1022,7 +1025,7 @@ static int rcu_boost(struct rcu_node *rnp)
> struct task_struct *t;
> struct list_head *tb;
>
> - if (READ_ONCE(rnp->exp_tasks) == NULL &&
> + if (READ_ONCE(rnp->boost_exp_tasks) == NULL &&
> READ_ONCE(rnp->boost_tasks) == NULL)
> return 0; /* Nothing left to boost. */
>
> @@ -1032,7 +1035,7 @@ static int rcu_boost(struct rcu_node *rnp)
> * Recheck under the lock: all tasks in need of boosting
> * might exit their RCU read-side critical sections on their own.
> */
> - if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
> + if (rnp->boost_exp_tasks == NULL && rnp->boost_tasks == NULL) {
> raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
> return 0;
> }
> @@ -1043,8 +1046,8 @@ static int rcu_boost(struct rcu_node *rnp)
> * expedited grace period must boost all blocked tasks, including
> * those blocking the pre-existing normal grace period.
> */
> - if (rnp->exp_tasks != NULL)
> - tb = rnp->exp_tasks;
> + if (rnp->boost_exp_tasks != NULL)
> + tb = rnp->boost_exp_tasks;
> else
> tb = rnp->boost_tasks;
>
> @@ -1065,14 +1068,24 @@ static int rcu_boost(struct rcu_node *rnp)
> * section.
> */
> t = container_of(tb, struct task_struct, rcu_node_entry);
> + if (dl_task(t) || t->prio <= current->prio) {
>This is a bit confusing to me. We know that "current" has SCHED_FIFO class. In this scenario if "t->prio" has lower value(higher priority) the task falls into all real-time categories anyway and is either:
> - SCHED_FIFO
> - SCHED_RR
> - SCHED_DEADLINE
>
>Checking whether the task is dl_task() sounds like unnecessary there.
>Am i missing something? Could you please comment on it?

Thanks feedback

Right, the deadline task's prio has lower value than rt fifo/rr task.
dl_task() condition detection seems redundant.

>
>Thanks!
>
>--
>Vlad Rezki