2022-06-28 09:27:05

by Nicolas Saenz Julienne

[permalink] [raw]
Subject: [PATCH] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt

dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
called sched_update_tick_dependency() preventing it from re-enabling the
tick on systems that no longer have pending SCHED_RT tasks but have
multiple runnable SCHED_OTHER tasks:

dequeue_task_rt()
dequeue_rt_entity()
dequeue_rt_stack()
dequeue_top_rt_rq()
sub_nr_running() // decrements rq->nr_running
sched_update_tick_dependency()
sched_can_stop_tick() // checks rq->rt.rt_nr_running,
...
__dequeue_rt_entity()
dec_rt_tasks() // decrements rq->rt.rt_nr_running
...

Every other scheduler class performs the operation in the opposite
order, and sched_update_tick_dependency() expects the values to be
updated as such. So avoid the misbehaviour by inverting the order in
which the above operations are performed in the RT scheduler.

Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
Signed-off-by: Nicolas Saenz Julienne <[email protected]>
---
kernel/sched/rt.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8c9ed96648409..55f39c8f42032 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -480,7 +480,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
@@ -601,7 +601,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
rt_se = rt_rq->tg->rt_se[cpu];

if (!rt_se) {
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
}
@@ -687,7 +687,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1089,7 +1089,7 @@ static void update_curr_rt(struct rq *rq)
}

static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
{
struct rq *rq = rq_of_rt_rq(rt_rq);

@@ -1100,7 +1100,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)

BUG_ON(!rq->nr_running);

- sub_nr_running(rq, rt_rq->rt_nr_running);
+ sub_nr_running(rq, count);
rt_rq->rt_queued = 0;

}
@@ -1486,18 +1486,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct sched_rt_entity *back = NULL;
+ unsigned int rt_nr_running;

for_each_sched_rt_entity(rt_se) {
rt_se->back = back;
back = rt_se;
}

- dequeue_top_rt_rq(rt_rq_of_se(back));
+ rt_nr_running = rt_rq_of_se(back)->rt_nr_running;

for (rt_se = back; rt_se; rt_se = rt_se->back) {
if (on_rt_rq(rt_se))
__dequeue_rt_entity(rt_se, flags);
}
+
+ dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
--
2.36.1


2022-07-01 11:42:23

by Nicolas Saenz Julienne

[permalink] [raw]
Subject: Re: [PATCH] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt

On Fri, 2022-07-01 at 12:25 +0100, Valentin Schneider wrote:
> On 28/06/22 11:22, Nicolas Saenz Julienne wrote:
> > dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
> > called sched_update_tick_dependency() preventing it from re-enabling the
> > tick on systems that no longer have pending SCHED_RT tasks but have
> > multiple runnable SCHED_OTHER tasks:
> >
> > dequeue_task_rt()
> > dequeue_rt_entity()
> > dequeue_rt_stack()
> > dequeue_top_rt_rq()
> > sub_nr_running() // decrements rq->nr_running
> > sched_update_tick_dependency()
> > sched_can_stop_tick() // checks rq->rt.rt_nr_running,
> > ...
> > __dequeue_rt_entity()
> > dec_rt_tasks() // decrements rq->rt.rt_nr_running
> > ...
> >
> > Every other scheduler class performs the operation in the opposite
> > order, and sched_update_tick_dependency() expects the values to be
> > updated as such. So avoid the misbehaviour by inverting the order in
> > which the above operations are performed in the RT scheduler.
> >
>
> I can't see anything wrong with your approach, though I did have to spend
> some time re-learning RT_GROUP_SCHED. The designated Fixes: commit looks
> about right too.
>
> > Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
> > Signed-off-by: Nicolas Saenz Julienne <[email protected]>
>
> Reviewed-by: Valentin Schneider <[email protected]>

Thanks!

--
Nicolás Sáenz

2022-07-01 11:44:15

by Valentin Schneider

[permalink] [raw]
Subject: Re: [PATCH] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt

On 28/06/22 11:22, Nicolas Saenz Julienne wrote:
> dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
> called sched_update_tick_dependency() preventing it from re-enabling the
> tick on systems that no longer have pending SCHED_RT tasks but have
> multiple runnable SCHED_OTHER tasks:
>
> dequeue_task_rt()
> dequeue_rt_entity()
> dequeue_rt_stack()
> dequeue_top_rt_rq()
> sub_nr_running() // decrements rq->nr_running
> sched_update_tick_dependency()
> sched_can_stop_tick() // checks rq->rt.rt_nr_running,
> ...
> __dequeue_rt_entity()
> dec_rt_tasks() // decrements rq->rt.rt_nr_running
> ...
>
> Every other scheduler class performs the operation in the opposite
> order, and sched_update_tick_dependency() expects the values to be
> updated as such. So avoid the misbehaviour by inverting the order in
> which the above operations are performed in the RT scheduler.
>

I can't see anything wrong with your approach, though I did have to spend
some time re-learning RT_GROUP_SCHED. The designated Fixes: commit looks
about right too.

> Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
> Signed-off-by: Nicolas Saenz Julienne <[email protected]>

Reviewed-by: Valentin Schneider <[email protected]>

2022-07-13 17:24:02

by Nicolas Saenz Julienne

[permalink] [raw]
Subject: Re: [PATCH] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt

On Tue, 2022-06-28 at 11:22 +0200, Nicolas Saenz Julienne wrote:
> dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
> called sched_update_tick_dependency() preventing it from re-enabling the
> tick on systems that no longer have pending SCHED_RT tasks but have
> multiple runnable SCHED_OTHER tasks:
>
> dequeue_task_rt()
> dequeue_rt_entity()
> dequeue_rt_stack()
> dequeue_top_rt_rq()
> sub_nr_running() // decrements rq->nr_running
> sched_update_tick_dependency()
> sched_can_stop_tick() // checks rq->rt.rt_nr_running,
> ...
> __dequeue_rt_entity()
> dec_rt_tasks() // decrements rq->rt.rt_nr_running
> ...
>
> Every other scheduler class performs the operation in the opposite
> order, and sched_update_tick_dependency() expects the values to be
> updated as such. So avoid the misbehaviour by inverting the order in
> which the above operations are performed in the RT scheduler.
>
> Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
> Signed-off-by: Nicolas Saenz Julienne <[email protected]>
> ---

Ping :)

--
Nicolás Sáenz

2022-07-15 17:17:59

by Phil Auld

[permalink] [raw]
Subject: Re: [PATCH] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt

On Tue, Jun 28, 2022 at 11:22:59AM +0200 Nicolas Saenz Julienne wrote:
> dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
> called sched_update_tick_dependency() preventing it from re-enabling the
> tick on systems that no longer have pending SCHED_RT tasks but have
> multiple runnable SCHED_OTHER tasks:
>
> dequeue_task_rt()
> dequeue_rt_entity()
> dequeue_rt_stack()
> dequeue_top_rt_rq()
> sub_nr_running() // decrements rq->nr_running
> sched_update_tick_dependency()
> sched_can_stop_tick() // checks rq->rt.rt_nr_running,
> ...
> __dequeue_rt_entity()
> dec_rt_tasks() // decrements rq->rt.rt_nr_running
> ...
>
> Every other scheduler class performs the operation in the opposite
> order, and sched_update_tick_dependency() expects the values to be
> updated as such. So avoid the misbehaviour by inverting the order in
> which the above operations are performed in the RT scheduler.
>
> Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
> Signed-off-by: Nicolas Saenz Julienne <[email protected]>

That's been around for a while. This change looks right and makes sense
to me.

Thanks,
Phil

Reviewed-by: Phil Auld <[email protected]>

--

2022-07-20 12:56:38

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt

On Wed, Jul 13, 2022 at 07:18:42PM +0200, Nicolas Saenz Julienne wrote:
> On Tue, 2022-06-28 at 11:22 +0200, Nicolas Saenz Julienne wrote:
> > dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
> > called sched_update_tick_dependency() preventing it from re-enabling the
> > tick on systems that no longer have pending SCHED_RT tasks but have
> > multiple runnable SCHED_OTHER tasks:
> >
> > dequeue_task_rt()
> > dequeue_rt_entity()
> > dequeue_rt_stack()
> > dequeue_top_rt_rq()
> > sub_nr_running() // decrements rq->nr_running
> > sched_update_tick_dependency()
> > sched_can_stop_tick() // checks rq->rt.rt_nr_running,
> > ...
> > __dequeue_rt_entity()
> > dec_rt_tasks() // decrements rq->rt.rt_nr_running
> > ...
> >
> > Every other scheduler class performs the operation in the opposite
> > order, and sched_update_tick_dependency() expects the values to be
> > updated as such. So avoid the misbehaviour by inverting the order in
> > which the above operations are performed in the RT scheduler.
> >
> > Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
> > Signed-off-by: Nicolas Saenz Julienne <[email protected]>
> > ---
>
> Ping :)

Thanks! got stuck in the retbleed backlog :/

2022-07-21 09:26:26

by tip-bot2 for Jacob Pan

[permalink] [raw]
Subject: [tip: sched/core] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt()

The following commit has been merged into the sched/core branch of tip:

Commit-ID: 5c66d1b9b30f737fcef85a0b75bfe0590e16b62a
Gitweb: https://git.kernel.org/tip/5c66d1b9b30f737fcef85a0b75bfe0590e16b62a
Author: Nicolas Saenz Julienne <[email protected]>
AuthorDate: Tue, 28 Jun 2022 11:22:59 +02:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Thu, 21 Jul 2022 10:39:38 +02:00

nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt()

dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
called sched_update_tick_dependency() preventing it from re-enabling the
tick on systems that no longer have pending SCHED_RT tasks but have
multiple runnable SCHED_OTHER tasks:

dequeue_task_rt()
dequeue_rt_entity()
dequeue_rt_stack()
dequeue_top_rt_rq()
sub_nr_running() // decrements rq->nr_running
sched_update_tick_dependency()
sched_can_stop_tick() // checks rq->rt.rt_nr_running,
...
__dequeue_rt_entity()
dec_rt_tasks() // decrements rq->rt.rt_nr_running
...

Every other scheduler class performs the operation in the opposite
order, and sched_update_tick_dependency() expects the values to be
updated as such. So avoid the misbehaviour by inverting the order in
which the above operations are performed in the RT scheduler.

Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
Signed-off-by: Nicolas Saenz Julienne <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
Reviewed-by: Phil Auld <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
kernel/sched/rt.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8c9ed96..55f39c8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -480,7 +480,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
@@ -601,7 +601,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
rt_se = rt_rq->tg->rt_se[cpu];

if (!rt_se) {
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
}
@@ -687,7 +687,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1089,7 +1089,7 @@ static void update_curr_rt(struct rq *rq)
}

static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
{
struct rq *rq = rq_of_rt_rq(rt_rq);

@@ -1100,7 +1100,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)

BUG_ON(!rq->nr_running);

- sub_nr_running(rq, rt_rq->rt_nr_running);
+ sub_nr_running(rq, count);
rt_rq->rt_queued = 0;

}
@@ -1486,18 +1486,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct sched_rt_entity *back = NULL;
+ unsigned int rt_nr_running;

for_each_sched_rt_entity(rt_se) {
rt_se->back = back;
back = rt_se;
}

- dequeue_top_rt_rq(rt_rq_of_se(back));
+ rt_nr_running = rt_rq_of_se(back)->rt_nr_running;

for (rt_se = back; rt_se; rt_se = rt_se->back) {
if (on_rt_rq(rt_se))
__dequeue_rt_entity(rt_se, flags);
}
+
+ dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)