2023-03-16 08:19:15

by Hao Jia

Subject: [PATCH] sched/core: Avoid selecting the task that is throttled to run when core-sched is enabled

When an {rt,cfs}_rq or a dl task is throttled, the cookied tasks are
not dequeued from the core tree, so sched_core_find() and
sched_core_next() may return a throttled task, which may cause a
throttled task to run on the CPU.

Add checks to sched_core_find() and sched_core_next() to make sure
that the task they return is runnable and not throttled.
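
For context, a "cookied" task here is one that has been assigned a
core-scheduling cookie, e.g. via prctl(PR_SCHED_CORE). Below is a minimal
user-space sketch (not part of this patch) of how such a task can end up
throttled while still sitting in the core tree; the cgroup path and the
bandwidth values in the comments are made-up assumptions for illustration.

/*
 * Hypothetical reproduction sketch: give the current task a core-sched
 * cookie, then throttle it with CFS bandwidth control so it becomes a
 * throttled-but-cookied task. Assumes cgroup v2 at /sys/fs/cgroup and a
 * pre-created group "demo" with e.g. "10000 100000" written to cpu.max.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/prctl.h>

#ifndef PR_SCHED_CORE
#define PR_SCHED_CORE		62
#define PR_SCHED_CORE_CREATE	1
#endif

int main(void)
{
	/* arg4 == 0 is PIDTYPE_PID: the new cookie applies to this task only */
	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, getpid(), 0, 0))
		perror("PR_SCHED_CORE_CREATE");

	/*
	 * Run this from a shell that was already moved into the
	 * bandwidth-limited group, e.g.:
	 *   echo $$ > /sys/fs/cgroup/demo/cgroup.procs
	 * and burn CPU until the group's quota runs out.
	 */
	for (;;)
		;	/* spins, gets throttled, but keeps its cookie */
}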

Co-developed-by: Cruz Zhao <[email protected]>
Signed-off-by: Cruz Zhao <[email protected]>
Signed-off-by: Hao Jia <[email protected]>
---
kernel/sched/core.c | 60 ++++++++++++++++++++++++++++-------------
kernel/sched/deadline.c | 10 +++++++
kernel/sched/fair.c | 16 +++++++++++
kernel/sched/rt.c | 19 +++++++++++++
kernel/sched/sched.h | 4 +++
5 files changed, 90 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index af017e038b48..27cb06e19b12 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -261,36 +261,51 @@ void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
resched_curr(rq);
}

-/*
- * Find left-most (aka, highest priority) task matching @cookie.
- */
-static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
+static int sched_task_is_throttled(struct task_struct *p, int cpu)
{
- struct rb_node *node;
-
- node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
- /*
- * The idle task always matches any cookie!
- */
- if (!node)
- return idle_sched_class.pick_task(rq);
+ if (p->sched_class->task_is_throttled)
+ return p->sched_class->task_is_throttled(p, cpu);

- return __node_2_sc(node);
+ return 0;
}

static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie)
{
struct rb_node *node = &p->core_node;
+ int cpu = task_cpu(p);
+
+ do {
+ node = rb_next(node);
+ if (!node)
+ return NULL;
+
+ p = container_of(node, struct task_struct, core_node);
+ if (p->core_cookie != cookie)
+ return NULL;
+ } while (sched_task_is_throttled(p, cpu));
+
+ return p;
+}

- node = rb_next(node);
+/*
+ * Find left-most (aka, highest priority) and unthrottled task matching @cookie.
+ * If no suitable task is found, NULL will be returned.
+ */
+static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
+{
+ struct task_struct *p;
+ struct rb_node *node;
+
+ node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
if (!node)
return NULL;

- p = container_of(node, struct task_struct, core_node);
- if (p->core_cookie != cookie)
- return NULL;
+ p = __node_2_sc(node);

- return p;
+ if (!sched_task_is_throttled(p, rq->cpu))
+ return p;
+
+ return sched_core_next(p, cookie);
}

/*
@@ -6236,7 +6251,7 @@ static bool try_steal_cookie(int this, int that)
goto unlock;

p = sched_core_find(src, cookie);
- if (p == src->idle)
+ if (!p)
goto unlock;

do {
@@ -6248,6 +6263,13 @@ static bool try_steal_cookie(int this, int that)

if (p->core_occupation > dst->idle->core_occupation)
goto next;
+ /*
+ * sched_core_find() and sched_core_next() will ensure that task @p
+ * is not throttled now; we also need to check whether the runqueue
+ * of the destination CPU is being throttled.
+ */
+ if (sched_task_is_throttled(p, this))
+ goto next;

deactivate_task(src, p, 0);
set_task_cpu(p, this);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 71b24371a6f7..4cc7e1ca066d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2704,6 +2704,13 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
#endif
}

+#ifdef CONFIG_SCHED_CORE
+static int task_is_throttled_dl(struct task_struct *p, int cpu)
+{
+ return p->dl.dl_throttled;
+}
+#endif
+
DEFINE_SCHED_CLASS(dl) = {

.enqueue_task = enqueue_task_dl,
@@ -2736,6 +2743,9 @@ DEFINE_SCHED_CLASS(dl) = {
.switched_to = switched_to_dl,

.update_curr = update_curr_dl,
+#ifdef CONFIG_SCHED_CORE
+ .task_is_throttled = task_is_throttled_dl,
+#endif
};

/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a1b1f855b96..b572367249f0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11933,6 +11933,18 @@ bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,

return delta > 0;
}
+
+static int task_is_throttled_fair(struct task_struct *p, int cpu)
+{
+ struct cfs_rq *cfs_rq;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ cfs_rq = task_group(p)->cfs_rq[cpu];
+#else
+ cfs_rq = &cpu_rq(cpu)->cfs;
+#endif
+ return throttled_hierarchy(cfs_rq);
+}
#else
static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {}
#endif
@@ -12559,6 +12571,10 @@ DEFINE_SCHED_CLASS(fair) = {
.task_change_group = task_change_group_fair,
#endif

+#ifdef CONFIG_SCHED_CORE
+ .task_is_throttled = task_is_throttled_fair,
+#endif
+
#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
#endif
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0a11f44adee5..9d67dfbf1000 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2677,6 +2677,21 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
return 0;
}

+#ifdef CONFIG_SCHED_CORE
+static int task_is_throttled_rt(struct task_struct *p, int cpu)
+{
+ struct rt_rq *rt_rq;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ rt_rq = task_group(p)->rt_rq[cpu];
+#else
+ rt_rq = &cpu_rq(cpu)->rt;
+#endif
+
+ return rt_rq_throttled(rt_rq);
+}
+#endif
+
DEFINE_SCHED_CLASS(rt) = {

.enqueue_task = enqueue_task_rt,
@@ -2710,6 +2725,10 @@ DEFINE_SCHED_CLASS(rt) = {

.update_curr = update_curr_rt,

+#ifdef CONFIG_SCHED_CORE
+ .task_is_throttled = task_is_throttled_rt,
+#endif
+
#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3e8df6d31c1e..060616944d7a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2224,6 +2224,10 @@ struct sched_class {
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p);
#endif
+
+#ifdef CONFIG_SCHED_CORE
+ int (*task_is_throttled)(struct task_struct *p, int cpu);
+#endif
};

static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
--
2.37.0



2023-03-20 12:48:10

by Peter Zijlstra

Subject: Re: [PATCH] sched/core: Avoid selecting the task that is throttled to run when core-sched is enabled

On Thu, Mar 16, 2023 at 04:18:06PM +0800, Hao Jia wrote:

> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index af017e038b48..27cb06e19b12 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -261,36 +261,51 @@ void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
> resched_curr(rq);
> }
>
> +static int sched_task_is_throttled(struct task_struct *p, int cpu)
> {
> + if (p->sched_class->task_is_throttled)
> + return p->sched_class->task_is_throttled(p, cpu);
>
> + return 0;
> }
>
> static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie)
> {
> struct rb_node *node = &p->core_node;
> + int cpu = task_cpu(p);
> +
> + do {
> + node = rb_next(node);
> + if (!node)
> + return NULL;
> +
> + p = container_of(node, struct task_struct, core_node);

I've changed this to __node_2_sc() to match the rest. It looks to have
been randomly not using it.
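
For reference, __node_2_sc() is the existing core.c helper that converts a
core-tree rb_node back into its task_struct, so the change is cosmetic
rather than functional. A sketch of what it boils down to (paraphrased,
not copied verbatim from kernel/sched/core.c):

	static inline struct task_struct *__node_2_sc(struct rb_node *node)
	{
		/* The core tree links tasks via task_struct::core_node, so
		 * this is just rb_entry()/container_of() on that member. */
		return rb_entry(node, struct task_struct, core_node);
	}

which is equivalent to the open-coded container_of() it replaces.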

> + if (p->core_cookie != cookie)
> + return NULL;
> + } while (sched_task_is_throttled(p, cpu));
> +
> + return p;
> +}
>
> +/*
> + * Find left-most (aka, highest priority) and unthrottled task matching @cookie.
> + * If no suitable task is found, NULL will be returned.
> + */
> +static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
> +{
> + struct task_struct *p;
> + struct rb_node *node;
> +
> + node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
> if (!node)
> return NULL;
>
> + p = __node_2_sc(node);
> + if (!sched_task_is_throttled(p, rq->cpu))
> + return p;
> +
> + return sched_core_next(p, cookie);
> }
>
> /*

OK, fair enough.



2023-03-21 06:17:15

by Hao Jia

Subject: Re: [External] Re: [PATCH] sched/core: Avoid selecting the task that is throttled to run when core-sched is enabled



On 2023/3/20 Peter Zijlstra wrote:
> On Thu, Mar 16, 2023 at 04:18:06PM +0800, Hao Jia wrote:
>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index af017e038b48..27cb06e19b12 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -261,36 +261,51 @@ void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
>> resched_curr(rq);
>> }
>>
>> +static int sched_task_is_throttled(struct task_struct *p, int cpu)
>> {
>> + if (p->sched_class->task_is_throttled)
>> + return p->sched_class->task_is_throttled(p, cpu);
>>
>> + return 0;
>> }
>>
>> static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie)
>> {
>> struct rb_node *node = &p->core_node;
>> + int cpu = task_cpu(p);
>> +
>> + do {
>> + node = rb_next(node);
>> + if (!node)
>> + return NULL;
>> +
>> + p = container_of(node, struct task_struct, core_node);
>
> I've changed this to __node_2_sc() to match the rest. It looks to have
> been randomly not using it.

OK, Thanks!
>
>> + if (p->core_cookie != cookie)
>> + return NULL;
>> + } while (sched_task_is_throttled(p, cpu));
>> +
>> + return p;
>> +}
>>
>> +/*
>> + * Find left-most (aka, highest priority) and unthrottled task matching @cookie.
>> + * If no suitable task is found, NULL will be returned.
>> + */
>> +static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
>> +{
>> + struct task_struct *p;
>> + struct rb_node *node;
>> +
>> + node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
>> if (!node)
>> return NULL;
>>
>> + p = __node_2_sc(node);
>> + if (!sched_task_is_throttled(p, rq->cpu))
>> + return p;
>> +
>> + return sched_core_next(p, cookie);
>> }
>>
>> /*
>
> OK, fair enough.

Thanks for your review. Do I need to send a v2 patch with this change?

Thanks,
Hao
>
>

2023-03-21 09:08:25

by Peter Zijlstra

Subject: Re: [External] Re: [PATCH] sched/core: Avoid selecting the task that is throttled to run when core-sched is enabled

On Tue, Mar 21, 2023 at 02:16:54PM +0800, Hao Jia wrote:

> Thanks for your review. Do I need to send a v2 patch with this change?

Nah, I've got it. I'll push it into sched/core later today.

2023-03-22 09:30:39

by tip-bot2 for Hao Jia

Subject: [tip: sched/core] sched/core: Avoid selecting the task that is throttled to run when core-sched is enabled

The following commit has been merged into the sched/core branch of tip:

Commit-ID: 530bfad1d53d103f98cec66a3e491a36d397884d
Gitweb: https://git.kernel.org/tip/530bfad1d53d103f98cec66a3e491a36d397884d
Author: Hao Jia <[email protected]>
AuthorDate: Thu, 16 Mar 2023 16:18:06 +08:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Wed, 22 Mar 2023 10:10:58 +01:00

sched/core: Avoid selecting the task that is throttled to run when core-sched is enabled

When an {rt,cfs}_rq or a dl task is throttled, the cookied tasks are
not dequeued from the core tree, so sched_core_find() and
sched_core_next() may return a throttled task, which may cause a
throttled task to run on the CPU.

Add checks to sched_core_find() and sched_core_next() to make sure
that the task they return is runnable and not throttled.

Co-developed-by: Cruz Zhao <[email protected]>
Signed-off-by: Cruz Zhao <[email protected]>
Signed-off-by: Hao Jia <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
kernel/sched/core.c | 60 +++++++++++++++++++++++++++-------------
kernel/sched/deadline.c | 10 +++++++-
kernel/sched/fair.c | 16 +++++++++++-
kernel/sched/rt.c | 19 +++++++++++++-
kernel/sched/sched.h | 4 +++-
5 files changed, 90 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 488655f..9140a33 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -261,36 +261,51 @@ void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
resched_curr(rq);
}

-/*
- * Find left-most (aka, highest priority) task matching @cookie.
- */
-static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
+static int sched_task_is_throttled(struct task_struct *p, int cpu)
{
- struct rb_node *node;
-
- node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
- /*
- * The idle task always matches any cookie!
- */
- if (!node)
- return idle_sched_class.pick_task(rq);
+ if (p->sched_class->task_is_throttled)
+ return p->sched_class->task_is_throttled(p, cpu);

- return __node_2_sc(node);
+ return 0;
}

static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie)
{
struct rb_node *node = &p->core_node;
+ int cpu = task_cpu(p);
+
+ do {
+ node = rb_next(node);
+ if (!node)
+ return NULL;
+
+ p = __node_2_sc(node);
+ if (p->core_cookie != cookie)
+ return NULL;
+
+ } while (sched_task_is_throttled(p, cpu));
+
+ return p;
+}
+
+/*
+ * Find left-most (aka, highest priority) and unthrottled task matching @cookie.
+ * If no suitable task is found, NULL will be returned.
+ */
+static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
+{
+ struct task_struct *p;
+ struct rb_node *node;

- node = rb_next(node);
+ node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
if (!node)
return NULL;

- p = container_of(node, struct task_struct, core_node);
- if (p->core_cookie != cookie)
- return NULL;
+ p = __node_2_sc(node);
+ if (!sched_task_is_throttled(p, rq->cpu))
+ return p;

- return p;
+ return sched_core_next(p, cookie);
}

/*
@@ -6236,7 +6251,7 @@ static bool try_steal_cookie(int this, int that)
goto unlock;

p = sched_core_find(src, cookie);
- if (p == src->idle)
+ if (!p)
goto unlock;

do {
@@ -6248,6 +6263,13 @@ static bool try_steal_cookie(int this, int that)

if (p->core_occupation > dst->idle->core_occupation)
goto next;
+ /*
+ * sched_core_find() and sched_core_next() will ensure that task @p
+ * is not throttled now; we also need to check whether the runqueue
+ * of the destination CPU is being throttled.
+ */
+ if (sched_task_is_throttled(p, this))
+ goto next;

deactivate_task(src, p, 0);
set_task_cpu(p, this);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 71b2437..4cc7e1c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2704,6 +2704,13 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
#endif
}

+#ifdef CONFIG_SCHED_CORE
+static int task_is_throttled_dl(struct task_struct *p, int cpu)
+{
+ return p->dl.dl_throttled;
+}
+#endif
+
DEFINE_SCHED_CLASS(dl) = {

.enqueue_task = enqueue_task_dl,
@@ -2736,6 +2743,9 @@ DEFINE_SCHED_CLASS(dl) = {
.switched_to = switched_to_dl,

.update_curr = update_curr_dl,
+#ifdef CONFIG_SCHED_CORE
+ .task_is_throttled = task_is_throttled_dl,
+#endif
};

/* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a1b1f8..b572367 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11933,6 +11933,18 @@ bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,

return delta > 0;
}
+
+static int task_is_throttled_fair(struct task_struct *p, int cpu)
+{
+ struct cfs_rq *cfs_rq;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ cfs_rq = task_group(p)->cfs_rq[cpu];
+#else
+ cfs_rq = &cpu_rq(cpu)->cfs;
+#endif
+ return throttled_hierarchy(cfs_rq);
+}
#else
static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {}
#endif
@@ -12559,6 +12571,10 @@ DEFINE_SCHED_CLASS(fair) = {
.task_change_group = task_change_group_fair,
#endif

+#ifdef CONFIG_SCHED_CORE
+ .task_is_throttled = task_is_throttled_fair,
+#endif
+
#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
#endif
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0a11f44..9d67dfb 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2677,6 +2677,21 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
return 0;
}

+#ifdef CONFIG_SCHED_CORE
+static int task_is_throttled_rt(struct task_struct *p, int cpu)
+{
+ struct rt_rq *rt_rq;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ rt_rq = task_group(p)->rt_rq[cpu];
+#else
+ rt_rq = &cpu_rq(cpu)->rt;
+#endif
+
+ return rt_rq_throttled(rt_rq);
+}
+#endif
+
DEFINE_SCHED_CLASS(rt) = {

.enqueue_task = enqueue_task_rt,
@@ -2710,6 +2725,10 @@ DEFINE_SCHED_CLASS(rt) = {

.update_curr = update_curr_rt,

+#ifdef CONFIG_SCHED_CORE
+ .task_is_throttled = task_is_throttled_rt,
+#endif
+
#ifdef CONFIG_UCLAMP_TASK
.uclamp_enabled = 1,
#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3e8df6d..0606169 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2224,6 +2224,10 @@ struct sched_class {
#ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group)(struct task_struct *p);
#endif
+
+#ifdef CONFIG_SCHED_CORE
+ int (*task_is_throttled)(struct task_struct *p, int cpu);
+#endif
};

static inline void put_prev_task(struct rq *rq, struct task_struct *prev)