2020-11-17 23:25:17

by Joel Fernandes

Subject: [PATCH -tip 14/32] sched: migration changes for core scheduling

From: Aubrey Li <[email protected]>

- Don't migrate if there is a cookie mismatch
Load balancing tries to move a task from the busiest CPU to the
destination CPU. When core scheduling is enabled, if the task's
cookie does not match the destination CPU's core cookie, the task
is skipped by this CPU. This mitigates forced idle time on the
destination CPU.

- Select a cookie-matched idle CPU
In the fast path of task wakeup, select the first cookie-matched
idle CPU instead of the first idle CPU.

- Find the cookie-matched idlest CPU
In the slow path of task wakeup, find the idlest CPU whose core
cookie matches the task's cookie.

- Don't migrate a task if its cookie does not match
For NUMA load balancing, don't migrate a task to a CPU whose core
cookie does not match the task's cookie.

Tested-by: Julien Desfossez <[email protected]>
Signed-off-by: Aubrey Li <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
Signed-off-by: Vineeth Remanan Pillai <[email protected]>
Signed-off-by: Joel Fernandes (Google) <[email protected]>
---
kernel/sched/fair.c | 64 ++++++++++++++++++++++++++++++++++++++++----
kernel/sched/sched.h | 29 ++++++++++++++++++++
2 files changed, 88 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index de82f88ba98c..ceb3906c9a8a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1921,6 +1921,15 @@ static void task_numa_find_cpu(struct task_numa_env *env,
if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
continue;

+#ifdef CONFIG_SCHED_CORE
+ /*
+ * Skip this cpu if source task's cookie does not match
+ * with CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
+ continue;
+#endif
+
env->dst_cpu = cpu;
if (task_numa_compare(env, taskimp, groupimp, maymove))
break;
@@ -5867,11 +5876,17 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this

/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+ struct rq *rq = cpu_rq(i);
+
+#ifdef CONFIG_SCHED_CORE
+ if (!sched_core_cookie_match(rq, p))
+ continue;
+#endif
+
if (sched_idle_cpu(i))
return i;

if (available_idle_cpu(i)) {
- struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
/*
@@ -6129,8 +6144,18 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, cpus, target) {
if (!--nr)
return -1;
- if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
- break;
+
+ if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
+#ifdef CONFIG_SCHED_CORE
+ /*
+ * If Core Scheduling is enabled, select this cpu
+ * only if the process cookie matches core cookie.
+ */
+ if (sched_core_enabled(cpu_rq(cpu)) &&
+ p->core_cookie == cpu_rq(cpu)->core->core_cookie)
+#endif
+ break;
+ }
}

time = cpu_clock(this) - time;
@@ -7530,8 +7555,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
* We do not migrate tasks that are:
* 1) throttled_lb_pair, or
* 2) cannot be migrated to this CPU due to cpus_ptr, or
- * 3) running (obviously), or
- * 4) are cache-hot on their current CPU.
+ * 3) task's cookie does not match with this CPU's core cookie
+ * 4) running (obviously), or
+ * 5) are cache-hot on their current CPU.
*/
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;
@@ -7566,6 +7592,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
return 0;
}

+#ifdef CONFIG_SCHED_CORE
+ /*
+ * Don't migrate task if the task's cookie does not match
+ * with the destination CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+ return 0;
+#endif
+
/* Record that we found atleast one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;

@@ -8792,6 +8827,25 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
p->cpus_ptr))
continue;

+#ifdef CONFIG_SCHED_CORE
+ if (sched_core_enabled(cpu_rq(this_cpu))) {
+ int i = 0;
+ bool cookie_match = false;
+
+ for_each_cpu(i, sched_group_span(group)) {
+ struct rq *rq = cpu_rq(i);
+
+ if (sched_core_cookie_match(rq, p)) {
+ cookie_match = true;
+ break;
+ }
+ }
+ /* Skip over this group if no cookie matched */
+ if (!cookie_match)
+ continue;
+ }
+#endif
+
local_group = cpumask_test_cpu(this_cpu,
sched_group_span(group));

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e72942a9ee11..de553d39aa40 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1135,6 +1135,35 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)

bool cfs_prio_less(struct task_struct *a, struct task_struct *b);

+/*
+ * Helper to check if the CPU's core cookie matches with the task's cookie
+ * when core scheduling is enabled.
+ * A special case is that the task's cookie always matches with CPU's core
+ * cookie if the CPU is in an idle core.
+ */
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ bool idle_core = true;
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
+ if (!available_idle_cpu(cpu)) {
+ idle_core = false;
+ break;
+ }
+ }
+
+ /*
+ * A CPU in an idle core is always the best choice for tasks with
+ * cookies.
+ */
+ return idle_core || rq->core->core_cookie == p->core_cookie;
+}
+
extern void queue_core_balance(struct rq *rq);

#else /* !CONFIG_SCHED_CORE */
--
2.29.2.299.gdc1121823c-goog


2020-11-23 00:35:04

by Balbir Singh

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On Tue, Nov 17, 2020 at 06:19:44PM -0500, Joel Fernandes (Google) wrote:
> From: Aubrey Li <[email protected]>
>
> - Don't migrate if there is a cookie mismatch
> Load balance tries to move task from busiest CPU to the
> destination CPU. When core scheduling is enabled, if the
> task's cookie does not match with the destination CPU's
> core cookie, this task will be skipped by this CPU. This
> mitigates the forced idle time on the destination CPU.
>
> - Select cookie matched idle CPU
> In the fast path of task wakeup, select the first cookie matched
> idle CPU instead of the first idle CPU.
>
> - Find cookie matched idlest CPU
> In the slow path of task wakeup, find the idlest CPU whose core
> cookie matches with task's cookie
>
> - Don't migrate task if cookie not match
> For the NUMA load balance, don't migrate task to the CPU whose
> core cookie does not match with task's cookie
>
> Tested-by: Julien Desfossez <[email protected]>
> Signed-off-by: Aubrey Li <[email protected]>
> Signed-off-by: Tim Chen <[email protected]>
> Signed-off-by: Vineeth Remanan Pillai <[email protected]>
> Signed-off-by: Joel Fernandes (Google) <[email protected]>
> ---
> kernel/sched/fair.c | 64 ++++++++++++++++++++++++++++++++++++++++----
> kernel/sched/sched.h | 29 ++++++++++++++++++++
> 2 files changed, 88 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index de82f88ba98c..ceb3906c9a8a 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1921,6 +1921,15 @@ static void task_numa_find_cpu(struct task_numa_env *env,
> if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
> continue;
>
> +#ifdef CONFIG_SCHED_CORE
> + /*
> + * Skip this cpu if source task's cookie does not match
> + * with CPU's core cookie.
> + */
> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
> + continue;
> +#endif
> +

Any reason this is under an #ifdef? In sched_core_cookie_match() won't
the check for sched_core_enabled() do the right thing even when
CONFIG_SCHED_CORE is not enabled?
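
i.e., something along these lines (a sketch of the disabled-case stub):

	#ifndef CONFIG_SCHED_CORE
	static inline bool sched_core_enabled(struct rq *rq)
	{
		/* core scheduling compiled out; callers constant-fold */
		return false;
	}
	#endif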

> env->dst_cpu = cpu;
> if (task_numa_compare(env, taskimp, groupimp, maymove))
> break;
> @@ -5867,11 +5876,17 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
>
> /* Traverse only the allowed CPUs */
> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
> + struct rq *rq = cpu_rq(i);
> +
> +#ifdef CONFIG_SCHED_CORE
> + if (!sched_core_cookie_match(rq, p))
> + continue;
> +#endif
> +
> if (sched_idle_cpu(i))
> return i;
>
> if (available_idle_cpu(i)) {
> - struct rq *rq = cpu_rq(i);
> struct cpuidle_state *idle = idle_get_state(rq);
> if (idle && idle->exit_latency < min_exit_latency) {
> /*
> @@ -6129,8 +6144,18 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> for_each_cpu_wrap(cpu, cpus, target) {
> if (!--nr)
> return -1;
> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> - break;
> +
> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
> +#ifdef CONFIG_SCHED_CORE
> + /*
> + * If Core Scheduling is enabled, select this cpu
> + * only if the process cookie matches core cookie.
> + */
> + if (sched_core_enabled(cpu_rq(cpu)) &&
> + p->core_cookie == cpu_rq(cpu)->core->core_cookie)
> +#endif
> + break;
> + }
> }
>
> time = cpu_clock(this) - time;
> @@ -7530,8 +7555,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> * We do not migrate tasks that are:
> * 1) throttled_lb_pair, or
> * 2) cannot be migrated to this CPU due to cpus_ptr, or
> - * 3) running (obviously), or
> - * 4) are cache-hot on their current CPU.
> + * 3) task's cookie does not match with this CPU's core cookie
> + * 4) running (obviously), or
> + * 5) are cache-hot on their current CPU.
> */
> if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
> return 0;
> @@ -7566,6 +7592,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> return 0;
> }
>
> +#ifdef CONFIG_SCHED_CORE
> + /*
> + * Don't migrate task if the task's cookie does not match
> + * with the destination CPU's core cookie.
> + */
> + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
> + return 0;
> +#endif
> +
> /* Record that we found atleast one task that could run on dst_cpu */
> env->flags &= ~LBF_ALL_PINNED;
>
> @@ -8792,6 +8827,25 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
> p->cpus_ptr))
> continue;
>
> +#ifdef CONFIG_SCHED_CORE
> + if (sched_core_enabled(cpu_rq(this_cpu))) {
> + int i = 0;
> + bool cookie_match = false;
> +
> + for_each_cpu(i, sched_group_span(group)) {
> + struct rq *rq = cpu_rq(i);
> +
> + if (sched_core_cookie_match(rq, p)) {
> + cookie_match = true;
> + break;
> + }
> + }
> + /* Skip over this group if no cookie matched */
> + if (!cookie_match)
> + continue;
> + }
> +#endif
> +
> local_group = cpumask_test_cpu(this_cpu,
> sched_group_span(group));
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index e72942a9ee11..de553d39aa40 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1135,6 +1135,35 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
>
> bool cfs_prio_less(struct task_struct *a, struct task_struct *b);
>
> +/*
> + * Helper to check if the CPU's core cookie matches with the task's cookie
> + * when core scheduling is enabled.
> + * A special case is that the task's cookie always matches with CPU's core
> + * cookie if the CPU is in an idle core.
> + */
> +static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
> +{
> + bool idle_core = true;
> + int cpu;
> +
> + /* Ignore cookie match if core scheduler is not enabled on the CPU. */
> + if (!sched_core_enabled(rq))
> + return true;
> +
> + for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
> + if (!available_idle_cpu(cpu)) {

I was looking at this snippet and comparing this to is_core_idle(), the
major difference is the check for vcpu_is_preempted(). Do we want to
call the core as non idle if any vcpu was preempted on this CPU?

> + idle_core = false;
> + break;
> + }
> + }
> +
> + /*
> + * A CPU in an idle core is always the best choice for tasks with
> + * cookies.
> + */
> + return idle_core || rq->core->core_cookie == p->core_cookie;
> +}
> +

Balbir Singh.

2020-11-23 05:40:50

by Li, Aubrey

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On 2020/11/23 7:54, Balbir Singh wrote:
> On Tue, Nov 17, 2020 at 06:19:44PM -0500, Joel Fernandes (Google) wrote:
>> From: Aubrey Li <[email protected]>
>>
>> - Don't migrate if there is a cookie mismatch
>> Load balance tries to move task from busiest CPU to the
>> destination CPU. When core scheduling is enabled, if the
>> task's cookie does not match with the destination CPU's
>> core cookie, this task will be skipped by this CPU. This
>> mitigates the forced idle time on the destination CPU.
>>
>> - Select cookie matched idle CPU
>> In the fast path of task wakeup, select the first cookie matched
>> idle CPU instead of the first idle CPU.
>>
>> - Find cookie matched idlest CPU
>> In the slow path of task wakeup, find the idlest CPU whose core
>> cookie matches with task's cookie
>>
>> - Don't migrate task if cookie not match
>> For the NUMA load balance, don't migrate task to the CPU whose
>> core cookie does not match with task's cookie
>>
>> Tested-by: Julien Desfossez <[email protected]>
>> Signed-off-by: Aubrey Li <[email protected]>
>> Signed-off-by: Tim Chen <[email protected]>
>> Signed-off-by: Vineeth Remanan Pillai <[email protected]>
>> Signed-off-by: Joel Fernandes (Google) <[email protected]>
>> ---
>> kernel/sched/fair.c | 64 ++++++++++++++++++++++++++++++++++++++++----
>> kernel/sched/sched.h | 29 ++++++++++++++++++++
>> 2 files changed, 88 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index de82f88ba98c..ceb3906c9a8a 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -1921,6 +1921,15 @@ static void task_numa_find_cpu(struct task_numa_env *env,
>> if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
>> continue;
>>
>> +#ifdef CONFIG_SCHED_CORE
>> + /*
>> + * Skip this cpu if source task's cookie does not match
>> + * with CPU's core cookie.
>> + */
>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
>> + continue;
>> +#endif
>> +
>
> Any reason this is under an #ifdef? In sched_core_cookie_match() won't
> the check for sched_core_enabled() do the right thing even when
> CONFIG_SCHED_CORE is not enabed?>
Yes, sched_core_enabled() works properly when CONFIG_SCHED_CORE is not
enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
sense to leave a core-scheduler-specific function here even at compile
time. Also, on these hot paths, the #ifdef saves CPU cycles by avoiding
the check altogether.


>> env->dst_cpu = cpu;
>> if (task_numa_compare(env, taskimp, groupimp, maymove))
>> break;
>> @@ -5867,11 +5876,17 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
>>
>> /* Traverse only the allowed CPUs */
>> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
>> + struct rq *rq = cpu_rq(i);
>> +
>> +#ifdef CONFIG_SCHED_CORE
>> + if (!sched_core_cookie_match(rq, p))
>> + continue;
>> +#endif
>> +
>> if (sched_idle_cpu(i))
>> return i;
>>
>> if (available_idle_cpu(i)) {
>> - struct rq *rq = cpu_rq(i);
>> struct cpuidle_state *idle = idle_get_state(rq);
>> if (idle && idle->exit_latency < min_exit_latency) {
>> /*
>> @@ -6129,8 +6144,18 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>> for_each_cpu_wrap(cpu, cpus, target) {
>> if (!--nr)
>> return -1;
>> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
>> - break;
>> +
>> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
>> +#ifdef CONFIG_SCHED_CORE
>> + /*
>> + * If Core Scheduling is enabled, select this cpu
>> + * only if the process cookie matches core cookie.
>> + */
>> + if (sched_core_enabled(cpu_rq(cpu)) &&
>> + p->core_cookie == cpu_rq(cpu)->core->core_cookie)
>> +#endif
>> + break;
>> + }
>> }
>>
>> time = cpu_clock(this) - time;
>> @@ -7530,8 +7555,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>> * We do not migrate tasks that are:
>> * 1) throttled_lb_pair, or
>> * 2) cannot be migrated to this CPU due to cpus_ptr, or
>> - * 3) running (obviously), or
>> - * 4) are cache-hot on their current CPU.
>> + * 3) task's cookie does not match with this CPU's core cookie
>> + * 4) running (obviously), or
>> + * 5) are cache-hot on their current CPU.
>> */
>> if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
>> return 0;
>> @@ -7566,6 +7592,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>> return 0;
>> }
>>
>> +#ifdef CONFIG_SCHED_CORE
>> + /*
>> + * Don't migrate task if the task's cookie does not match
>> + * with the destination CPU's core cookie.
>> + */
>> + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
>> + return 0;
>> +#endif
>> +
>> /* Record that we found atleast one task that could run on dst_cpu */
>> env->flags &= ~LBF_ALL_PINNED;
>>
>> @@ -8792,6 +8827,25 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>> p->cpus_ptr))
>> continue;
>>
>> +#ifdef CONFIG_SCHED_CORE
>> + if (sched_core_enabled(cpu_rq(this_cpu))) {
>> + int i = 0;
>> + bool cookie_match = false;
>> +
>> + for_each_cpu(i, sched_group_span(group)) {
>> + struct rq *rq = cpu_rq(i);
>> +
>> + if (sched_core_cookie_match(rq, p)) {
>> + cookie_match = true;
>> + break;
>> + }
>> + }
>> + /* Skip over this group if no cookie matched */
>> + if (!cookie_match)
>> + continue;
>> + }
>> +#endif
>> +
>> local_group = cpumask_test_cpu(this_cpu,
>> sched_group_span(group));
>>
>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>> index e72942a9ee11..de553d39aa40 100644
>> --- a/kernel/sched/sched.h
>> +++ b/kernel/sched/sched.h
>> @@ -1135,6 +1135,35 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
>>
>> bool cfs_prio_less(struct task_struct *a, struct task_struct *b);
>>
>> +/*
>> + * Helper to check if the CPU's core cookie matches with the task's cookie
>> + * when core scheduling is enabled.
>> + * A special case is that the task's cookie always matches with CPU's core
>> + * cookie if the CPU is in an idle core.
>> + */
>> +static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
>> +{
>> + bool idle_core = true;
>> + int cpu;
>> +
>> + /* Ignore cookie match if core scheduler is not enabled on the CPU. */
>> + if (!sched_core_enabled(rq))
>> + return true;
>> +
>> + for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
>> + if (!available_idle_cpu(cpu)) {
>
> I was looking at this snippet and comparing this to is_core_idle(), the
> major difference is the check for vcpu_is_preempted(). Do we want to
> call the core as non idle if any vcpu was preempted on this CPU?

Yes, if a vCPU was preempted on this CPU, it is better not to place a
task on this core, as the vCPU may be holding a spinlock and want to
run again as soon as possible.
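
For reference, the mainline helper already encodes that distinction;
roughly (quoted from memory, so treat it as a sketch):

	/* kernel/sched/core.c */
	int available_idle_cpu(int cpu)
	{
		if (!idle_cpu(cpu))
			return 0;

		if (vcpu_is_preempted(cpu))
			return 0;

		return 1;
	}

is_core_idle() only checks idle_cpu() on the siblings, which is the
difference you noticed.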

Thanks,
-Aubrey

2020-11-24 15:47:32

by Peter Zijlstra

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On Mon, Nov 23, 2020 at 12:36:10PM +0800, Li, Aubrey wrote:
> >> +#ifdef CONFIG_SCHED_CORE
> >> + /*
> >> + * Skip this cpu if source task's cookie does not match
> >> + * with CPU's core cookie.
> >> + */
> >> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
> >> + continue;
> >> +#endif
> >> +
> >
> > Any reason this is under an #ifdef? In sched_core_cookie_match() won't
> > the check for sched_core_enabled() do the right thing even when
> > CONFIG_SCHED_CORE is not enabed?>
> Yes, sched_core_enabled works properly when CONFIG_SCHED_CORE is not
> enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
> sense to leave a core scheduler specific function here even at compile
> time. Also, for the cases in hot path, this saves CPU cycles to avoid
> a judgment.

No, that's nonsense. If it works, remove the #ifdef. Less (#ifdef) is
more.

> >> +static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
> >> +{
> >> + bool idle_core = true;
> >> + int cpu;
> >> +
> >> + /* Ignore cookie match if core scheduler is not enabled on the CPU. */
> >> + if (!sched_core_enabled(rq))
> >> + return true;
> >> +
> >> + for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
> >> + if (!available_idle_cpu(cpu)) {
> >
> > I was looking at this snippet and comparing this to is_core_idle(), the
> > major difference is the check for vcpu_is_preempted(). Do we want to
> > call the core as non idle if any vcpu was preempted on this CPU?
>
> Yes, if there is a VCPU was preempted on this CPU, better not place task
> on this core as the VCPU may be holding a spinlock and wants to be executed
> again ASAP.

If you're doing core scheduling on vcpus, you deserve all the pain
possible.


2020-11-25 03:15:17

by Li, Aubrey

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On 2020/11/24 23:42, Peter Zijlstra wrote:
> On Mon, Nov 23, 2020 at 12:36:10PM +0800, Li, Aubrey wrote:
>>>> +#ifdef CONFIG_SCHED_CORE
>>>> + /*
>>>> + * Skip this cpu if source task's cookie does not match
>>>> + * with CPU's core cookie.
>>>> + */
>>>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
>>>> + continue;
>>>> +#endif
>>>> +
>>>
>>> Any reason this is under an #ifdef? In sched_core_cookie_match() won't
>>> the check for sched_core_enabled() do the right thing even when
>>> CONFIG_SCHED_CORE is not enabed?>
>> Yes, sched_core_enabled works properly when CONFIG_SCHED_CORE is not
>> enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
>> sense to leave a core scheduler specific function here even at compile
>> time. Also, for the cases in hot path, this saves CPU cycles to avoid
>> a judgment.
>
> No, that's nonsense. If it works, remove the #ifdef. Less (#ifdef) is
> more.
>

Okay, I pasted the refined patch here.
@Joel, please let me know if you want me to send it in a separate thread.

Thanks,
-Aubrey
======================================================================
From 18e4f4592c2a159fcbae637f3a422e37ad24cb5a Mon Sep 17 00:00:00 2001
From: Aubrey Li <[email protected]>
Date: Wed, 25 Nov 2020 02:43:46 +0000
Subject: [PATCH 14/33] sched: migration changes for core scheduling

- Don't migrate if there is a cookie mismatch
Load balancing tries to move a task from the busiest CPU to the
destination CPU. When core scheduling is enabled, if the task's
cookie does not match the destination CPU's core cookie, the task
is skipped by this CPU. This mitigates forced idle time on the
destination CPU.

- Select a cookie-matched idle CPU
In the fast path of task wakeup, select the first cookie-matched
idle CPU instead of the first idle CPU.

- Find the cookie-matched idlest CPU
In the slow path of task wakeup, find the idlest CPU whose core
cookie matches the task's cookie.

- Don't migrate a task if its cookie does not match
For NUMA load balancing, don't migrate a task to a CPU whose core
cookie does not match the task's cookie.

Tested-by: Julien Desfossez <[email protected]>
Signed-off-by: Aubrey Li <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
Signed-off-by: Vineeth Remanan Pillai <[email protected]>
Signed-off-by: Joel Fernandes (Google) <[email protected]>
---
kernel/sched/fair.c | 58 ++++++++++++++++++++++++++++++++++++++++----
kernel/sched/sched.h | 33 +++++++++++++++++++++++++
2 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index de82f88ba98c..7eea5da6685a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1921,6 +1921,13 @@ static void task_numa_find_cpu(struct task_numa_env *env,
if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
continue;

+ /*
+ * Skip this cpu if source task's cookie does not match
+ * with CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
+ continue;
+
env->dst_cpu = cpu;
if (task_numa_compare(env, taskimp, groupimp, maymove))
break;
@@ -5867,11 +5874,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this

/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+ struct rq *rq = cpu_rq(i);
+
+ if (!sched_core_cookie_match(rq, p))
+ continue;
+
if (sched_idle_cpu(i))
return i;

if (available_idle_cpu(i)) {
- struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
/*
@@ -6129,8 +6140,18 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, cpus, target) {
if (!--nr)
return -1;
- if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
- break;
+
+ if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
+#ifdef CONFIG_SCHED_CORE
+ /*
+ * If Core Scheduling is enabled, select this cpu
+ * only if the process cookie matches core cookie.
+ */
+ if (sched_core_enabled(cpu_rq(cpu)) &&
+ p->core_cookie == cpu_rq(cpu)->core->core_cookie)
+#endif
+ break;
+ }
}

time = cpu_clock(this) - time;
@@ -7530,8 +7551,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
* We do not migrate tasks that are:
* 1) throttled_lb_pair, or
* 2) cannot be migrated to this CPU due to cpus_ptr, or
- * 3) running (obviously), or
- * 4) are cache-hot on their current CPU.
+ * 3) task's cookie does not match with this CPU's core cookie
+ * 4) running (obviously), or
+ * 5) are cache-hot on their current CPU.
*/
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;
@@ -7566,6 +7588,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
return 0;
}

+ /*
+ * Don't migrate task if the task's cookie does not match
+ * with the destination CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+ return 0;
+
/* Record that we found atleast one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;

@@ -8792,6 +8821,25 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
p->cpus_ptr))
continue;

+#ifdef CONFIG_SCHED_CORE
+ if (sched_core_enabled(cpu_rq(this_cpu))) {
+ int i = 0;
+ bool cookie_match = false;
+
+ for_each_cpu(i, sched_group_span(group)) {
+ struct rq *rq = cpu_rq(i);
+
+ if (sched_core_cookie_match(rq, p)) {
+ cookie_match = true;
+ break;
+ }
+ }
+ /* Skip over this group if no cookie matched */
+ if (!cookie_match)
+ continue;
+ }
+#endif
+
local_group = cpumask_test_cpu(this_cpu,
sched_group_span(group));

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e72942a9ee11..05b93787fe62 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1135,6 +1135,35 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)

bool cfs_prio_less(struct task_struct *a, struct task_struct *b);

+/*
+ * Helper to check if the CPU's core cookie matches with the task's cookie
+ * when core scheduling is enabled.
+ * A special case is that the task's cookie always matches with CPU's core
+ * cookie if the CPU is in an idle core.
+ */
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ bool idle_core = true;
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
+ if (!available_idle_cpu(cpu)) {
+ idle_core = false;
+ break;
+ }
+ }
+
+ /*
+ * A CPU in an idle core is always the best choice for tasks with
+ * cookies.
+ */
+ return idle_core || rq->core->core_cookie == p->core_cookie;
+}
+
extern void queue_core_balance(struct rq *rq);

#else /* !CONFIG_SCHED_CORE */
@@ -1153,6 +1182,10 @@ static inline void queue_core_balance(struct rq *rq)
{
}

+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
#endif /* CONFIG_SCHED_CORE */

#ifdef CONFIG_SCHED_SMT
--
2.17.1

2020-11-25 23:47:02

by Balbir Singh

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On Wed, Nov 25, 2020 at 11:12:53AM +0800, Li, Aubrey wrote:
> On 2020/11/24 23:42, Peter Zijlstra wrote:
> > On Mon, Nov 23, 2020 at 12:36:10PM +0800, Li, Aubrey wrote:
> >>>> +#ifdef CONFIG_SCHED_CORE
> >>>> + /*
> >>>> + * Skip this cpu if source task's cookie does not match
> >>>> + * with CPU's core cookie.
> >>>> + */
> >>>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
> >>>> + continue;
> >>>> +#endif
> >>>> +
> >>>
> >>> Any reason this is under an #ifdef? In sched_core_cookie_match() won't
> >>> the check for sched_core_enabled() do the right thing even when
> >>> CONFIG_SCHED_CORE is not enabed?>
> >> Yes, sched_core_enabled works properly when CONFIG_SCHED_CORE is not
> >> enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
> >> sense to leave a core scheduler specific function here even at compile
> >> time. Also, for the cases in hot path, this saves CPU cycles to avoid
> >> a judgment.
> >
> > No, that's nonsense. If it works, remove the #ifdef. Less (#ifdef) is
> > more.
> >
>
> Okay, I pasted the refined patch here.
> @Joel, please let me know if you want me to send it in a separated thread.
>

You still have a bunch of #ifdefs; can't we just do

	#ifndef CONFIG_SCHED_CORE
	static inline bool sched_core_enabled(struct rq *rq)
	{
		return false;
	}
	#endif

and frankly I think even that is not needed, because there is a jump
label, __sched_core_enabled, that tells us whether sched_core is
enabled or not.
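
For completeness, the enabled-side helper in the series is, as I recall
(the exact names are my assumption), built on that jump label, so the
runtime check is nearly free:

	DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);

	static inline bool sched_core_enabled(struct rq *rq)
	{
		/* static branch: patched to a NOP while the key is off */
		return static_branch_unlikely(&__sched_core_enabled) &&
		       rq->core_enabled;
	}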

Balbir

2020-11-26 09:39:46

by Li, Aubrey

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On 2020/11/26 6:57, Balbir Singh wrote:
> On Wed, Nov 25, 2020 at 11:12:53AM +0800, Li, Aubrey wrote:
>> On 2020/11/24 23:42, Peter Zijlstra wrote:
>>> On Mon, Nov 23, 2020 at 12:36:10PM +0800, Li, Aubrey wrote:
>>>>>> +#ifdef CONFIG_SCHED_CORE
>>>>>> + /*
>>>>>> + * Skip this cpu if source task's cookie does not match
>>>>>> + * with CPU's core cookie.
>>>>>> + */
>>>>>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
>>>>>> + continue;
>>>>>> +#endif
>>>>>> +
>>>>>
>>>>> Any reason this is under an #ifdef? In sched_core_cookie_match() won't
>>>>> the check for sched_core_enabled() do the right thing even when
>>>>> CONFIG_SCHED_CORE is not enabed?>
>>>> Yes, sched_core_enabled works properly when CONFIG_SCHED_CORE is not
>>>> enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
>>>> sense to leave a core scheduler specific function here even at compile
>>>> time. Also, for the cases in hot path, this saves CPU cycles to avoid
>>>> a judgment.
>>>
>>> No, that's nonsense. If it works, remove the #ifdef. Less (#ifdef) is
>>> more.
>>>
>>
>> Okay, I pasted the refined patch here.
>> @Joel, please let me know if you want me to send it in a separated thread.
>>
>
> You still have a bunch of #ifdefs, can't we just do
>
> #ifndef CONFIG_SCHED_CORE
> static inline bool sched_core_enabled(struct rq *rq)
> {
> return false;
> }
> #endif
>
> and frankly I think even that is not needed because there is a jump
> label __sched_core_enabled that tells us if sched_core is enabled or
> not.

Hmm, I need another wrapper for the CONFIG_SCHED_CORE-specific variables.
How about this one?

Thanks,
-Aubrey

From 61dac9067e66b5b9ea26c684c8c8235714bab38a Mon Sep 17 00:00:00 2001
From: Aubrey Li <[email protected]>
Date: Thu, 26 Nov 2020 03:08:04 +0000
Subject: [PATCH] sched: migration changes for core scheduling

- Don't migrate if there is a cookie mismatch
Load balancing tries to move a task from the busiest CPU to the
destination CPU. When core scheduling is enabled, if the task's
cookie does not match the destination CPU's core cookie, the task
is skipped by this CPU. This mitigates forced idle time on the
destination CPU.

- Select a cookie-matched idle CPU
In the fast path of task wakeup, select the first cookie-matched
idle CPU instead of the first idle CPU.

- Find the cookie-matched idlest CPU
In the slow path of task wakeup, find the idlest CPU whose core
cookie matches the task's cookie.

- Don't migrate a task if its cookie does not match
For NUMA load balancing, don't migrate a task to a CPU whose core
cookie does not match the task's cookie.

Tested-by: Julien Desfossez <[email protected]>
Signed-off-by: Aubrey Li <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
Signed-off-by: Vineeth Remanan Pillai <[email protected]>
Signed-off-by: Joel Fernandes (Google) <[email protected]>
---
kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++++++++++++----
kernel/sched/sched.h | 43 +++++++++++++++++++++++++++++++++
2 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index de82f88ba98c..70dd013dff1d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1921,6 +1921,13 @@ static void task_numa_find_cpu(struct task_numa_env *env,
if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
continue;

+ /*
+ * Skip this cpu if source task's cookie does not match
+ * with CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
+ continue;
+
env->dst_cpu = cpu;
if (task_numa_compare(env, taskimp, groupimp, maymove))
break;
@@ -5867,11 +5874,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this

/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+ struct rq *rq = cpu_rq(i);
+
+ if (!sched_core_cookie_match(rq, p))
+ continue;
+
if (sched_idle_cpu(i))
return i;

if (available_idle_cpu(i)) {
- struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
/*
@@ -6129,8 +6140,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, cpus, target) {
if (!--nr)
return -1;
- if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
- break;
+
+ if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
+ /*
+ * If Core Scheduling is enabled, select this cpu
+ * only if the process cookie matches core cookie.
+ */
+ if (sched_core_enabled(cpu_rq(cpu))) {
+ if (__cookie_match(cpu_rq(cpu), p))
+ break;
+ } else {
+ break;
+ }
+ }
}

time = cpu_clock(this) - time;
@@ -7530,8 +7552,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
* We do not migrate tasks that are:
* 1) throttled_lb_pair, or
* 2) cannot be migrated to this CPU due to cpus_ptr, or
- * 3) running (obviously), or
- * 4) are cache-hot on their current CPU.
+ * 3) task's cookie does not match with this CPU's core cookie
+ * 4) running (obviously), or
+ * 5) are cache-hot on their current CPU.
*/
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;
@@ -7566,6 +7589,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
return 0;
}

+ /*
+ * Don't migrate task if the task's cookie does not match
+ * with the destination CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+ return 0;
+
/* Record that we found atleast one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;

@@ -8792,6 +8822,23 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
p->cpus_ptr))
continue;

+ if (sched_core_enabled(cpu_rq(this_cpu))) {
+ int i = 0;
+ bool cookie_match = false;
+
+ for_each_cpu(i, sched_group_span(group)) {
+ struct rq *rq = cpu_rq(i);
+
+ if (sched_core_cookie_match(rq, p)) {
+ cookie_match = true;
+ break;
+ }
+ }
+ /* Skip over this group if no cookie matched */
+ if (!cookie_match)
+ continue;
+ }
+
local_group = cpumask_test_cpu(this_cpu,
sched_group_span(group));

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e72942a9ee11..8bb3b72d593c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1135,6 +1135,40 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)

bool cfs_prio_less(struct task_struct *a, struct task_struct *b);

+/*
+ * Helper to check if the CPU's core cookie matches with the task's cookie
+ * when core scheduling is enabled.
+ * A special case is that the task's cookie always matches with CPU's core
+ * cookie if the CPU is in an idle core.
+ */
+static inline bool __cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return rq->core->core_cookie == p->core_cookie;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ bool idle_core = true;
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
+ if (!available_idle_cpu(cpu)) {
+ idle_core = false;
+ break;
+ }
+ }
+
+ /*
+ * A CPU in an idle core is always the best choice for tasks with
+ * cookies.
+ */
+ return idle_core || __cookie_match(rq, p);
+}
+
extern void queue_core_balance(struct rq *rq);

#else /* !CONFIG_SCHED_CORE */
@@ -1153,6 +1187,15 @@ static inline void queue_core_balance(struct rq *rq)
{
}

+static inline bool __cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
#endif /* CONFIG_SCHED_CORE */

#ifdef CONFIG_SCHED_SMT
--
2.17.1

2020-11-26 10:57:22

by Balbir Singh

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On Thu, Nov 26, 2020 at 11:20:41AM +0800, Li, Aubrey wrote:
> On 2020/11/26 6:57, Balbir Singh wrote:
> > On Wed, Nov 25, 2020 at 11:12:53AM +0800, Li, Aubrey wrote:
> >> On 2020/11/24 23:42, Peter Zijlstra wrote:
> >>> On Mon, Nov 23, 2020 at 12:36:10PM +0800, Li, Aubrey wrote:
> >>>>>> +#ifdef CONFIG_SCHED_CORE
> >>>>>> + /*
> >>>>>> + * Skip this cpu if source task's cookie does not match
> >>>>>> + * with CPU's core cookie.
> >>>>>> + */
> >>>>>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
> >>>>>> + continue;
> >>>>>> +#endif
> >>>>>> +
> >>>>>
> >>>>> Any reason this is under an #ifdef? In sched_core_cookie_match() won't
> >>>>> the check for sched_core_enabled() do the right thing even when
> >>>>> CONFIG_SCHED_CORE is not enabed?>
> >>>> Yes, sched_core_enabled works properly when CONFIG_SCHED_CORE is not
> >>>> enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
> >>>> sense to leave a core scheduler specific function here even at compile
> >>>> time. Also, for the cases in hot path, this saves CPU cycles to avoid
> >>>> a judgment.
> >>>
> >>> No, that's nonsense. If it works, remove the #ifdef. Less (#ifdef) is
> >>> more.
> >>>
> >>
> >> Okay, I pasted the refined patch here.
> >> @Joel, please let me know if you want me to send it in a separated thread.
> >>
> >
> > You still have a bunch of #ifdefs, can't we just do
> >
> > #ifndef CONFIG_SCHED_CORE
> > static inline bool sched_core_enabled(struct rq *rq)
> > {
> > return false;
> > }
> > #endif
> >
> > and frankly I think even that is not needed because there is a jump
> > label __sched_core_enabled that tells us if sched_core is enabled or
> > not.
>
> Hmm..., I need another wrapper for CONFIG_SCHED_CORE specific variables.
> How about this one?
>

Much better :)

> Thanks,
> -Aubrey
>
> From 61dac9067e66b5b9ea26c684c8c8235714bab38a Mon Sep 17 00:00:00 2001
> From: Aubrey Li <[email protected]>
> Date: Thu, 26 Nov 2020 03:08:04 +0000
> Subject: [PATCH] sched: migration changes for core scheduling
>
> - Don't migrate if there is a cookie mismatch
> Load balance tries to move task from busiest CPU to the
> destination CPU. When core scheduling is enabled, if the
> task's cookie does not match with the destination CPU's
> core cookie, this task will be skipped by this CPU. This
> mitigates the forced idle time on the destination CPU.
>
> - Select cookie matched idle CPU
> In the fast path of task wakeup, select the first cookie matched
> idle CPU instead of the first idle CPU.
>
> - Find cookie matched idlest CPU
> In the slow path of task wakeup, find the idlest CPU whose core
> cookie matches with task's cookie
>
> - Don't migrate task if cookie not match
> For the NUMA load balance, don't migrate task to the CPU whose
> core cookie does not match with task's cookie
>
> Tested-by: Julien Desfossez <[email protected]>
> Signed-off-by: Aubrey Li <[email protected]>
> Signed-off-by: Tim Chen <[email protected]>
> Signed-off-by: Vineeth Remanan Pillai <[email protected]>
> Signed-off-by: Joel Fernandes (Google) <[email protected]>
> ---
> kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++++++++++++----
> kernel/sched/sched.h | 43 +++++++++++++++++++++++++++++++++
> 2 files changed, 95 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index de82f88ba98c..70dd013dff1d 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1921,6 +1921,13 @@ static void task_numa_find_cpu(struct task_numa_env *env,
> if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
> continue;
>
> + /*
> + * Skip this cpu if source task's cookie does not match
> + * with CPU's core cookie.
> + */
> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
> + continue;
> +
> env->dst_cpu = cpu;
> if (task_numa_compare(env, taskimp, groupimp, maymove))
> break;
> @@ -5867,11 +5874,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
>
> /* Traverse only the allowed CPUs */
> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
> + struct rq *rq = cpu_rq(i);
> +
> + if (!sched_core_cookie_match(rq, p))
> + continue;
> +
> if (sched_idle_cpu(i))
> return i;
>
> if (available_idle_cpu(i)) {
> - struct rq *rq = cpu_rq(i);
> struct cpuidle_state *idle = idle_get_state(rq);
> if (idle && idle->exit_latency < min_exit_latency) {
> /*
> @@ -6129,8 +6140,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> for_each_cpu_wrap(cpu, cpus, target) {
> if (!--nr)
> return -1;
> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> - break;
> +
> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
> + /*
> + * If Core Scheduling is enabled, select this cpu
> + * only if the process cookie matches core cookie.
> + */
> + if (sched_core_enabled(cpu_rq(cpu))) {
> + if (__cookie_match(cpu_rq(cpu), p))
> + break;
> + } else {
> + break;
> + }
> + }

Isn't this better and equivalent?

	if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
	    sched_core_cookie_match(cpu_rq(cpu), p))
		break;

> }
>
> time = cpu_clock(this) - time;
> @@ -7530,8 +7552,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> * We do not migrate tasks that are:
> * 1) throttled_lb_pair, or
> * 2) cannot be migrated to this CPU due to cpus_ptr, or
> - * 3) running (obviously), or
> - * 4) are cache-hot on their current CPU.
> + * 3) task's cookie does not match with this CPU's core cookie
> + * 4) running (obviously), or
> + * 5) are cache-hot on their current CPU.
> */
> if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
> return 0;
> @@ -7566,6 +7589,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> return 0;
> }
>
> + /*
> + * Don't migrate task if the task's cookie does not match
> + * with the destination CPU's core cookie.
> + */
> + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
> + return 0;
> +
> /* Record that we found atleast one task that could run on dst_cpu */
> env->flags &= ~LBF_ALL_PINNED;
>
> @@ -8792,6 +8822,23 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
> p->cpus_ptr))
> continue;
>
> + if (sched_core_enabled(cpu_rq(this_cpu))) {
> + int i = 0;
> + bool cookie_match = false;
> +
> + for_each_cpu(i, sched_group_span(group)) {
> + struct rq *rq = cpu_rq(i);
> +
> + if (sched_core_cookie_match(rq, p)) {
> + cookie_match = true;
> + break;
> + }
> + }
> + /* Skip over this group if no cookie matched */
> + if (!cookie_match)
> + continue;
> + }
> +

Again, I think this can be refactored, because sched_core_cookie_match()
already checks for sched_core_enabled():

	int i = 0;

	for_each_cpu(i, sched_group_span(group)) {
		if (sched_core_cookie_match(cpu_rq(i), p))
			break;
	}
	if (i >= nr_cpu_ids)
		continue;


> local_group = cpumask_test_cpu(this_cpu,
> sched_group_span(group));
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index e72942a9ee11..8bb3b72d593c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1135,6 +1135,40 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
>
> bool cfs_prio_less(struct task_struct *a, struct task_struct *b);
>
> +/*
> + * Helper to check if the CPU's core cookie matches with the task's cookie
> + * when core scheduling is enabled.
> + * A special case is that the task's cookie always matches with CPU's core
> + * cookie if the CPU is in an idle core.
> + */
> +static inline bool __cookie_match(struct rq *rq, struct task_struct *p)
> +{
> + return rq->core->core_cookie == p->core_cookie;
> +}
> +
> +static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
> +{
> + bool idle_core = true;
> + int cpu;
> +
> + /* Ignore cookie match if core scheduler is not enabled on the CPU. */
> + if (!sched_core_enabled(rq))
> + return true;
> +
> + for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
> + if (!available_idle_cpu(cpu)) {
> + idle_core = false;
> + break;
> + }
> + }
> +
> + /*
> + * A CPU in an idle core is always the best choice for tasks with
> + * cookies.
> + */
> + return idle_core || __cookie_match(rq, p);
> +}
> +
> extern void queue_core_balance(struct rq *rq);
>
> #else /* !CONFIG_SCHED_CORE */
> @@ -1153,6 +1187,15 @@ static inline void queue_core_balance(struct rq *rq)
> {
> }
>
> +static inline bool __cookie_match(struct rq *rq, struct task_struct *p)
> +{
> + return true;
> +}
> +
> +static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
> +{
> + return true;
> +}
> #endif /* CONFIG_SCHED_CORE */
>
> #ifdef CONFIG_SCHED_SMT
> --
> 2.17.1
>

Balbir Singh.

2020-11-27 06:22:46

by Li, Aubrey

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On 2020/11/26 16:32, Balbir Singh wrote:
> On Thu, Nov 26, 2020 at 11:20:41AM +0800, Li, Aubrey wrote:
>> On 2020/11/26 6:57, Balbir Singh wrote:
>>> On Wed, Nov 25, 2020 at 11:12:53AM +0800, Li, Aubrey wrote:
>>>> On 2020/11/24 23:42, Peter Zijlstra wrote:
>>>>> On Mon, Nov 23, 2020 at 12:36:10PM +0800, Li, Aubrey wrote:
>>>>>>>> +#ifdef CONFIG_SCHED_CORE
>>>>>>>> + /*
>>>>>>>> + * Skip this cpu if source task's cookie does not match
>>>>>>>> + * with CPU's core cookie.
>>>>>>>> + */
>>>>>>>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
>>>>>>>> + continue;
>>>>>>>> +#endif
>>>>>>>> +
>>>>>>>
>>>>>>> Any reason this is under an #ifdef? In sched_core_cookie_match() won't
>>>>>>> the check for sched_core_enabled() do the right thing even when
>>>>>>> CONFIG_SCHED_CORE is not enabed?>
>>>>>> Yes, sched_core_enabled works properly when CONFIG_SCHED_CORE is not
>>>>>> enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
>>>>>> sense to leave a core scheduler specific function here even at compile
>>>>>> time. Also, for the cases in hot path, this saves CPU cycles to avoid
>>>>>> a judgment.
>>>>>
>>>>> No, that's nonsense. If it works, remove the #ifdef. Less (#ifdef) is
>>>>> more.
>>>>>
>>>>
>>>> Okay, I pasted the refined patch here.
>>>> @Joel, please let me know if you want me to send it in a separated thread.
>>>>
>>>
>>> You still have a bunch of #ifdefs, can't we just do
>>>
>>> #ifndef CONFIG_SCHED_CORE
>>> static inline bool sched_core_enabled(struct rq *rq)
>>> {
>>> return false;
>>> }
>>> #endif
>>>
>>> and frankly I think even that is not needed because there is a jump
>>> label __sched_core_enabled that tells us if sched_core is enabled or
>>> not.
>>
>> Hmm..., I need another wrapper for CONFIG_SCHED_CORE specific variables.
>> How about this one?
>>
>
> Much better :)
>
>> Thanks,
>> -Aubrey
>>
>> From 61dac9067e66b5b9ea26c684c8c8235714bab38a Mon Sep 17 00:00:00 2001
>> From: Aubrey Li <[email protected]>
>> Date: Thu, 26 Nov 2020 03:08:04 +0000
>> Subject: [PATCH] sched: migration changes for core scheduling
>>
>> - Don't migrate if there is a cookie mismatch
>> Load balance tries to move task from busiest CPU to the
>> destination CPU. When core scheduling is enabled, if the
>> task's cookie does not match with the destination CPU's
>> core cookie, this task will be skipped by this CPU. This
>> mitigates the forced idle time on the destination CPU.
>>
>> - Select cookie matched idle CPU
>> In the fast path of task wakeup, select the first cookie matched
>> idle CPU instead of the first idle CPU.
>>
>> - Find cookie matched idlest CPU
>> In the slow path of task wakeup, find the idlest CPU whose core
>> cookie matches with task's cookie
>>
>> - Don't migrate task if cookie not match
>> For the NUMA load balance, don't migrate task to the CPU whose
>> core cookie does not match with task's cookie
>>
>> Tested-by: Julien Desfossez <[email protected]>
>> Signed-off-by: Aubrey Li <[email protected]>
>> Signed-off-by: Tim Chen <[email protected]>
>> Signed-off-by: Vineeth Remanan Pillai <[email protected]>
>> Signed-off-by: Joel Fernandes (Google) <[email protected]>
>> ---
>> kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++++++++++++----
>> kernel/sched/sched.h | 43 +++++++++++++++++++++++++++++++++
>> 2 files changed, 95 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index de82f88ba98c..70dd013dff1d 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -1921,6 +1921,13 @@ static void task_numa_find_cpu(struct task_numa_env *env,
>> if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
>> continue;
>>
>> + /*
>> + * Skip this cpu if source task's cookie does not match
>> + * with CPU's core cookie.
>> + */
>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
>> + continue;
>> +
>> env->dst_cpu = cpu;
>> if (task_numa_compare(env, taskimp, groupimp, maymove))
>> break;
>> @@ -5867,11 +5874,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
>>
>> /* Traverse only the allowed CPUs */
>> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
>> + struct rq *rq = cpu_rq(i);
>> +
>> + if (!sched_core_cookie_match(rq, p))
>> + continue;
>> +
>> if (sched_idle_cpu(i))
>> return i;
>>
>> if (available_idle_cpu(i)) {
>> - struct rq *rq = cpu_rq(i);
>> struct cpuidle_state *idle = idle_get_state(rq);
>> if (idle && idle->exit_latency < min_exit_latency) {
>> /*
>> @@ -6129,8 +6140,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>> for_each_cpu_wrap(cpu, cpus, target) {
>> if (!--nr)
>> return -1;
>> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
>> - break;
>> +
>> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
>> + /*
>> + * If Core Scheduling is enabled, select this cpu
>> + * only if the process cookie matches core cookie.
>> + */
>> + if (sched_core_enabled(cpu_rq(cpu))) {
>> + if (__cookie_match(cpu_rq(cpu), p))
>> + break;
>> + } else {
>> + break;
>> + }
>> + }
>
> Isn't this better and equivalent?
>
> if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
> sched_core_cookie_match(cpu_rq(cpu), p))
> break;
>


That was my implementation in an earlier version.
But since this is the hot code path, we want to avoid the idle-core
scan done inside sched_core_cookie_match().
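
To spell out the cost argument, with the helpers as defined in the patch
above: the fast path is a single load-and-compare, while the full helper
also walks the SMT sibling mask:

	static inline bool __cookie_match(struct rq *rq, struct task_struct *p)
	{
		/* O(1): dereference rq->core and compare the cookies */
		return rq->core->core_cookie == p->core_cookie;
	}

	/*
	 * sched_core_cookie_match() additionally runs
	 * for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) to detect an idle
	 * core, which is the part we want to keep out of this fast path.
	 */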

>> }
>>
>> time = cpu_clock(this) - time;
>> @@ -7530,8 +7552,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>> * We do not migrate tasks that are:
>> * 1) throttled_lb_pair, or
>> * 2) cannot be migrated to this CPU due to cpus_ptr, or
>> - * 3) running (obviously), or
>> - * 4) are cache-hot on their current CPU.
>> + * 3) task's cookie does not match with this CPU's core cookie
>> + * 4) running (obviously), or
>> + * 5) are cache-hot on their current CPU.
>> */
>> if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
>> return 0;
>> @@ -7566,6 +7589,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>> return 0;
>> }
>>
>> + /*
>> + * Don't migrate task if the task's cookie does not match
>> + * with the destination CPU's core cookie.
>> + */
>> + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
>> + return 0;
>> +
>> /* Record that we found atleast one task that could run on dst_cpu */
>> env->flags &= ~LBF_ALL_PINNED;
>>
>> @@ -8792,6 +8822,23 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>> p->cpus_ptr))
>> continue;
>>
>> + if (sched_core_enabled(cpu_rq(this_cpu))) {
>> + int i = 0;
>> + bool cookie_match = false;
>> +
>> + for_each_cpu(i, sched_group_span(group)) {
>> + struct rq *rq = cpu_rq(i);
>> +
>> + if (sched_core_cookie_match(rq, p)) {
>> + cookie_match = true;
>> + break;
>> + }
>> + }
>> + /* Skip over this group if no cookie matched */
>> + if (!cookie_match)
>> + continue;
>> + }
>> +
>
> Again, I think this can be refactored because sched_core_cookie_match checks
> for sched_core_enabled()
>
> int i = 0;
> bool cookie_match = false;
> for_each_cpu(i, sched_group_span(group)) {
> if (sched_core_cookie_match(cpu_rq(i), p))
> break;
> }
> if (i >= nr_cpu_ids)
> continue;

There is still a loop here when CONFIG_SCHED_CORE=n, which is unwanted,
I guess.
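
That is, even with a stub that returns true unconditionally, the
iteration is still set up (a sketch of what remains when
CONFIG_SCHED_CORE=n):

	for_each_cpu(i, sched_group_span(group)) {
		if (sched_core_cookie_match(cpu_rq(i), p)) /* stub: always true */
			break;	/* exits on the first CPU, but the walk is still emitted */
	}

whereas the sched_core_enabled() guard lets the whole block compile away.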

Thanks,
-Aubrey

2020-11-30 09:36:03

by Balbir Singh

Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On Thu, Nov 26, 2020 at 05:26:31PM +0800, Li, Aubrey wrote:
> On 2020/11/26 16:32, Balbir Singh wrote:
> > On Thu, Nov 26, 2020 at 11:20:41AM +0800, Li, Aubrey wrote:
> >> On 2020/11/26 6:57, Balbir Singh wrote:
> >>> On Wed, Nov 25, 2020 at 11:12:53AM +0800, Li, Aubrey wrote:
> >>>> On 2020/11/24 23:42, Peter Zijlstra wrote:
> >>>>> On Mon, Nov 23, 2020 at 12:36:10PM +0800, Li, Aubrey wrote:
> >>>>>>>> +#ifdef CONFIG_SCHED_CORE
> >>>>>>>> + /*
> >>>>>>>> + * Skip this cpu if source task's cookie does not match
> >>>>>>>> + * with CPU's core cookie.
> >>>>>>>> + */
> >>>>>>>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
> >>>>>>>> + continue;
> >>>>>>>> +#endif
> >>>>>>>> +
> >>>>>>>
> >>>>>>> Any reason this is under an #ifdef? In sched_core_cookie_match() won't
> >>>>>>> the check for sched_core_enabled() do the right thing even when
> >>>>>>> CONFIG_SCHED_CORE is not enabed?>
> >>>>>> Yes, sched_core_enabled works properly when CONFIG_SCHED_CORE is not
> >>>>>> enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
> >>>>>> sense to leave a core scheduler specific function here even at compile
> >>>>>> time. Also, for the cases in hot path, this saves CPU cycles to avoid
> >>>>>> a judgment.
> >>>>>
> >>>>> No, that's nonsense. If it works, remove the #ifdef. Less (#ifdef) is
> >>>>> more.
> >>>>>
> >>>>
> >>>> Okay, I pasted the refined patch here.
> >>>> @Joel, please let me know if you want me to send it in a separated thread.
> >>>>
> >>>
> >>> You still have a bunch of #ifdefs, can't we just do
> >>>
> >>> #ifndef CONFIG_SCHED_CORE
> >>> static inline bool sched_core_enabled(struct rq *rq)
> >>> {
> >>> return false;
> >>> }
> >>> #endif
> >>>
> >>> and frankly I think even that is not needed because there is a jump
> >>> label __sched_core_enabled that tells us if sched_core is enabled or
> >>> not.
> >>
> >> Hmm..., I need another wrapper for CONFIG_SCHED_CORE specific variables.
> >> How about this one?
> >>
> >
> > Much better :)
> >
> >> Thanks,
> >> -Aubrey
> >>
> >> From 61dac9067e66b5b9ea26c684c8c8235714bab38a Mon Sep 17 00:00:00 2001
> >> From: Aubrey Li <[email protected]>
> >> Date: Thu, 26 Nov 2020 03:08:04 +0000
> >> Subject: [PATCH] sched: migration changes for core scheduling
> >>
> >> - Don't migrate if there is a cookie mismatch
> >> Load balance tries to move task from busiest CPU to the
> >> destination CPU. When core scheduling is enabled, if the
> >> task's cookie does not match with the destination CPU's
> >> core cookie, this task will be skipped by this CPU. This
> >> mitigates the forced idle time on the destination CPU.
> >>
> >> - Select cookie matched idle CPU
> >> In the fast path of task wakeup, select the first cookie matched
> >> idle CPU instead of the first idle CPU.
> >>
> >> - Find cookie matched idlest CPU
> >> In the slow path of task wakeup, find the idlest CPU whose core
> >> cookie matches with task's cookie
> >>
> >> - Don't migrate task if cookie not match
> >> For the NUMA load balance, don't migrate task to the CPU whose
> >> core cookie does not match with task's cookie
> >>
> >> Tested-by: Julien Desfossez <[email protected]>
> >> Signed-off-by: Aubrey Li <[email protected]>
> >> Signed-off-by: Tim Chen <[email protected]>
> >> Signed-off-by: Vineeth Remanan Pillai <[email protected]>
> >> Signed-off-by: Joel Fernandes (Google) <[email protected]>
> >> ---
> >> kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++++++++++++----
> >> kernel/sched/sched.h | 43 +++++++++++++++++++++++++++++++++
> >> 2 files changed, 95 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> >> index de82f88ba98c..70dd013dff1d 100644
> >> --- a/kernel/sched/fair.c
> >> +++ b/kernel/sched/fair.c
> >> @@ -1921,6 +1921,13 @@ static void task_numa_find_cpu(struct task_numa_env *env,
> >> if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
> >> continue;
> >>
> >> + /*
> >> + * Skip this cpu if source task's cookie does not match
> >> + * with CPU's core cookie.
> >> + */
> >> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
> >> + continue;
> >> +
> >> env->dst_cpu = cpu;
> >> if (task_numa_compare(env, taskimp, groupimp, maymove))
> >> break;
> >> @@ -5867,11 +5874,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
> >>
> >> /* Traverse only the allowed CPUs */
> >> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
> >> + struct rq *rq = cpu_rq(i);
> >> +
> >> + if (!sched_core_cookie_match(rq, p))
> >> + continue;
> >> +
> >> if (sched_idle_cpu(i))
> >> return i;
> >>
> >> if (available_idle_cpu(i)) {
> >> - struct rq *rq = cpu_rq(i);
> >> struct cpuidle_state *idle = idle_get_state(rq);
> >> if (idle && idle->exit_latency < min_exit_latency) {
> >> /*
> >> @@ -6129,8 +6140,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> >> for_each_cpu_wrap(cpu, cpus, target) {
> >> if (!--nr)
> >> return -1;
> >> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> >> - break;
> >> +
> >> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
> >> + /*
> >> + * If Core Scheduling is enabled, select this cpu
> >> + * only if the process cookie matches core cookie.
> >> + */
> >> + if (sched_core_enabled(cpu_rq(cpu))) {
> >> + if (__cookie_match(cpu_rq(cpu), p))
> >> + break;
> >> + } else {
> >> + break;
> >> + }
> >> + }
> >
> > Isn't this better and equivalent?
> >
> > if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
> > sched_core_cookie_match(cpu_rq(cpu), p))
> > break;
> >
>
>
> That's my previous implementation from an earlier version.
> But since this is the hot code path, we want to drop the idle-core
> check from sched_core_cookie_match().

I see, so we basically need a jump label. If sched_core_cookie_match()
can be inlined with a check for sched_core_enabled() up front, it might
resolve much of the concern; the readability of this section of code is
not the best.
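
To make the trade-off concrete, the split being described keeps a cheap
cookie compare for the wakeup fast path and reserves the SMT-sibling scan
for the heavier helper. A sketch reusing the helper names from the patches
in this thread (the exact bodies here are approximations):

    /* Fast path: plain cookie compare, no sibling scan. */
    static inline bool __cookie_match(struct rq *rq, struct task_struct *p)
    {
            return rq->core->core_cookie == p->core_cookie;
    }

    static inline bool sched_core_cookie_match(struct rq *rq,
                                               struct task_struct *p)
    {
            int cpu;

            if (!sched_core_enabled(rq))
                    return true;

            /* The cost the hot path avoids: walking the SMT siblings. */
            for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq)))
                    if (!available_idle_cpu(cpu))
                            return __cookie_match(rq, p);

            /* A fully idle core matches any cookie. */
            return true;
    }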

>
> >> }
> >>
> >> time = cpu_clock(this) - time;
> >> @@ -7530,8 +7552,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> >> * We do not migrate tasks that are:
> >> * 1) throttled_lb_pair, or
> >> * 2) cannot be migrated to this CPU due to cpus_ptr, or
> >> - * 3) running (obviously), or
> >> - * 4) are cache-hot on their current CPU.
> >> + * 3) task's cookie does not match with this CPU's core cookie
> >> + * 4) running (obviously), or
> >> + * 5) are cache-hot on their current CPU.
> >> */
> >> if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
> >> return 0;
> >> @@ -7566,6 +7589,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> >> return 0;
> >> }
> >>
> >> + /*
> >> + * Don't migrate task if the task's cookie does not match
> >> + * with the destination CPU's core cookie.
> >> + */
> >> + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
> >> + return 0;
> >> +
> >> /* Record that we found atleast one task that could run on dst_cpu */
> >> env->flags &= ~LBF_ALL_PINNED;
> >>
> >> @@ -8792,6 +8822,23 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
> >> p->cpus_ptr))
> >> continue;
> >>
> >> + if (sched_core_enabled(cpu_rq(this_cpu))) {
> >> + int i = 0;
> >> + bool cookie_match = false;
> >> +
> >> + for_each_cpu(i, sched_group_span(group)) {
> >> + struct rq *rq = cpu_rq(i);
> >> +
> >> + if (sched_core_cookie_match(rq, p)) {
> >> + cookie_match = true;
> >> + break;
> >> + }
> >> + }
> >> + /* Skip over this group if no cookie matched */
> >> + if (!cookie_match)
> >> + continue;
> >> + }
> >> +
> >
> > Again, I think this can be refactored because sched_core_cookie_match checks
> > for sched_core_enabled()
> >
> > int i = 0;
> > bool cookie_match = false;
> > for_each_cpu(i, sched_group_span(group)) {
> > if (sched_core_cookie_match(cpu_rq(i), p))
> > break;
> > }
> > if (i >= nr_cpu_ids)
> > continue;
>
> There is a loop here when CONFIG_SCHED_CORE=n, which is unwanted, I guess.
>

Yes, potentially. Maybe abstract the for_each_cpu into a function and then
optimize out the case for SCHED_CORE=n; I feel all the extra checks in the
various places make the code hard to read.
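
A minimal sketch of that abstraction, anticipating the shape it takes as
sched_group_cookie_match() in the patches further down this thread; the
SCHED_CORE=n stub reduces to a compile-time constant, so the per-CPU loop
disappears entirely:

    #ifdef CONFIG_SCHED_CORE
    static inline bool sched_group_cookie_match(struct rq *rq,
                                                struct task_struct *p,
                                                struct sched_group *group)
    {
            int cpu;

            if (!sched_core_enabled(rq))
                    return true;

            /* One cookie-matched CPU in the group is enough. */
            for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr)
                    if (sched_core_cookie_match(cpu_rq(cpu), p))
                            return true;
            return false;
    }
    #else
    static inline bool sched_group_cookie_match(struct rq *rq,
                                                struct task_struct *p,
                                                struct sched_group *group)
    {
            return true;
    }
    #endif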

Balbir

2020-11-30 10:38:42

by Vincent Guittot

[permalink] [raw]
Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On Wed, 18 Nov 2020 at 00:20, Joel Fernandes (Google)
<[email protected]> wrote:
>
> From: Aubrey Li <[email protected]>
>
> - Don't migrate if there is a cookie mismatch
> Load balance tries to move task from busiest CPU to the
> destination CPU. When core scheduling is enabled, if the
> task's cookie does not match with the destination CPU's
> core cookie, this task will be skipped by this CPU. This
> mitigates the forced idle time on the destination CPU.
>
> - Select cookie matched idle CPU
> In the fast path of task wakeup, select the first cookie matched
> idle CPU instead of the first idle CPU.
>
> - Find cookie matched idlest CPU
> In the slow path of task wakeup, find the idlest CPU whose core
> cookie matches with task's cookie
>
> - Don't migrate task if cookie not match
> For the NUMA load balance, don't migrate task to the CPU whose
> core cookie does not match with task's cookie
>
> Tested-by: Julien Desfossez <[email protected]>
> Signed-off-by: Aubrey Li <[email protected]>
> Signed-off-by: Tim Chen <[email protected]>
> Signed-off-by: Vineeth Remanan Pillai <[email protected]>
> Signed-off-by: Joel Fernandes (Google) <[email protected]>
> ---
> kernel/sched/fair.c | 64 ++++++++++++++++++++++++++++++++++++++++----
> kernel/sched/sched.h | 29 ++++++++++++++++++++
> 2 files changed, 88 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index de82f88ba98c..ceb3906c9a8a 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1921,6 +1921,15 @@ static void task_numa_find_cpu(struct task_numa_env *env,
> if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
> continue;
>
> +#ifdef CONFIG_SCHED_CORE
> + /*
> + * Skip this cpu if source task's cookie does not match
> + * with CPU's core cookie.
> + */
> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
> + continue;
> +#endif
> +
> env->dst_cpu = cpu;
> if (task_numa_compare(env, taskimp, groupimp, maymove))
> break;
> @@ -5867,11 +5876,17 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
>
> /* Traverse only the allowed CPUs */
> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
> + struct rq *rq = cpu_rq(i);
> +
> +#ifdef CONFIG_SCHED_CORE
> + if (!sched_core_cookie_match(rq, p))
> + continue;
> +#endif
> +
> if (sched_idle_cpu(i))
> return i;
>
> if (available_idle_cpu(i)) {
> - struct rq *rq = cpu_rq(i);
> struct cpuidle_state *idle = idle_get_state(rq);
> if (idle && idle->exit_latency < min_exit_latency) {
> /*
> @@ -6129,8 +6144,18 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> for_each_cpu_wrap(cpu, cpus, target) {
> if (!--nr)
> return -1;
> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> - break;
> +
> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
> +#ifdef CONFIG_SCHED_CORE
> + /*
> + * If Core Scheduling is enabled, select this cpu
> + * only if the process cookie matches core cookie.
> + */
> + if (sched_core_enabled(cpu_rq(cpu)) &&
> + p->core_cookie == cpu_rq(cpu)->core->core_cookie)
> +#endif
> + break;
> + }

This makes the code unreadable.
Put this coresched-specific stuff in an inline function; you can have
a look at what is done with asym_fits_capacity().
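
For context, the pattern being pointed to looks roughly like this in the
fair.c of that era (quoted from memory, so treat it as approximate rather
than authoritative): a static-key-guarded inline that degenerates to
"return true" when the feature is not in play:

    static inline bool asym_fits_capacity(int task_util, int cpu)
    {
            if (static_branch_unlikely(&sched_asym_cpucapacity))
                    return fits_capacity(task_util, capacity_of(cpu));

            return true;
    }

The coresched analogue is the sched_cpu_cookie_match() helper that shows
up in Aubrey's Dec 2 patch later in the thread.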

> }
>
> time = cpu_clock(this) - time;
> @@ -7530,8 +7555,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> * We do not migrate tasks that are:
> * 1) throttled_lb_pair, or
> * 2) cannot be migrated to this CPU due to cpus_ptr, or
> - * 3) running (obviously), or
> - * 4) are cache-hot on their current CPU.
> + * 3) task's cookie does not match with this CPU's core cookie
> + * 4) running (obviously), or
> + * 5) are cache-hot on their current CPU.
> */
> if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
> return 0;
> @@ -7566,6 +7592,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> return 0;
> }
>
> +#ifdef CONFIG_SCHED_CORE
> + /*
> + * Don't migrate task if the task's cookie does not match
> + * with the destination CPU's core cookie.
> + */
> + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
> + return 0;
> +#endif
> +
> /* Record that we found atleast one task that could run on dst_cpu */
> env->flags &= ~LBF_ALL_PINNED;
>
> @@ -8792,6 +8827,25 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
> p->cpus_ptr))
> continue;
>
> +#ifdef CONFIG_SCHED_CORE
> + if (sched_core_enabled(cpu_rq(this_cpu))) {
> + int i = 0;
> + bool cookie_match = false;
> +
> + for_each_cpu(i, sched_group_span(group)) {
> + struct rq *rq = cpu_rq(i);
> +
> + if (sched_core_cookie_match(rq, p)) {
> + cookie_match = true;
> + break;
> + }
> + }
> + /* Skip over this group if no cookie matched */
> + if (!cookie_match)
> + continue;
> + }
> +#endif

same here, encapsulate this to keep find_idlest_group readable
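
The encapsulated form ends up as a single call in the loop body (see
Aubrey's Dec 2 patch below):

    /* Skip over this group if no cookie matched */
    if (!sched_group_cookie_match(cpu_rq(this_cpu), p, group))
            continue;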

> +
> local_group = cpumask_test_cpu(this_cpu,
> sched_group_span(group));
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index e72942a9ee11..de553d39aa40 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1135,6 +1135,35 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
>
> bool cfs_prio_less(struct task_struct *a, struct task_struct *b);
>
> +/*
> + * Helper to check if the CPU's core cookie matches with the task's cookie
> + * when core scheduling is enabled.
> + * A special case is that the task's cookie always matches with CPU's core
> + * cookie if the CPU is in an idle core.
> + */
> +static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
> +{
> + bool idle_core = true;
> + int cpu;
> +
> + /* Ignore cookie match if core scheduler is not enabled on the CPU. */
> + if (!sched_core_enabled(rq))
> + return true;
> +
> + for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
> + if (!available_idle_cpu(cpu)) {
> + idle_core = false;
> + break;
> + }
> + }
> +
> + /*
> + * A CPU in an idle core is always the best choice for tasks with
> + * cookies.
> + */
> + return idle_core || rq->core->core_cookie == p->core_cookie;
> +}
> +
> extern void queue_core_balance(struct rq *rq);
>
> #else /* !CONFIG_SCHED_CORE */
> --
> 2.29.2.299.gdc1121823c-goog
>

2020-11-30 12:34:55

by Li, Aubrey

[permalink] [raw]
Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On 2020/11/30 17:33, Balbir Singh wrote:
> On Thu, Nov 26, 2020 at 05:26:31PM +0800, Li, Aubrey wrote:
>> On 2020/11/26 16:32, Balbir Singh wrote:
>>> On Thu, Nov 26, 2020 at 11:20:41AM +0800, Li, Aubrey wrote:
>>>> On 2020/11/26 6:57, Balbir Singh wrote:
>>>>> On Wed, Nov 25, 2020 at 11:12:53AM +0800, Li, Aubrey wrote:
>>>>>> On 2020/11/24 23:42, Peter Zijlstra wrote:
>>>>>>> On Mon, Nov 23, 2020 at 12:36:10PM +0800, Li, Aubrey wrote:
>>>>>>>>>> +#ifdef CONFIG_SCHED_CORE
>>>>>>>>>> + /*
>>>>>>>>>> + * Skip this cpu if source task's cookie does not match
>>>>>>>>>> + * with CPU's core cookie.
>>>>>>>>>> + */
>>>>>>>>>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
>>>>>>>>>> + continue;
>>>>>>>>>> +#endif
>>>>>>>>>> +
>>>>>>>>>
>>>>>>>>> Any reason this is under an #ifdef? In sched_core_cookie_match() won't
>>>>>>>>> the check for sched_core_enabled() do the right thing even when
>>>>>>>>> CONFIG_SCHED_CORE is not enabled?
>>>>>>>> Yes, sched_core_enabled works properly when CONFIG_SCHED_CORE is not
>>>>>>>> enabled. But when CONFIG_SCHED_CORE is not enabled, it does not make
>>>>>>>> sense to leave a core-scheduler-specific function here even at compile
>>>>>>>> time. Also, in the hot-path cases, this saves CPU cycles by avoiding
>>>>>>>> an extra check.
>>>>>>>
>>>>>>> No, that's nonsense. If it works, remove the #ifdef. Less (#ifdef) is
>>>>>>> more.
>>>>>>>
>>>>>>
>>>>>> Okay, I pasted the refined patch here.
>>>>>> @Joel, please let me know if you want me to send it in a separate thread.
>>>>>>
>>>>>
>>>>> You still have a bunch of #ifdefs, can't we just do
>>>>>
>>>>> #ifndef CONFIG_SCHED_CORE
>>>>> static inline bool sched_core_enabled(struct rq *rq)
>>>>> {
>>>>> return false;
>>>>> }
>>>>> #endif
>>>>>
>>>>> and frankly I think even that is not needed because there is a jump
>>>>> label __sched_core_enabled that tells us if sched_core is enabled or
>>>>> not.
>>>>
>>>> Hmm..., I need another wrapper for CONFIG_SCHED_CORE specific variables.
>>>> How about this one?
>>>>
>>>
>>> Much better :)
>>>
>>>> Thanks,
>>>> -Aubrey
>>>>
>>>> From 61dac9067e66b5b9ea26c684c8c8235714bab38a Mon Sep 17 00:00:00 2001
>>>> From: Aubrey Li <[email protected]>
>>>> Date: Thu, 26 Nov 2020 03:08:04 +0000
>>>> Subject: [PATCH] sched: migration changes for core scheduling
>>>>
>>>> - Don't migrate if there is a cookie mismatch
>>>> Load balance tries to move task from busiest CPU to the
>>>> destination CPU. When core scheduling is enabled, if the
>>>> task's cookie does not match with the destination CPU's
>>>> core cookie, this task will be skipped by this CPU. This
>>>> mitigates the forced idle time on the destination CPU.
>>>>
>>>> - Select cookie matched idle CPU
>>>> In the fast path of task wakeup, select the first cookie matched
>>>> idle CPU instead of the first idle CPU.
>>>>
>>>> - Find cookie matched idlest CPU
>>>> In the slow path of task wakeup, find the idlest CPU whose core
>>>> cookie matches with task's cookie
>>>>
>>>> - Don't migrate task if cookie not match
>>>> For the NUMA load balance, don't migrate task to the CPU whose
>>>> core cookie does not match with task's cookie
>>>>
>>>> Tested-by: Julien Desfossez <[email protected]>
>>>> Signed-off-by: Aubrey Li <[email protected]>
>>>> Signed-off-by: Tim Chen <[email protected]>
>>>> Signed-off-by: Vineeth Remanan Pillai <[email protected]>
>>>> Signed-off-by: Joel Fernandes (Google) <[email protected]>
>>>> ---
>>>> kernel/sched/fair.c | 57 ++++++++++++++++++++++++++++++++++++++++----
>>>> kernel/sched/sched.h | 43 +++++++++++++++++++++++++++++++++
>>>> 2 files changed, 95 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>>>> index de82f88ba98c..70dd013dff1d 100644
>>>> --- a/kernel/sched/fair.c
>>>> +++ b/kernel/sched/fair.c
>>>> @@ -1921,6 +1921,13 @@ static void task_numa_find_cpu(struct task_numa_env *env,
>>>> if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
>>>> continue;
>>>>
>>>> + /*
>>>> + * Skip this cpu if source task's cookie does not match
>>>> + * with CPU's core cookie.
>>>> + */
>>>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
>>>> + continue;
>>>> +
>>>> env->dst_cpu = cpu;
>>>> if (task_numa_compare(env, taskimp, groupimp, maymove))
>>>> break;
>>>> @@ -5867,11 +5874,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
>>>>
>>>> /* Traverse only the allowed CPUs */
>>>> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
>>>> + struct rq *rq = cpu_rq(i);
>>>> +
>>>> + if (!sched_core_cookie_match(rq, p))
>>>> + continue;
>>>> +
>>>> if (sched_idle_cpu(i))
>>>> return i;
>>>>
>>>> if (available_idle_cpu(i)) {
>>>> - struct rq *rq = cpu_rq(i);
>>>> struct cpuidle_state *idle = idle_get_state(rq);
>>>> if (idle && idle->exit_latency < min_exit_latency) {
>>>> /*
>>>> @@ -6129,8 +6140,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>>>> for_each_cpu_wrap(cpu, cpus, target) {
>>>> if (!--nr)
>>>> return -1;
>>>> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
>>>> - break;
>>>> +
>>>> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
>>>> + /*
>>>> + * If Core Scheduling is enabled, select this cpu
>>>> + * only if the process cookie matches core cookie.
>>>> + */
>>>> + if (sched_core_enabled(cpu_rq(cpu))) {
>>>> + if (__cookie_match(cpu_rq(cpu), p))
>>>> + break;
>>>> + } else {
>>>> + break;
>>>> + }
>>>> + }
>>>
>>> Isn't this better and equivalent?
>>>
>>> if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
>>> sched_core_cookie_match(cpu_rq(cpu), p))
>>> break;
>>>
>>
>>
>> That's my previous implementation from an earlier version.
>> But since this is the hot code path, we want to drop the idle-core
>> check from sched_core_cookie_match().
>
> I see, so we basically need a jump label. If sched_core_cookie_match()
> can be inlined with a check for sched_core_enabled() up front, it might
> resolve much of the concern; the readability of this section of code is
> not the best.
>
>>
>>>> }
>>>>
>>>> time = cpu_clock(this) - time;
>>>> @@ -7530,8 +7552,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>>>> * We do not migrate tasks that are:
>>>> * 1) throttled_lb_pair, or
>>>> * 2) cannot be migrated to this CPU due to cpus_ptr, or
>>>> - * 3) running (obviously), or
>>>> - * 4) are cache-hot on their current CPU.
>>>> + * 3) task's cookie does not match with this CPU's core cookie
>>>> + * 4) running (obviously), or
>>>> + * 5) are cache-hot on their current CPU.
>>>> */
>>>> if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
>>>> return 0;
>>>> @@ -7566,6 +7589,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>>>> return 0;
>>>> }
>>>>
>>>> + /*
>>>> + * Don't migrate task if the task's cookie does not match
>>>> + * with the destination CPU's core cookie.
>>>> + */
>>>> + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
>>>> + return 0;
>>>> +
>>>> /* Record that we found atleast one task that could run on dst_cpu */
>>>> env->flags &= ~LBF_ALL_PINNED;
>>>>
>>>> @@ -8792,6 +8822,23 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>>>> p->cpus_ptr))
>>>> continue;
>>>>
>>>> + if (sched_core_enabled(cpu_rq(this_cpu))) {
>>>> + int i = 0;
>>>> + bool cookie_match = false;
>>>> +
>>>> + for_each_cpu(i, sched_group_span(group)) {
>>>> + struct rq *rq = cpu_rq(i);
>>>> +
>>>> + if (sched_core_cookie_match(rq, p)) {
>>>> + cookie_match = true;
>>>> + break;
>>>> + }
>>>> + }
>>>> + /* Skip over this group if no cookie matched */
>>>> + if (!cookie_match)
>>>> + continue;
>>>> + }
>>>> +
>>>
>>> Again, I think this can be refactored because sched_core_cookie_match checks
>>> for sched_core_enabled()
>>>
>>> int i = 0;
>>> bool cookie_match = false;
>>> for_each_cpu(i, sched_group_span(group)) {
>>> if (sched_core_cookie_match(cpu_rq(i), p))
>>> break;
>>> }
>>> if (i >= nr_cpu_ids)
>>> continue;
>>
>> There is a loop here when CONFIG_SCHED_CORE=n, which is unwanted, I guess.
>>
>
> Yes, potentially. Maybe abstract the for_each_cpu into a function and then
> optimize out the case for SCHED_CORE=n; I feel all the extra checks in the
> various places make the code hard to read.

Okay, I see your point; let me see if I can make it better.

Thanks,
-Aubrey

2020-11-30 12:38:42

by Li, Aubrey

[permalink] [raw]
Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On 2020/11/30 18:35, Vincent Guittot wrote:
> On Wed, 18 Nov 2020 at 00:20, Joel Fernandes (Google)
> <[email protected]> wrote:
>>
>> From: Aubrey Li <[email protected]>
>>
>> - Don't migrate if there is a cookie mismatch
>> Load balance tries to move task from busiest CPU to the
>> destination CPU. When core scheduling is enabled, if the
>> task's cookie does not match with the destination CPU's
>> core cookie, this task will be skipped by this CPU. This
>> mitigates the forced idle time on the destination CPU.
>>
>> - Select cookie matched idle CPU
>> In the fast path of task wakeup, select the first cookie matched
>> idle CPU instead of the first idle CPU.
>>
>> - Find cookie matched idlest CPU
>> In the slow path of task wakeup, find the idlest CPU whose core
>> cookie matches with task's cookie
>>
>> - Don't migrate task if cookie not match
>> For the NUMA load balance, don't migrate task to the CPU whose
>> core cookie does not match with task's cookie
>>
>> Tested-by: Julien Desfossez <[email protected]>
>> Signed-off-by: Aubrey Li <[email protected]>
>> Signed-off-by: Tim Chen <[email protected]>
>> Signed-off-by: Vineeth Remanan Pillai <[email protected]>
>> Signed-off-by: Joel Fernandes (Google) <[email protected]>
>> ---
>> kernel/sched/fair.c | 64 ++++++++++++++++++++++++++++++++++++++++----
>> kernel/sched/sched.h | 29 ++++++++++++++++++++
>> 2 files changed, 88 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index de82f88ba98c..ceb3906c9a8a 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -1921,6 +1921,15 @@ static void task_numa_find_cpu(struct task_numa_env *env,
>> if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
>> continue;
>>
>> +#ifdef CONFIG_SCHED_CORE
>> + /*
>> + * Skip this cpu if source task's cookie does not match
>> + * with CPU's core cookie.
>> + */
>> + if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
>> + continue;
>> +#endif
>> +
>> env->dst_cpu = cpu;
>> if (task_numa_compare(env, taskimp, groupimp, maymove))
>> break;
>> @@ -5867,11 +5876,17 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
>>
>> /* Traverse only the allowed CPUs */
>> for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
>> + struct rq *rq = cpu_rq(i);
>> +
>> +#ifdef CONFIG_SCHED_CORE
>> + if (!sched_core_cookie_match(rq, p))
>> + continue;
>> +#endif
>> +
>> if (sched_idle_cpu(i))
>> return i;
>>
>> if (available_idle_cpu(i)) {
>> - struct rq *rq = cpu_rq(i);
>> struct cpuidle_state *idle = idle_get_state(rq);
>> if (idle && idle->exit_latency < min_exit_latency) {
>> /*
>> @@ -6129,8 +6144,18 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>> for_each_cpu_wrap(cpu, cpus, target) {
>> if (!--nr)
>> return -1;
>> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
>> - break;
>> +
>> + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
>> +#ifdef CONFIG_SCHED_CORE
>> + /*
>> + * If Core Scheduling is enabled, select this cpu
>> + * only if the process cookie matches core cookie.
>> + */
>> + if (sched_core_enabled(cpu_rq(cpu)) &&
>> + p->core_cookie == cpu_rq(cpu)->core->core_cookie)
>> +#endif
>> + break;
>> + }
>
> This makes the code unreadable.
> Put this coresched-specific stuff in an inline function; you can have
> a look at what is done with asym_fits_capacity().
>
This is done in a refined version. Sorry, the version I pasted embedded in
this thread is not the latest one.

>> }
>>
>> time = cpu_clock(this) - time;
>> @@ -7530,8 +7555,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>> * We do not migrate tasks that are:
>> * 1) throttled_lb_pair, or
>> * 2) cannot be migrated to this CPU due to cpus_ptr, or
>> - * 3) running (obviously), or
>> - * 4) are cache-hot on their current CPU.
>> + * 3) task's cookie does not match with this CPU's core cookie
>> + * 4) running (obviously), or
>> + * 5) are cache-hot on their current CPU.
>> */
>> if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
>> return 0;
>> @@ -7566,6 +7592,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>> return 0;
>> }
>>
>> +#ifdef CONFIG_SCHED_CORE
>> + /*
>> + * Don't migrate task if the task's cookie does not match
>> + * with the destination CPU's core cookie.
>> + */
>> + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
>> + return 0;
>> +#endif
>> +
>> /* Record that we found atleast one task that could run on dst_cpu */
>> env->flags &= ~LBF_ALL_PINNED;
>>
>> @@ -8792,6 +8827,25 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
>> p->cpus_ptr))
>> continue;
>>
>> +#ifdef CONFIG_SCHED_CORE
>> + if (sched_core_enabled(cpu_rq(this_cpu))) {
>> + int i = 0;
>> + bool cookie_match = false;
>> +
>> + for_each_cpu(i, sched_group_span(group)) {
>> + struct rq *rq = cpu_rq(i);
>> +
>> + if (sched_core_cookie_match(rq, p)) {
>> + cookie_match = true;
>> + break;
>> + }
>> + }
>> + /* Skip over this group if no cookie matched */
>> + if (!cookie_match)
>> + continue;
>> + }
>> +#endif
>
> same here, encapsulate this to keep find_idlest_group readable
>
Okay, I'll try to refine this.

Thanks,
-Aubrey

2020-12-02 14:15:40

by Li, Aubrey

[permalink] [raw]
Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

Hi Balbir,

I've placed the patch embedded in this thread again; any comments are welcome.

Thanks,
-Aubrey
======================================================================

From d64455dcaf47329673903a68a9df1151400cdd7a Mon Sep 17 00:00:00 2001
From: Aubrey Li <[email protected]>
Date: Wed, 2 Dec 2020 13:53:30 +0000
Subject: [PATCH] sched: migration changes for core scheduling

- Don't migrate if there is a cookie mismatch
Load balance tries to move task from busiest CPU to the
destination CPU. When core scheduling is enabled, if the
task's cookie does not match with the destination CPU's
core cookie, this task will be skipped by this CPU. This
mitigates the forced idle time on the destination CPU.

- Select cookie matched idle CPU
In the fast path of task wakeup, select the first cookie matched
idle CPU instead of the first idle CPU.

- Find cookie matched idlest CPU
In the slow path of task wakeup, find the idlest CPU whose core
cookie matches with task's cookie

- Don't migrate task if cookie not match
For the NUMA load balance, don't migrate task to the CPU whose
core cookie does not match with task's cookie

Cc: Balbir Singh <[email protected]>
Cc: Vincent Guittot <[email protected]>
Tested-by: Julien Desfossez <[email protected]>
Signed-off-by: Aubrey Li <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
Signed-off-by: Vineeth Remanan Pillai <[email protected]>
Signed-off-by: Joel Fernandes (Google) <[email protected]>
---
kernel/sched/fair.c | 33 +++++++++++++++++---
kernel/sched/sched.h | 71 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index de82f88ba98c..b8657766b660 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1921,6 +1921,13 @@ static void task_numa_find_cpu(struct task_numa_env *env,
if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
continue;

+ /*
+ * Skip this cpu if source task's cookie does not match
+ * with CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
+ continue;
+
env->dst_cpu = cpu;
if (task_numa_compare(env, taskimp, groupimp, maymove))
break;
@@ -5867,11 +5874,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this

/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+ struct rq *rq = cpu_rq(i);
+
+ if (!sched_core_cookie_match(rq, p))
+ continue;
+
if (sched_idle_cpu(i))
return i;

if (available_idle_cpu(i)) {
- struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
/*
@@ -6129,7 +6140,9 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, cpus, target) {
if (!--nr)
return -1;
- if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+
+ if (available_idle_cpu(cpu) || sched_idle_cpu(cpu) &&
+ sched_cpu_cookie_match(cpu_rq(cpu), p))
break;
}

@@ -7530,8 +7543,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
* We do not migrate tasks that are:
* 1) throttled_lb_pair, or
* 2) cannot be migrated to this CPU due to cpus_ptr, or
- * 3) running (obviously), or
- * 4) are cache-hot on their current CPU.
+ * 3) task's cookie does not match with this CPU's core cookie
+ * 4) running (obviously), or
+ * 5) are cache-hot on their current CPU.
*/
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;
@@ -7566,6 +7580,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
return 0;
}

+ /*
+ * Don't migrate task if the task's cookie does not match
+ * with the destination CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+ return 0;
+
/* Record that we found atleast one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;

@@ -8792,6 +8813,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
p->cpus_ptr))
continue;

+ /* Skip over this group if no cookie matched */
+ if (!sched_group_cookie_match(cpu_rq(this_cpu), p, group))
+ continue;
+
local_group = cpumask_test_cpu(this_cpu,
sched_group_span(group));

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e72942a9ee11..e1adfffe6e39 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1135,6 +1135,61 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)

bool cfs_prio_less(struct task_struct *a, struct task_struct *b);

+/*
+ * Helpers to check if the CPU's core cookie matches with the task's cookie
+ * when core scheduling is enabled.
+ * A special case is that the task's cookie always matches with CPU's core
+ * cookie if the CPU is in an idle core.
+ */
+static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ return rq->core->core_cookie == p->core_cookie;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ bool idle_core = true;
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
+ if (!available_idle_cpu(cpu)) {
+ idle_core = false;
+ break;
+ }
+ }
+
+ /*
+ * A CPU in an idle core is always the best choice for tasks with
+ * cookies.
+ */
+ return idle_core || __cookie_match(rq, p);
+}
+
+static inline bool sched_group_cookie_match(struct rq *rq,
+ struct task_struct *p,
+ struct sched_group *group)
+{
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
+ if (sched_core_cookie_match(cpu_rq(cpu), p))
+ return true;
+ }
+ return false;
+}
+
extern void queue_core_balance(struct rq *rq);

#else /* !CONFIG_SCHED_CORE */
@@ -1153,6 +1208,22 @@ static inline void queue_core_balance(struct rq *rq)
{
}

+static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
+
+static inline bool sched_group_cookie_match(struct rq *rq,
+ struct task_struct *p,
+ struct sched_group *group)
+{
+ return true;
+}
#endif /* CONFIG_SCHED_CORE */

#ifdef CONFIG_SCHED_SMT
--
2.17.1

2020-12-03 01:10:38

by Li, Aubrey

[permalink] [raw]
Subject: Re: [PATCH -tip 14/32] sched: migration changes for core scheduling

On 2020/12/2 22:09, Li, Aubrey wrote:
> Hi Balbir,
>
> I've placed the patch embedded in this thread again; any comments are welcome.

Sorry, this version needs more work; it is refined below. I also realized
I should put a version number on the patch, starting from v2 now.
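
Comparing the two versions embedded in this thread shows what needed the
rework. Most visibly, the combined condition in select_idle_cpu() was
missing parentheses (&& binds tighter than ||, so an available-idle CPU
was picked without any cookie check), and sched_core_cookie_match() called
a __cookie_match() helper that the Dec 2 patch never defined:

    /* Dec 2 version: parses as A || (B && cookie_match) */
    if (available_idle_cpu(cpu) || sched_idle_cpu(cpu) &&
        sched_cpu_cookie_match(cpu_rq(cpu), p))
            break;

    /* v2 below: the intended (A || B) && cookie_match */
    if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
        sched_cpu_cookie_match(cpu_rq(cpu), p))
            break;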

Thanks,
-Aubrey
======================================================================
From aff2919889635aa9311d15bac3e949af0300ddc1 Mon Sep 17 00:00:00 2001
From: Aubrey Li <[email protected]>
Date: Thu, 3 Dec 2020 00:51:18 +0000
Subject: [PATCH v2] sched: migration changes for core scheduling

- Don't migrate if there is a cookie mismatch
Load balance tries to move task from busiest CPU to the
destination CPU. When core scheduling is enabled, if the
task's cookie does not match with the destination CPU's
core cookie, this task will be skipped by this CPU. This
mitigates the forced idle time on the destination CPU.

- Select cookie matched idle CPU
In the fast path of task wakeup, select the first cookie matched
idle CPU instead of the first idle CPU.

- Find cookie matched idlest CPU
In the slow path of task wakeup, find the idlest CPU whose core
cookie matches with task's cookie

- Don't migrate task if cookie not match
For the NUMA load balance, don't migrate task to the CPU whose
core cookie does not match with task's cookie

Cc: Balbir Singh <[email protected]>
Cc: Vincent Guittot <[email protected]>
Tested-by: Julien Desfossez <[email protected]>
Signed-off-by: Aubrey Li <[email protected]>
Signed-off-by: Tim Chen <[email protected]>
Signed-off-by: Vineeth Remanan Pillai <[email protected]>
Signed-off-by: Joel Fernandes (Google) <[email protected]>
---
kernel/sched/fair.c | 33 +++++++++++++++++---
kernel/sched/sched.h | 72 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index de82f88ba98c..afdfea70c58c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1921,6 +1921,13 @@ static void task_numa_find_cpu(struct task_numa_env *env,
if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
continue;

+ /*
+ * Skip this cpu if source task's cookie does not match
+ * with CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(cpu), env->p))
+ continue;
+
env->dst_cpu = cpu;
if (task_numa_compare(env, taskimp, groupimp, maymove))
break;
@@ -5867,11 +5874,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this

/* Traverse only the allowed CPUs */
for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+ struct rq *rq = cpu_rq(i);
+
+ if (!sched_core_cookie_match(rq, p))
+ continue;
+
if (sched_idle_cpu(i))
return i;

if (available_idle_cpu(i)) {
- struct rq *rq = cpu_rq(i);
struct cpuidle_state *idle = idle_get_state(rq);
if (idle && idle->exit_latency < min_exit_latency) {
/*
@@ -6129,7 +6140,9 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, cpus, target) {
if (!--nr)
return -1;
- if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+
+ if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
+ sched_cpu_cookie_match(cpu_rq(cpu), p))
break;
}

@@ -7530,8 +7543,9 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
* We do not migrate tasks that are:
* 1) throttled_lb_pair, or
* 2) cannot be migrated to this CPU due to cpus_ptr, or
- * 3) running (obviously), or
- * 4) are cache-hot on their current CPU.
+ * 3) task's cookie does not match with this CPU's core cookie
+ * 4) running (obviously), or
+ * 5) are cache-hot on their current CPU.
*/
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
return 0;
@@ -7566,6 +7580,13 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
return 0;
}

+ /*
+ * Don't migrate task if the task's cookie does not match
+ * with the destination CPU's core cookie.
+ */
+ if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+ return 0;
+
/* Record that we found atleast one task that could run on dst_cpu */
env->flags &= ~LBF_ALL_PINNED;

@@ -8792,6 +8813,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
p->cpus_ptr))
continue;

+ /* Skip over this group if no cookie matched */
+ if (!sched_group_cookie_match(cpu_rq(this_cpu), p, group))
+ continue;
+
local_group = cpumask_test_cpu(this_cpu,
sched_group_span(group));

diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e72942a9ee11..82917ce183b4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1119,6 +1119,7 @@ static inline bool is_migration_disabled(struct task_struct *p)

#ifdef CONFIG_SCHED_CORE
DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
+static inline struct cpumask *sched_group_span(struct sched_group *sg);

static inline bool sched_core_enabled(struct rq *rq)
{
@@ -1135,6 +1136,61 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)

bool cfs_prio_less(struct task_struct *a, struct task_struct *b);

+/*
+ * Helpers to check if the CPU's core cookie matches with the task's cookie
+ * when core scheduling is enabled.
+ * A special case is that the task's cookie always matches with CPU's core
+ * cookie if the CPU is in an idle core.
+ */
+static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ return rq->core->core_cookie == p->core_cookie;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ bool idle_core = true;
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
+ if (!available_idle_cpu(cpu)) {
+ idle_core = false;
+ break;
+ }
+ }
+
+ /*
+ * A CPU in an idle core is always the best choice for tasks with
+ * cookies.
+ */
+ return idle_core || rq->core->core_cookie == p->core_cookie;
+}
+
+static inline bool sched_group_cookie_match(struct rq *rq,
+ struct task_struct *p,
+ struct sched_group *group)
+{
+ int cpu;
+
+ /* Ignore cookie match if core scheduler is not enabled on the CPU. */
+ if (!sched_core_enabled(rq))
+ return true;
+
+ for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
+ if (sched_core_cookie_match(cpu_rq(cpu), p))
+ return true;
+ }
+ return false;
+}
+
extern void queue_core_balance(struct rq *rq);

#else /* !CONFIG_SCHED_CORE */
@@ -1153,6 +1209,22 @@ static inline void queue_core_balance(struct rq *rq)
{
}

+static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+ return true;
+}
+
+static inline bool sched_group_cookie_match(struct rq *rq,
+ struct task_struct *p,
+ struct sched_group *group)
+{
+ return true;
+}
#endif /* CONFIG_SCHED_CORE */

#ifdef CONFIG_SCHED_SMT
--
2.17.1