2021-01-15 10:12:41

by Mel Gorman

Subject: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

From: Peter Zijlstra (Intel) <[email protected]>

Both select_idle_core() and select_idle_cpu() do a loop over the same
cpumask. Observe that by clearing the already visited CPUs, we can
fold the iteration and iterate a core at a time.

All we need to do is remember any non-idle CPU we encountered while
scanning for an idle core. This way we'll only iterate every CPU once.
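
In rough pseudo-C, the folded scan has the following shape (a simplified
sketch only; the SIS_PROP throttling and the has_idle_cores update in the
actual hunks below are omitted):

	/* sketch: cpus = LLC span & p->cpus_ptr, smt = test_idle_cores() */
	for_each_cpu_wrap(cpu, cpus, target) {
		if (smt) {
			/* scan the whole core, remember an idle sibling */
			i = select_idle_core(p, cpu, cpus, &idle_cpu);
			if (i >= 0)
				return i;	/* fully idle core */
			/* siblings of 'cpu' are now cleared from 'cpus' */
		} else if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) {
			return cpu;	/* plain idle-CPU scan */
		}
	}
	return idle_cpu;	/* idle sibling seen during the core scan, or -1 */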

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Mel Gorman <[email protected]>
---
kernel/sched/fair.c | 97 +++++++++++++++++++++++++++------------------
1 file changed, 59 insertions(+), 38 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 12e08da90024..6c0f841e9e75 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 	return new_cpu;
 }
 
+static inline int __select_idle_cpu(struct task_struct *p, int core, struct cpumask *cpus)
+{
+	if (available_idle_cpu(core) || sched_idle_cpu(core))
+		return core;
+
+	return -1;
+}
+
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6066,40 +6074,34 @@ void __update_idle_core(struct rq *rq)
  * there are no idle cores left in the system; tracked through
  * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
  */
-static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
 {
-	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu;
+	bool idle = true;
+	int cpu;
 
 	if (!static_branch_likely(&sched_smt_present))
-		return -1;
-
-	if (!test_idle_cores(target, false))
-		return -1;
-
-	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+		return __select_idle_cpu(p, core, cpus);
 
-	for_each_cpu_wrap(core, cpus, target) {
-		bool idle = true;
-
-		for_each_cpu(cpu, cpu_smt_mask(core)) {
-			if (!available_idle_cpu(cpu)) {
-				idle = false;
-				break;
+	for_each_cpu(cpu, cpu_smt_mask(core)) {
+		if (!available_idle_cpu(cpu)) {
+			idle = false;
+			if (*idle_cpu == -1) {
+				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
+					*idle_cpu = cpu;
+					break;
+				}
+				continue;
 			}
+			break;
 		}
-
-		if (idle)
-			return core;
-
-		cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
+		if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+			*idle_cpu = cpu;
 	}
 
-	/*
-	 * Failed to find an idle core; stop looking for one.
-	 */
-	set_idle_cores(target, 0);
+	if (idle)
+		return core;
 
+	cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
 	return -1;
 }
 
@@ -6107,9 +6109,18 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 
 #define sched_smt_weight 1
 
-static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+static inline void set_idle_cores(int cpu, int val)
 {
-	return -1;
+}
+
+static inline bool test_idle_cores(int cpu, bool def)
+{
+	return def;
+}
+
+static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
+{
+	return __select_idle_cpu(p, core, cpus);
 }
 
 #endif /* CONFIG_SCHED_SMT */
@@ -6124,10 +6135,11 @@ static inline int select_idle_core(struct task_struct *p, struct sched_domain *s
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+	int i, cpu, idle_cpu = -1, nr = INT_MAX;
+	bool smt = test_idle_cores(target, false);
+	int this = smp_processor_id();
 	struct sched_domain *this_sd;
 	u64 time;
-	int this = smp_processor_id();
-	int cpu, nr = INT_MAX;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
@@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
 	cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
 
-	if (sched_feat(SIS_PROP)) {
+	if (sched_feat(SIS_PROP) && !smt) {
 		u64 avg_cost, avg_idle, span_avg;
 
 		/*
@@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	for_each_cpu_wrap(cpu, cpus, target) {
 		if (!--nr)
 			return -1;
-		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
-			break;
+		if (smt) {
+			i = select_idle_core(p, cpu, cpus, &idle_cpu);
+			if ((unsigned int)i < nr_cpumask_bits)
+				return i;
+
+		} else {
+			i = __select_idle_cpu(p, cpu, cpus);
+			if ((unsigned int)i < nr_cpumask_bits) {
+				idle_cpu = i;
+				break;
+			}
+		}
 	}
 
-	if (sched_feat(SIS_PROP)) {
+	if (smt)
+		set_idle_cores(this, false);
+
+	if (sched_feat(SIS_PROP) && !smt) {
 		time = cpu_clock(this) - time;
 		update_avg(&this_sd->avg_scan_cost, time);
 	}
 
-	return cpu;
+	return idle_cpu;
 }
 
 /*
@@ -6297,10 +6322,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (!sd)
 		return target;
 
-	i = select_idle_core(p, sd, target);
-	if ((unsigned)i < nr_cpumask_bits)
-		return i;
-
 	i = select_idle_cpu(p, sd, target);
 	if ((unsigned)i < nr_cpumask_bits)
 		return i;
--
2.26.2


2021-01-18 13:00:42

by Li, Aubrey

Subject: Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

On 2021/1/15 18:08, Mel Gorman wrote:
> From: Peter Zijlstra (Intel) <[email protected]>
>
> Both select_idle_core() and select_idle_cpu() do a loop over the same
> cpumask. Observe that by clearing the already visited CPUs, we can
> fold the iteration and iterate a core at a time.
>
> All we need to do is remember any non-idle CPU we encountered while
> scanning for an idle core. This way we'll only iterate every CPU once.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Signed-off-by: Mel Gorman <[email protected]>
> ---
> kernel/sched/fair.c | 97 +++++++++++++++++++++++++++------------------
> 1 file changed, 59 insertions(+), 38 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 12e08da90024..6c0f841e9e75 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
> return new_cpu;
> }
>
> +static inline int __select_idle_cpu(struct task_struct *p, int core, struct cpumask *cpus)

Sorry if I missed anything, but why are p and cpus needed here?

> +{
> + if (available_idle_cpu(core) || sched_idle_cpu(core))
> + return core;
> +
> + return -1;
> +}
> +
> #ifdef CONFIG_SCHED_SMT
> DEFINE_STATIC_KEY_FALSE(sched_smt_present);
> EXPORT_SYMBOL_GPL(sched_smt_present);
> @@ -6066,40 +6074,34 @@ void __update_idle_core(struct rq *rq)
> * there are no idle cores left in the system; tracked through
> * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
> */
> -static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
> +static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
> {
> - struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> - int core, cpu;
> + bool idle = true;
> + int cpu;
>
> if (!static_branch_likely(&sched_smt_present))
> - return -1;
> -
> - if (!test_idle_cores(target, false))
> - return -1;
> -
> - cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> + return __select_idle_cpu(p, core, cpus);
>
> - for_each_cpu_wrap(core, cpus, target) {
> - bool idle = true;
> -
> - for_each_cpu(cpu, cpu_smt_mask(core)) {
> - if (!available_idle_cpu(cpu)) {
> - idle = false;
> - break;
> + for_each_cpu(cpu, cpu_smt_mask(core)) {
> + if (!available_idle_cpu(cpu)) {
> + idle = false;
> + if (*idle_cpu == -1) {
> + if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
> + *idle_cpu = cpu;
> + break;
> + }
> + continue;
> }
> + break;
> }
> -
> - if (idle)
> - return core;
> -
> - cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
> + if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
> + *idle_cpu = cpu;
> }
>
> - /*
> - * Failed to find an idle core; stop looking for one.
> - */
> - set_idle_cores(target, 0);
> + if (idle)
> + return core;
>
> + cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
> return -1;
> }
>
> @@ -6107,9 +6109,18 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
>
> #define sched_smt_weight 1
>
> -static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
> +static inline void set_idle_cores(int cpu, int val)
> {
> - return -1;
> +}
> +
> +static inline bool test_idle_cores(int cpu, bool def)
> +{
> + return def;
> +}
> +
> +static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
> +{
> + return __select_idle_cpu(p, core, cpus);
> }
>
> #endif /* CONFIG_SCHED_SMT */
> @@ -6124,10 +6135,11 @@ static inline int select_idle_core(struct task_struct *p, struct sched_domain *s
> static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
> {
> struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> + int i, cpu, idle_cpu = -1, nr = INT_MAX;
> + bool smt = test_idle_cores(target, false);
> + int this = smp_processor_id();
> struct sched_domain *this_sd;
> u64 time;
> - int this = smp_processor_id();
> - int cpu, nr = INT_MAX;
>
> this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
> if (!this_sd)
> @@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>
> cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
>
> - if (sched_feat(SIS_PROP)) {
> + if (sched_feat(SIS_PROP) && !smt) {
Is it possible that the system does have an idle core, but I still don't want to scan the entire llc domain?

> u64 avg_cost, avg_idle, span_avg;
>
> /*
> @@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> for_each_cpu_wrap(cpu, cpus, target) {
> if (!--nr)
> return -1;

It looks like nr only makes sense when smt == false now; can it be moved into the else branch below?

> - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> - break;
> + if (smt) {
> + i = select_idle_core(p, cpu, cpus, &idle_cpu);
> + if ((unsigned int)i < nr_cpumask_bits)
> + return i;

What if the last idle core is selected here? Should we set_idle_cores to false before returning?

> +
> + } else {
> + i = __select_idle_cpu(p, cpu, cpus);
> + if ((unsigned int)i < nr_cpumask_bits) {
> + idle_cpu = i;
> + break;
> + }
> + }
> }
>
> - if (sched_feat(SIS_PROP)) {
> + if (smt)
> + set_idle_cores(this, false);
> +
> + if (sched_feat(SIS_PROP) && !smt) {
> time = cpu_clock(this) - time;
> update_avg(&this_sd->avg_scan_cost, time);
> }
>
> - return cpu;
> + return idle_cpu;
> }
>
> /*
> @@ -6297,10 +6322,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
> if (!sd)
> return target;
>
> - i = select_idle_core(p, sd, target);
> - if ((unsigned)i < nr_cpumask_bits)
> - return i;
> -
> i = select_idle_cpu(p, sd, target);
> if ((unsigned)i < nr_cpumask_bits)
> return i;
>

2021-01-19 04:36:16

by Mel Gorman

Subject: Re: [PATCH 5/5] sched/fair: Merge select_idle_core/cpu()

On Mon, Jan 18, 2021 at 08:55:03PM +0800, Li, Aubrey wrote:
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 12e08da90024..6c0f841e9e75 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -6006,6 +6006,14 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
> > return new_cpu;
> > }
> >
> > +static inline int __select_idle_cpu(struct task_struct *p, int core, struct cpumask *cpus)
>
> Sorry if I missed anything, but why are p and cpus needed here?
>

They are not needed. The original code was matching the calling pattern
of select_idle_core(), which needs p and cpus to check whether sibling
CPUs are allowed.
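
Purely as an illustration (not part of the posted patch), the helper could
be trimmed to:

	static inline int __select_idle_cpu(int cpu)
	{
		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
			return cpu;

		return -1;
	}

but keeping the wider signature lets the smt and !smt branches of the scan
loop call their helpers the same way.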

> > @@ -6135,7 +6147,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> >
> > cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> >
> > - if (sched_feat(SIS_PROP)) {
> > + if (sched_feat(SIS_PROP) && !smt) {
>
> Is it possible that the system does have an idle core, but I still don't want to scan the entire llc domain?
>

This version matches historical behaviour. To limit the scan for cores,
select_idle_core() would need to obey SIS_PROP, and that patch was dropped
because it introduced regressions. It would only be considered again once
SIS_PROP had better metrics for limiting the depth of the search.

> > u64 avg_cost, avg_idle, span_avg;
> >
> > /*
> > @@ -6159,16 +6171,29 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
> > for_each_cpu_wrap(cpu, cpus, target) {
> > if (!--nr)
> > return -1;
>
> It looks like nr only makes sense when smt == false now; can it be moved into the else branch below?
>

It can. I expect the saving to be marginal and it will need to move back
when/if select_idle_core() obeys SIS_PROP.
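
For reference, an untested sketch of what that move would look like in the
scan loop of select_idle_cpu():

	for_each_cpu_wrap(cpu, cpus, target) {
		if (smt) {
			i = select_idle_core(p, cpu, cpus, &idle_cpu);
			if ((unsigned int)i < nr_cpumask_bits)
				return i;
		} else {
			if (!--nr)
				return -1;
			i = __select_idle_cpu(p, cpu, cpus);
			if ((unsigned int)i < nr_cpumask_bits) {
				idle_cpu = i;
				break;
			}
		}
	}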

> > - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> > - break;
> > + if (smt) {
> > + i = select_idle_core(p, cpu, cpus, &idle_cpu);
> > + if ((unsigned int)i < nr_cpumask_bits)
> > + return i;
>
> What if the last idle core is selected here? Should we set_idle_cores to false before returning?
>

We'd have to check what bits were still set in the cpus mask and
determine if they represent an idle core. I severely doubt it would be
worth the cost given that the availability of idle cores can change at
any instant.
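
Purely for illustration, the kind of extra pass being argued against would
look roughly like this (untested):

	bool have_idle_core = false;

	for_each_cpu(cpu, cpus) {	/* bits not yet cleared by select_idle_core() */
		bool core_idle = true;
		int sibling;

		for_each_cpu(sibling, cpu_smt_mask(cpu)) {
			if (!available_idle_cpu(sibling)) {
				core_idle = false;
				break;
			}
		}
		if (core_idle) {
			have_idle_core = true;
			break;
		}
	}

	if (!have_idle_core)
		set_idle_cores(this, false);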

--
Mel Gorman
SUSE Labs