select_idle_cpu() scans the LLC domain for idle CPUs, which is
always expensive. So the following commit:
1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
introduced a way to limit how many CPUs we scan.
But it consumes some CPUs out of 'nr' that are not allowed
for the task and thus wastes our attempts. The function
then always returns nr_cpumask_bits, and we can't find a CPU
on which our task is allowed to run.
A cpumask may be too big to place on the stack, so, similar to
select_idle_core(), use the per-CPU 'select_idle_mask' to prevent
stack overflow.
Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
Signed-off-by: Cheng Jian <[email protected]>
---
kernel/sched/fair.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 08a233e97a01..d48244388ce9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5828,6 +5828,7 @@ static inline int select_idle_smt(struct task_struct *p, int target)
*/
static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
{
+ struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
struct sched_domain *this_sd;
u64 avg_cost, avg_idle;
u64 time, cost;
@@ -5859,11 +5860,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
time = cpu_clock(this);
- for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+ cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+
+ for_each_cpu_wrap(cpu, cpus, target) {
if (!--nr)
return si_cpu;
- if (!cpumask_test_cpu(cpu, p->cpus_ptr))
- continue;
if (available_idle_cpu(cpu))
break;
if (si_cpu == -1 && sched_idle_cpu(cpu))
--
2.20.1
* Cheng Jian <[email protected]> [2019-12-13 10:45:30]:
> Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
> Signed-off-by: Cheng Jian <[email protected]>
> ---
> kernel/sched/fair.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 08a233e97a01..d48244388ce9 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5828,6 +5828,7 @@ static inline int select_idle_smt(struct task_struct *p, int target)
> */
> static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
> {
> + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> struct sched_domain *this_sd;
> u64 avg_cost, avg_idle;
> u64 time, cost;
> @@ -5859,11 +5860,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>
> time = cpu_clock(this);
>
> - for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
> + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> +
> + for_each_cpu_wrap(cpu, cpus, target) {
> if (!--nr)
> return si_cpu;
> - if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> - continue;
> if (available_idle_cpu(cpu))
> break;
> if (si_cpu == -1 && sched_idle_cpu(cpu))
Looks good to me.
Reviewed-by: Srikar Dronamraju <[email protected]>
--
Thanks and Regards
Srikar Dronamraju
On Fri, 13 Dec 2019 at 03:48, Cheng Jian <[email protected]> wrote:
>
> select_idle_cpu() will scan the LLC domain for idle CPUs,
> it's always expensive. so the next commit :
>
> 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
>
> introduces a way to limit how many CPUs we scan.
>
> But it consume some CPUs out of 'nr' that are not allowed
> for the task and thus waste our attempts. The function
> always return nr_cpumask_bits, and we can't find a CPU
> which our task is allowed to run.
>
> Cpumask may be too big, similar to select_idle_core(), use
> per_cpu_ptr 'select_idle_mask' to prevent stack overflow.
>
> Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
> Signed-off-by: Cheng Jian <[email protected]>
Reviewed-by: Vincent Guittot <[email protected]>
> ---
> kernel/sched/fair.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 08a233e97a01..d48244388ce9 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5828,6 +5828,7 @@ static inline int select_idle_smt(struct task_struct *p, int target)
> */
> static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
> {
> + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> struct sched_domain *this_sd;
> u64 avg_cost, avg_idle;
> u64 time, cost;
> @@ -5859,11 +5860,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>
> time = cpu_clock(this);
>
> - for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
> + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> +
> + for_each_cpu_wrap(cpu, cpus, target) {
> if (!--nr)
> return si_cpu;
> - if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> - continue;
> if (available_idle_cpu(cpu))
> break;
> if (si_cpu == -1 && sched_idle_cpu(cpu))
> --
> 2.20.1
>
On 13/12/2019 02:45, Cheng Jian wrote:
> select_idle_cpu() will scan the LLC domain for idle CPUs,
> it's always expensive. so the next commit :
>
> 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
>
> introduces a way to limit how many CPUs we scan.
>
> But it consume some CPUs out of 'nr' that are not allowed
> for the task and thus waste our attempts. The function
> always return nr_cpumask_bits, and we can't find a CPU
> which our task is allowed to run.
>
> Cpumask may be too big, similar to select_idle_core(), use
> per_cpu_ptr 'select_idle_mask' to prevent stack overflow.
>
> Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
> Signed-off-by: Cheng Jian <[email protected]>
Reviewed-by: Valentin Schneider <[email protected]>
> ---
> kernel/sched/fair.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 08a233e97a01..d48244388ce9 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5828,6 +5828,7 @@ static inline int select_idle_smt(struct task_struct *p, int target)
> */
> static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
> {
> + struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
> struct sched_domain *this_sd;
> u64 avg_cost, avg_idle;
> u64 time, cost;
> @@ -5859,11 +5860,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
>
> time = cpu_clock(this);
>
> - for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
> + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
> +
> + for_each_cpu_wrap(cpu, cpus, target) {
> if (!--nr)
> return si_cpu;
> - if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> - continue;
> if (available_idle_cpu(cpu))
> break;
> if (si_cpu == -1 && sched_idle_cpu(cpu))
>