From: Brendan Jackman
To: linux-kernel@vger.kernel.org
Cc: Joel Fernandes, Andres Oportus, Ingo Molnar, Morten Rasmussen,
 Peter Zijlstra, Dietmar Eggemann, Vincent Guittot
Subject: [PATCH 2/2] sched/fair: Fix use of NULL with find_idlest_group
Date: Mon, 21 Aug 2017 16:21:28 +0100
Message-Id: <20170821152128.14418-3-brendan.jackman@arm.com>
X-Mailer: git-send-email 2.14.1
In-Reply-To: <20170821152128.14418-1-brendan.jackman@arm.com>
References: <20170821152128.14418-1-brendan.jackman@arm.com>

The current use of returning NULL from find_idlest_group is broken in
the following cases:

a1) The local group is not allowed.

    In this case we currently do not change this_runnable_load or
    this_avg_load from their initial value of 0, which means we return
    NULL regardless of the load of the other, allowed groups. This
    results in pointlessly continuing the find_idlest_group search
    within the local group and then returning prev_cpu from
    select_task_rq_fair.

a2) No CPUs in the sched_domain are allowed.

    In this case we also return NULL and again pointlessly continue the
    search.

b) smp_processor_id() is the "idlest" and != prev_cpu.

    find_idlest_group also returns NULL when the local group is allowed
    and is the idlest. The caller then continues the find_idlest_group
    search at a lower level of the current CPU's sched_domain
    hierarchy. However, new_cpu is not updated, so the search is
    pointless and we return prev_cpu from select_task_rq_fair.

This is fixed by:

1. Returning NULL from find_idlest_group only when _no_ CPUs in the
   current sched_domain are allowed. In this case we now break out of
   the while(sd) loop and immediately return prev_cpu. This fixes
   case a2).

2. Initializing this_runnable_load and this_avg_load to ULONG_MAX
   instead of 0. This means that in case a1) we now return the idlest
   non-local group.

3. Explicitly updating new_cpu when find_idlest_group returns the
   local group, fixing case b).

This patch also reworks the check for whether the group under
consideration is local, under the assumption that the first group in a
sched_domain is always the local one.

Signed-off-by: Brendan Jackman
Cc: Ingo Molnar
Cc: Morten Rasmussen
Cc: Peter Zijlstra
Cc: Dietmar Eggemann
Cc: Vincent Guittot
---
 kernel/sched/fair.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)
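As a review aid only (this is not kernel code, and every toy_* name
below is invented for the illustration), here is a small standalone C
sketch of the contract this patch establishes between
find_idlest_group and its caller: NULL now means "no CPU in this
domain is allowed" (the caller breaks out and keeps prev_cpu), the
local group is returned when it is the idlest (the caller records the
current CPU in new_cpu before descending), and otherwise a remote
group is returned. The remote-group handling is heavily simplified
compared to select_task_rq_fair.

/*
 * toy_idlest.c - standalone model of the new find_idlest_group contract.
 * Every toy_* name is invented for this sketch; none of it is kernel API.
 */
#include <stdio.h>
#include <stddef.h>

struct toy_group {
        int first_cpu;                  /* representative CPU of the group */
        unsigned long load;             /* pretend runnable load */
};

struct toy_domain {
        struct toy_domain *child;       /* lower level of the hierarchy, or NULL */
        struct toy_group *groups;       /* groups[0] is the local group */
        int nr_groups;
        unsigned long allowed_mask;     /* bit i set => CPU i allowed for the task */
};

/*
 * Models the new return contract: NULL only when nothing in the domain is
 * allowed; otherwise the idlest allowed group, which may be the local one.
 */
static struct toy_group *toy_find_idlest_group(const struct toy_domain *sd)
{
        struct toy_group *idlest = NULL;

        for (int i = 0; i < sd->nr_groups; i++) {
                struct toy_group *g = &sd->groups[i];

                /* Skip groups whose CPUs are all disallowed. */
                if (!(sd->allowed_mask & (1UL << g->first_cpu)))
                        continue;
                if (!idlest || g->load < idlest->load)
                        idlest = g;
        }
        return idlest;
}

/* Models the fixed caller loop; this_cpu plays the role of "cpu" above. */
static int toy_select_cpu(const struct toy_domain *sd, int prev_cpu, int this_cpu)
{
        int new_cpu = prev_cpu;

        while (sd) {
                struct toy_group *group = toy_find_idlest_group(sd);

                if (!group)
                        break;                  /* nothing allowed: keep prev_cpu */

                if (group == &sd->groups[0]) {
                        new_cpu = this_cpu;     /* local group is idlest: record it */
                        sd = sd->child;         /* ...then search at a lower level */
                        continue;
                }

                /*
                 * A remote group won: take its representative CPU.  The real
                 * code refines this choice further; that part is elided here.
                 */
                new_cpu = group->first_cpu;
                break;
        }
        return new_cpu;
}

int main(void)
{
        struct toy_group groups[2] = {
                { .first_cpu = 0, .load = 100 },        /* local group (CPU 0) */
                { .first_cpu = 1, .load = 400 },        /* remote group (CPU 1) */
        };
        struct toy_domain top = {
                .child = NULL,
                .groups = groups,
                .nr_groups = 2,
                .allowed_mask = 0x3,                    /* CPUs 0 and 1 allowed */
        };

        /* Local group is idlest: we get this_cpu (0), not prev_cpu (1). */
        printf("selected CPU: %d\n", toy_select_cpu(&top, 1, 0));
        return 0;
}

Building and running this prints "selected CPU: 0", i.e. case b)
above: the local group is the idlest, so the chosen CPU ends up being
this_cpu rather than silently falling back to prev_cpu.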
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 64618d768546..7cb5ed719cf9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5382,26 +5382,29 @@ static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
  * domain.
  */
 static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p,
-                  int this_cpu, int sd_flag)
+find_idlest_group(struct sched_domain *sd, struct task_struct *p, int sd_flag)
 {
         struct sched_group *idlest = NULL, *group = sd->groups;
+        struct sched_group *local_group = sd->groups;
         struct sched_group *most_spare_sg = NULL;
-        unsigned long min_runnable_load = ULONG_MAX, this_runnable_load = 0;
-        unsigned long min_avg_load = ULONG_MAX, this_avg_load = 0;
+        unsigned long min_runnable_load = ULONG_MAX, this_runnable_load = ULONG_MAX;
+        unsigned long min_avg_load = ULONG_MAX, this_avg_load = ULONG_MAX;
         unsigned long most_spare = 0, this_spare = 0;
         int load_idx = sd->forkexec_idx;
         int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
         unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
                                 (sd->imbalance_pct-100) / 100;
 
+        if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
+                return NULL;
+
         if (sd_flag & SD_BALANCE_WAKE)
                 load_idx = sd->wake_idx;
 
         do {
                 unsigned long load, avg_load, runnable_load;
                 unsigned long spare_cap, max_spare_cap;
-                int local_group;
+                bool group_is_local = group == local_group;
                 int i;
 
                 /* Skip over this group if it has no CPUs allowed */
@@ -5409,9 +5412,6 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                                         &p->cpus_allowed))
                         continue;
 
-                local_group = cpumask_test_cpu(this_cpu,
-                                               sched_group_span(group));
-
                 /*
                  * Tally up the load of all CPUs in the group and find
                  * the group containing the CPU with most spare capacity.
@@ -5422,7 +5422,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
                 for_each_cpu(i, sched_group_span(group)) {
                         /* Bias balancing toward cpus of our domain */
-                        if (local_group)
+                        if (group_is_local)
                                 load = source_load(i, load_idx);
                         else
                                 load = target_load(i, load_idx);
@@ -5443,7 +5443,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                 runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) /
                                         group->sgc->capacity;
 
-                if (local_group) {
+                if (group_is_local) {
                         this_runnable_load = runnable_load;
                         this_avg_load = avg_load;
                         this_spare = max_spare_cap;
@@ -5489,21 +5489,21 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
         if (this_spare > task_util(p) / 2 &&
             imbalance_scale*this_spare > 100*most_spare)
-                return NULL;
+                return local_group;
 
         if (most_spare > task_util(p) / 2)
                 return most_spare_sg;
 
 skip_spare:
         if (!idlest)
-                return NULL;
+                return local_group;
 
         if (min_runnable_load > (this_runnable_load + imbalance))
-                return NULL;
+                return local_group;
 
         if ((this_runnable_load < (min_runnable_load + imbalance)) &&
              (100*this_avg_load < imbalance_scale*min_avg_load))
-                return NULL;
+                return local_group;
 
         return idlest;
 }
@@ -5927,8 +5927,12 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
                         continue;
                 }
 
-                group = find_idlest_group(sd, p, cpu, sd_flag);
+                group = find_idlest_group(sd, p, sd_flag);
                 if (!group) {
+                        break;
+                } else if (group == sd->groups) {
+                        new_cpu = cpu;
+                        /* Now try balancing at a lower domain level of cpu */
                         sd = sd->child;
                         continue;
                 }
-- 
2.14.1