From: Subhra Mazumdar
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mingo@kernel.org
Subject: [RFC PATCH] sched: select_idle_core should select least utilized core
Date: Thu, 8 Jun 2017 15:26:32 -0400
Message-Id: <1496949992-629076-1-git-send-email-subhra.mazumdar@oracle.com>

The current select_idle_core() tries to find a fully idle core; if that
fails, select_idle_cpu() next returns any idle cpu in the LLC domain.
This is not optimal for architectures with many (more than 2)
hyperthreads per core. This patch changes select_idle_core() to find
the core with the least number of busy hyperthreads and return an idle
cpu in that core.

Signed-off-by: Subhra Mazumdar
---
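For illustration, the selection heuristic boils down to the stand-alone
user-space sketch below. The select_least_utilized_core() helper, the
fixed core/strand layout and the busy[] flags are made up for the
example only; the actual patch works on cpu_smt_mask()/idle_cpu() and
also honors p->cpus_allowed and the select_idle_mask bookkeeping.

/*
 * Stand-alone illustration of the heuristic (not kernel code): prefer a
 * fully idle core; otherwise pick the core with the fewest busy
 * hyperthreads ("strands") and return an idle strand in it.
 */
#include <limits.h>
#include <stdio.h>

#define NR_CORES	4
#define SMT_PER_CORE	8			/* e.g. 8 strands per core */
#define NR_CPUS		(NR_CORES * SMT_PER_CORE)

static int busy[NR_CPUS];			/* stands in for !idle_cpu(cpu) */

static int select_least_utilized_core(void)
{
	int core, cpu, min_util = INT_MAX, min_cpu = -1;

	for (core = 0; core < NR_CORES; core++) {
		int util = 0, cp = -1;

		for (cpu = core * SMT_PER_CORE;
		     cpu < (core + 1) * SMT_PER_CORE; cpu++) {
			if (busy[cpu])
				util++;		/* count busy strands */
			else
				cp = cpu;	/* remember an idle strand */
		}

		if (util == 0)			/* fully idle core: use it */
			return cp;

		if (util < min_util && cp != -1) {
			min_util = util;	/* least utilized core so far */
			min_cpu = cp;
		}
	}

	return min_cpu;				/* -1 if no strand is idle */
}

int main(void)
{
	int cpu;

	/* core 0: fully busy, core 1: 4 busy, core 2: 1 busy, core 3: 2 busy */
	for (cpu = 0; cpu < SMT_PER_CORE; cpu++)
		busy[cpu] = 1;
	for (cpu = SMT_PER_CORE; cpu < SMT_PER_CORE + 4; cpu++)
		busy[cpu] = 1;
	busy[2 * SMT_PER_CORE] = 1;
	busy[3 * SMT_PER_CORE] = busy[3 * SMT_PER_CORE + 1] = 1;

	printf("picked cpu %d\n", select_least_utilized_core());
	return 0;
}

With the example population in main(), this prints "picked cpu 23": an
idle strand in core 2, the core with the fewest busy strands, since no
core is fully idle.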
 kernel/sched/fair.c      | 113 +++++++++-------------------------------------
 kernel/sched/idle_task.c |   1 -
 kernel/sched/sched.h     |  10 ----
 3 files changed, 21 insertions(+), 103 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d711093..eb2c33c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5679,111 +5679,49 @@ static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *
 
 #ifdef CONFIG_SCHED_SMT
 
-static inline void set_idle_cores(int cpu, int val)
-{
-	struct sched_domain_shared *sds;
-
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds)
-		WRITE_ONCE(sds->has_idle_cores, val);
-}
-
-static inline bool test_idle_cores(int cpu, bool def)
-{
-	struct sched_domain_shared *sds;
-
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds)
-		return READ_ONCE(sds->has_idle_cores);
-
-	return def;
-}
-
 /*
- * Scans the local SMT mask to see if the entire core is idle, and records this
- * information in sd_llc_shared->has_idle_cores.
- *
- * Since SMT siblings share all cache levels, inspecting this limited remote
- * state should be fairly cheap.
- */
-void __update_idle_core(struct rq *rq)
-{
-	int core = cpu_of(rq);
-	int cpu;
-
-	rcu_read_lock();
-	if (test_idle_cores(core, true))
-		goto unlock;
-
-	for_each_cpu(cpu, cpu_smt_mask(core)) {
-		if (cpu == core)
-			continue;
-
-		if (!idle_cpu(cpu))
-			goto unlock;
-	}
-
-	set_idle_cores(core, 1);
-unlock:
-	rcu_read_unlock();
-}
-
-/*
- * Scan the entire LLC domain for idle cores; this dynamically switches off if
- * there are no idle cores left in the system; tracked through
- * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
+ * Scan the entire LLC domain for idle cores; find the core with the minimum
+ * number of busy strands and return an idle strand in that core.
  */
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu, wrap;
+	int core, cpu, wrap, min_util = INT_MAX, min_cpu = -1;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
 
-	if (!test_idle_cores(target, false))
-		return -1;
-
 	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
 
 	for_each_cpu_wrap(core, cpus, target, wrap) {
 		bool idle = true;
+		int util = 0;
+		int cp = -1;
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
 			cpumask_clear_cpu(cpu, cpus);
-			if (!idle_cpu(cpu))
+			if (!idle_cpu(cpu)) {
 				idle = false;
+				util++;
+			} else if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
+				cp = cpu;
+			}
 		}
 
 		if (idle)
 			return core;
-	}
 
-	/*
-	 * Failed to find an idle core; stop looking for one.
-	 */
-	set_idle_cores(target, 0);
+		if (util < min_util && cp != -1) {
+			min_util = util;
+			min_cpu = cp;
+		}
+	}
 
-	return -1;
+	return min_cpu;
 }
 
-/*
- * Scan the local SMT mask for idle CPUs.
- */
-static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	int cpu;
-
-	if (!static_branch_likely(&sched_smt_present))
-		return -1;
-
-	for_each_cpu(cpu, cpu_smt_mask(target)) {
-		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-			continue;
-		if (idle_cpu(cpu))
-			return cpu;
-	}
-	return -1;
 }
 
@@ -5794,13 +5732,6 @@ static inline int select_idle_core(struct task_struct *p, struct sched_domain *s
 	return -1;
 }
 
-static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
-{
-	return -1;
-}
-
-#endif /* CONFIG_SCHED_SMT */
-
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -5830,8 +5761,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	time = local_clock();
 
 	for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
-		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-			continue;
+		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+			continue;
 		if (idle_cpu(cpu))
 			break;
 	}
@@ -5844,6 +5775,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	return cpu;
 }
 
+#endif /* CONFIG_SCHED_SMT */
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
@@ -5873,10 +5806,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if ((unsigned)i < nr_cpumask_bits)
 		return i;
 
-	i = select_idle_smt(p, sd, target);
-	if ((unsigned)i < nr_cpumask_bits)
-		return i;
-
 	return target;
 }
 
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 0c00172..a3d5a7c 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -27,7 +27,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
 pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	put_prev_task(rq, prev);
-	update_idle_core(rq);
 	schedstat_inc(rq->sched_goidle);
 	return rq->idle;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6dda2aa..96ef012 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -772,16 +772,6 @@ static inline int cpu_of(struct rq *rq)
 
 extern struct static_key_false sched_smt_present;
 
-extern void __update_idle_core(struct rq *rq);
-
-static inline void update_idle_core(struct rq *rq)
-{
-	if (static_branch_unlikely(&sched_smt_present))
-		__update_idle_core(rq);
-}
-
-#else
-static inline void update_idle_core(struct rq *rq) { }
 #endif
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-- 
1.7.1