From: Xunlei Pang
To: linux-kernel@vger.kernel.org
Cc: Peter Zijlstra, Juri Lelli, Ingo Molnar, Steven Rostedt, Xunlei Pang
Subject: [PATCH] sched/rt/deadline: Share cpumask between rt and deadline for SMP scheduling
Date: Fri, 8 Apr 2016 18:34:38 +0800
Message-Id: <1460111678-27974-1-git-send-email-xlpang@redhat.com>

RT and deadline each use their own per-CPU cpumask for SMP scheduling.
All operations on these masks are performed under spin_lock_irqsave()
protection, so the two classes can safely share a single mask.

This patch introduces "sched_pp_shared_mask" ("pp" stands for Push/Pull,
the main users of the mask), removes init_sched_rt_class() and
init_sched_dl_class(), and initializes the shared mask directly in
sched_init() instead. This is safe because at sched_init() time there is
only one task (the future "idle" task) and interrupts are disabled, so no
scheduling can happen, and "cpu_possible_mask" has already been
initialized.

As a consequence, the NULL cpumask checks in find_lowest_rq() and
find_later_rq() can also be removed, since "sched_pp_shared_mask" is
guaranteed to be initialized before the first SMP scheduling decision is
made.

Inspired-by: Peter Zijlstra
Signed-off-by: Xunlei Pang
---
(A short user-space sketch illustrating the shared scratch-mask idea follows the patch.)

 kernel/sched/core.c     | 13 ++++++++++---
 kernel/sched/deadline.c | 18 ++----------------
 kernel/sched/rt.c       | 19 ++-----------------
 kernel/sched/sched.h    |  6 ++++--
 4 files changed, 18 insertions(+), 38 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a533566..dbc22ab 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -93,6 +93,11 @@
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
+#ifdef CONFIG_SMP
+/* Used by Push/Pull scheduling, and shared by rt and deadline. */
+DEFINE_PER_CPU(cpumask_var_t, sched_pp_shared_mask);
+#endif
+
 static void update_rq_clock_task(struct rq *rq, s64 delta);
 
 void update_rq_clock(struct rq *rq)
@@ -7187,9 +7192,6 @@ void __init sched_init_smp(void)
 		BUG();
 	sched_init_granularity();
 	free_cpumask_var(non_isolated_cpus);
-
-	init_sched_rt_class();
-	init_sched_dl_class();
 }
 #else
 void __init sched_init_smp(void)
@@ -7389,6 +7391,11 @@ void __init sched_init(void)
 		zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 	idle_thread_set_boot_cpu();
 	set_cpu_rq_start_time();
+
+	for_each_possible_cpu(i) {
+		zalloc_cpumask_var_node(&per_cpu(sched_pp_shared_mask, i),
+					GFP_KERNEL, cpu_to_node(i));
+	}
 #endif
 	init_sched_fair_class();
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e564c88..169d40d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1269,19 +1269,13 @@ next_node:
 	return NULL;
 }
 
-static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
-
 static int find_later_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
+	struct cpumask *later_mask;
 	int this_cpu = smp_processor_id();
 	int best_cpu, cpu = task_cpu(task);
 
-	/* Make sure the mask is initialized first */
-	if (unlikely(!later_mask))
-		return -1;
-
 	if (task->nr_cpus_allowed == 1)
 		return -1;
 
@@ -1289,6 +1283,7 @@ static int find_later_rq(struct task_struct *task)
 	 * We have to consider system topology and task affinity
 	 * first, then we can look for a suitable cpu.
 	 */
+	later_mask = this_cpu_cpumask_var_ptr(sched_pp_shared_mask);
 	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
 			task, later_mask);
 	if (best_cpu == -1)
@@ -1675,15 +1670,6 @@ static void rq_offline_dl(struct rq *rq)
 	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
 }
 
-void __init init_sched_dl_class(void)
-{
-	unsigned int i;
-
-	for_each_possible_cpu(i)
-		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
-					GFP_KERNEL, cpu_to_node(i));
-}
-
 #endif /* CONFIG_SMP */
 
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5624713..ecfc83d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1612,22 +1612,17 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 	return NULL;
 }
 
-static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
-
 static int find_lowest_rq(struct task_struct *task)
 {
 	struct sched_domain *sd;
-	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
+	struct cpumask *lowest_mask;
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
 
-	/* Make sure the mask is initialized first */
-	if (unlikely(!lowest_mask))
-		return -1;
-
 	if (task->nr_cpus_allowed == 1)
 		return -1; /* No other targets possible */
 
+	lowest_mask = this_cpu_cpumask_var_ptr(sched_pp_shared_mask);
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return -1; /* No targets found */
 
@@ -2164,16 +2159,6 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 
 	queue_pull_task(rq);
 }
-
-void __init init_sched_rt_class(void)
-{
-	unsigned int i;
-
-	for_each_possible_cpu(i) {
-		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
-					GFP_KERNEL, cpu_to_node(i));
-	}
-}
 #endif /* CONFIG_SMP */
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e6d4a3f..e880b37 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -715,6 +715,10 @@ static inline int cpu_of(struct rq *rq)
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
+#ifdef CONFIG_SMP
+DECLARE_PER_CPU(cpumask_var_t, sched_pp_shared_mask);
+#endif
+
 #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
 #define this_rq()		this_cpu_ptr(&runqueues)
 #define task_rq(p)		cpu_rq(task_cpu(p))
@@ -1296,8 +1300,6 @@ extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);
 extern void update_max_interval(void);
 
-extern void init_sched_dl_class(void);
-extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
 
 extern void resched_curr(struct rq *rq);
-- 
1.8.3.1
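
The core argument above -- a single per-CPU scratch mask can be shared by two
users as long as every access happens inside the same lock-protected,
non-reentrant section -- can be illustrated outside the kernel. The following
is a minimal user-space sketch, not kernel code: a pthread mutex stands in for
the rq lock / spin_lock_irqsave(), the per-worker cpu_set_t stands in for the
per-CPU sched_pp_shared_mask, and struct worker, demo_find_lowest() and
demo_find_later() are hypothetical names invented for the example.

/*
 * Illustrative user-space sketch only -- not kernel code.  The mutex models
 * the lock/irq-off section, the scratch mask models sched_pp_shared_mask,
 * and the two demo_* helpers model the rt and deadline users.
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

struct worker {
	pthread_mutex_t lock;		/* stand-in for the rq lock */
	cpu_set_t	scratch_mask;	/* stand-in for sched_pp_shared_mask */
};

/* "rt" user: fills the shared scratch mask and picks a CPU, under the lock. */
static int demo_find_lowest(struct worker *w, const cpu_set_t *allowed)
{
	int i, cpu = -1;

	pthread_mutex_lock(&w->lock);
	CPU_ZERO(&w->scratch_mask);
	for (i = 0; i < CPU_SETSIZE; i++)
		if (CPU_ISSET(i, allowed))
			CPU_SET(i, &w->scratch_mask);
	for (i = 0; i < CPU_SETSIZE; i++)
		if (CPU_ISSET(i, &w->scratch_mask)) {
			cpu = i;
			break;
		}
	pthread_mutex_unlock(&w->lock);

	return cpu;
}

/* "deadline" user: reuses the very same scratch mask, also under the lock. */
static int demo_find_later(struct worker *w, const cpu_set_t *allowed)
{
	return demo_find_lowest(w, allowed);
}

int main(void)
{
	struct worker w;
	cpu_set_t allowed;

	pthread_mutex_init(&w.lock, NULL);
	CPU_ZERO(&allowed);
	CPU_SET(0, &allowed);
	CPU_SET(2, &allowed);

	printf("rt pick: %d\n", demo_find_lowest(&w, &allowed));
	printf("dl pick: %d\n", demo_find_later(&w, &allowed));

	return 0;
}

Because both demo_* helpers only touch the scratch mask with the lock held,
neither can observe a half-written mask left by the other; that is the same
property the patch relies on when it drops the separate per-class masks.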