Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752505AbdIAQnK (ORCPT ); Fri, 1 Sep 2017 12:43:10 -0400 Received: from mail-wr0-f194.google.com ([209.85.128.194]:36199 "EHLO mail-wr0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752347AbdIAQmO (ORCPT ); Fri, 1 Sep 2017 12:42:14 -0400 X-Google-Smtp-Source: ADKCNb6kVMvM/gEzp772egVfBe5caIIIOVc4AtgBbHfE+inpjrUr1OPqF1KjWG1zym34lKcY9U+7ug== From: Frederic Weisbecker To: LKML Cc: Frederic Weisbecker , Peter Zijlstra , Chris Metcalf , Thomas Gleixner , Luiz Capitulino , Christoph Lameter , "Paul E . McKenney" , Ingo Molnar , Mike Galbraith , Rik van Riel , Wanpeng Li Subject: [PATCH 10/12] housekeeping: Move isolcpus to housekeeping Date: Fri, 1 Sep 2017 18:41:45 +0200 Message-Id: <1504284108-6157-11-git-send-email-fweisbec@gmail.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1504284108-6157-1-git-send-email-fweisbec@gmail.com> References: <1504284108-6157-1-git-send-email-fweisbec@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 10938 Lines: 340 We want to centralize the isolation features on the housekeeping subsystem and scheduler domain isolation is a significant part of it. No intended behaviour change, we just reuse the housekeeping cpumask and core code. Signed-off-by: Frederic Weisbecker Cc: Chris Metcalf Cc: Rik van Riel Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Mike Galbraith Cc: Ingo Molnar Cc: Christoph Lameter Cc: Paul E. McKenney Cc: Wanpeng Li Cc: Luiz Capitulino --- drivers/base/cpu.c | 11 ++++++++- include/linux/housekeeping.h | 1 + include/linux/sched.h | 2 -- kernel/cgroup/cpuset.c | 14 ++++------- kernel/housekeeping.c | 57 +++++++++++++++++++++++++++++++++++--------- kernel/sched/core.c | 16 +------------ kernel/sched/topology.c | 21 ++++------------ 7 files changed, 67 insertions(+), 55 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 2c3b359..8e33b9e 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "base.h" @@ -271,8 +272,16 @@ static ssize_t print_cpus_isolated(struct device *dev, struct device_attribute *attr, char *buf) { int n = 0, len = PAGE_SIZE-2; + cpumask_var_t isolated; - n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(cpu_isolated_map)); + if (!alloc_cpumask_var(&isolated, GFP_KERNEL)) + return -ENOMEM; + + cpumask_andnot(isolated, cpu_possible_mask, + housekeeping_cpumask(HK_FLAG_DOMAIN)); + n = scnprintf(buf, len, "%*pbl\n", cpumask_pr_args(isolated)); + + free_cpumask_var(isolated); return n; } diff --git a/include/linux/housekeeping.h b/include/linux/housekeeping.h index 35fb197..a99466e 100644 --- a/include/linux/housekeeping.h +++ b/include/linux/housekeeping.h @@ -11,6 +11,7 @@ enum hk_flags { HK_FLAG_MISC = (1 << 2), HK_FLAG_SCHED = (1 << 3), HK_FLAG_TICK = (1 << 4), + HK_FLAG_DOMAIN = (1 << 5), }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/sched.h b/include/linux/sched.h index c28b182..816ff52 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -166,8 +166,6 @@ struct task_group; /* Task command name length: */ #define TASK_COMM_LEN 16 -extern cpumask_var_t cpu_isolated_map; - extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 8d51516..c65e73a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -639,7 +640,6 @@ static int generate_sched_domains(cpumask_var_t **domains, int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ - cpumask_var_t non_isolated_cpus; /* load balanced CPUs */ struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] struct cpumask slot */ @@ -649,10 +649,6 @@ static int generate_sched_domains(cpumask_var_t **domains, dattr = NULL; csa = NULL; - if (!alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL)) - goto done; - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { ndoms = 1; @@ -666,7 +662,7 @@ static int generate_sched_domains(cpumask_var_t **domains, update_domain_attr_tree(dattr, &top_cpuset); } cpumask_and(doms[0], top_cpuset.effective_cpus, - non_isolated_cpus); + housekeeping_cpumask(HK_FLAG_DOMAIN)); goto done; } @@ -690,7 +686,8 @@ static int generate_sched_domains(cpumask_var_t **domains, */ if (!cpumask_empty(cp->cpus_allowed) && !(is_sched_load_balance(cp) && - cpumask_intersects(cp->cpus_allowed, non_isolated_cpus))) + cpumask_intersects(cp->cpus_allowed, + housekeeping_cpumask(HK_FLAG_DOMAIN)))) continue; if (is_sched_load_balance(cp)) @@ -772,7 +769,7 @@ static int generate_sched_domains(cpumask_var_t **domains, if (apn == b->pn) { cpumask_or(dp, dp, b->effective_cpus); - cpumask_and(dp, dp, non_isolated_cpus); + cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN)); if (dattr) update_domain_attr_tree(dattr + nslot, b); @@ -785,7 +782,6 @@ static int generate_sched_domains(cpumask_var_t **domains, BUG_ON(nslot != ndoms); done: - free_cpumask_var(non_isolated_cpus); kfree(csa); /* diff --git a/kernel/housekeeping.c b/kernel/housekeeping.c index b2006bd..5f0e7ec 100644 --- a/kernel/housekeeping.c +++ b/kernel/housekeeping.c @@ -58,32 +58,67 @@ void __init housekeeping_init(void) WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); } -#ifdef CONFIG_NO_HZ_FULL -static int __init housekeeping_nohz_full_setup(char *str) +static int __init housekeeping_setup(char *str, enum hk_flags flags) { cpumask_var_t non_housekeeping_mask; alloc_bootmem_cpumask_var(&non_housekeeping_mask); if (cpulist_parse(str, non_housekeeping_mask) < 0) { - pr_warn("Housekeeping: Incorrect nohz_full cpumask\n"); + pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n"); free_bootmem_cpumask_var(non_housekeeping_mask); return 0; } - alloc_bootmem_cpumask_var(&housekeeping_mask); - cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask); + if (!housekeeping_flags) { + alloc_bootmem_cpumask_var(&housekeeping_mask); + cpumask_andnot(housekeeping_mask, + cpu_possible_mask, non_housekeeping_mask); + if (cpumask_empty(housekeeping_mask)) + cpumask_set_cpu(smp_processor_id(), housekeeping_mask); + } else { + cpumask_var_t tmp; - if (cpumask_empty(housekeeping_mask)) - cpumask_set_cpu(smp_processor_id(), housekeeping_mask); + alloc_bootmem_cpumask_var(&tmp); + cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask); + if (!cpumask_equal(tmp, housekeeping_mask)) { + pr_warn("Housekeeping: nohz_full= must match isolcpus=\n"); + free_bootmem_cpumask_var(tmp); + free_bootmem_cpumask_var(non_housekeeping_mask); + return 0; + } + free_bootmem_cpumask_var(tmp); + } - housekeeping_flags = HK_FLAG_TICK | HK_FLAG_TIMER | - HK_FLAG_RCU | HK_FLAG_MISC; + if ((flags & HK_FLAG_TICK) && !(housekeeping_flags & HK_FLAG_TICK)) { + if (IS_ENABLED(CONFIG_NO_HZ_FULL)) { + tick_nohz_full_setup(non_housekeeping_mask); + } else { + pr_warn("Housekeeping: nohz unsupported." + " Build with CONFIG_NO_HZ_FULL\n"); + free_bootmem_cpumask_var(non_housekeeping_mask); + return 0; + } + } - tick_nohz_full_setup(non_housekeeping_mask); + housekeeping_flags |= flags; free_bootmem_cpumask_var(non_housekeeping_mask); return 1; } + +static int __init housekeeping_nohz_full_setup(char *str) +{ + unsigned int flags; + + flags = HK_FLAG_TICK | HK_FLAG_TIMER | HK_FLAG_RCU | HK_FLAG_MISC; + + return housekeeping_setup(str, flags); +} __setup("nohz_full=", housekeeping_nohz_full_setup); -#endif + +static int __init housekeeping_isolcpus_setup(char *str) +{ + return housekeeping_setup(str, HK_FLAG_DOMAIN); +} +__setup("isolcpus=", housekeeping_isolcpus_setup); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 877c85d..de20aa1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -84,9 +84,6 @@ __read_mostly int scheduler_running; */ int sysctl_sched_rt_runtime = 950000; -/* CPUs with isolated domains */ -cpumask_var_t cpu_isolated_map; - /* * __task_rq_lock - lock the rq @p resides on. */ @@ -5672,10 +5669,6 @@ static inline void sched_init_smt(void) { } void __init sched_init_smp(void) { - cpumask_var_t non_isolated_cpus; - - alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); - sched_init_numa(); /* @@ -5685,16 +5678,12 @@ void __init sched_init_smp(void) */ mutex_lock(&sched_domains_mutex); sched_init_domains(cpu_active_mask); - cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); - if (cpumask_empty(non_isolated_cpus)) - cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); mutex_unlock(&sched_domains_mutex); /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0) + if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) BUG(); sched_init_granularity(); - free_cpumask_var(non_isolated_cpus); init_sched_rt_class(); init_sched_dl_class(); @@ -5898,9 +5887,6 @@ void __init sched_init(void) calc_load_update = jiffies + LOAD_FREQ; #ifdef CONFIG_SMP - /* May be allocated at isolcpus cmdline parse time */ - if (cpu_isolated_map == NULL) - zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); idle_thread_set_boot_cpu(); set_cpu_rq_start_time(smp_processor_id()); #endif diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index bd8b6d6..1e03138 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -3,6 +3,7 @@ */ #include #include +#include #include "sched.h" @@ -466,21 +467,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) update_top_cache_domain(cpu); } -/* Setup the mask of CPUs configured for isolated domains */ -static int __init isolated_cpu_setup(char *str) -{ - int ret; - - alloc_bootmem_cpumask_var(&cpu_isolated_map); - ret = cpulist_parse(str, cpu_isolated_map); - if (ret) { - pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids); - return 0; - } - return 1; -} -__setup("isolcpus=", isolated_cpu_setup); - struct s_data { struct sched_domain ** __percpu sd; struct root_domain *rd; @@ -1775,7 +1761,7 @@ int sched_init_domains(const struct cpumask *cpu_map) doms_cur = alloc_sched_domains(ndoms_cur); if (!doms_cur) doms_cur = &fallback_doms; - cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); + cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN)); err = build_sched_domains(doms_cur[0], NULL); register_sched_domain_sysctl(); @@ -1871,7 +1857,8 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], if (doms_new == NULL) { n = 0; doms_new = &fallback_doms; - cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); + cpumask_and(doms_new[0], cpu_active_mask, + housekeeping_cpumask(HK_FLAG_DOMAIN)); WARN_ON_ONCE(dattr_new); } -- 2.7.4