Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S965661AbcLVPPl (ORCPT ); Thu, 22 Dec 2016 10:15:41 -0500 Received: from proxmox.maurer-it.com ([212.186.127.180]:56433 "EHLO proxmox.maurer-it.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757556AbcLVPPj (ORCPT ); Thu, 22 Dec 2016 10:15:39 -0500 X-Greylist: delayed 461 seconds by postgrey-1.27 at vger.kernel.org; Thu, 22 Dec 2016 10:15:38 EST From: Wolfgang Bumiller To: Li Zefan Cc: cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, lxc-devel@lists.linuxcontainers.org, Serge Hallyn Subject: [RFC PATCH] cgroup, cpuset: add cpuset.remap_cpus Date: Thu, 22 Dec 2016 16:07:51 +0100 Message-Id: <1482419271-15410-1-git-send-email-w.bumiller@proxmox.com> X-Mailer: git-send-email 2.1.4 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 5729 Lines: 183 Changes a cpuset, recursively remapping all its descendants to the new range. Signed-off-by: Wolfgang Bumiller --- Currently, once a cpuset cgroup has a subdirectory, it's impossible to remove a cpu from it without manually recursing through the cgroup file system. The problem gets worse if you want to remap the cpus of a larger subtree. Such remapping is particularly useful with containers, but doing it from user space is problematic in that the recursion might race against the creation of new subdirectories. I'm not sure why this functionality isn't there yet and thought I'd give it a try and send an RFC patch. I'm sure there's a reason though, given how surprisingly small/simple the patch turned out to be and I'm rarely the first to think of a feature like that ;-) I hope this is something we could add one way or another, provided the required changes to the patch are within the scope of my abilities. 
include/linux/cpumask.h | 17 ++++++++++++++++ kernel/cpuset.c | 54 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 59915ea..f5487c8 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -514,6 +514,23 @@ static inline void cpumask_copy(struct cpumask *dstp, } /** + * cpumask_remap - *dstp = map(old, new)(*srcp) + * @dstp: the result + * @srcp: the input cpumask + * @oldp: the old mask + * @newp: the new mask + */ +static inline void cpumask_remap(struct cpumask *dstp, + const struct cpumask *srcp, + const struct cpumask *oldp, + const struct cpumask *newp) +{ + bitmap_remap(cpumask_bits(dstp), cpumask_bits(srcp), + cpumask_bits(oldp), cpumask_bits(newp), + nr_cpumask_bits); +} + +/** * cpumask_any - pick a "random" cpu from *srcp * @srcp: the input cpumask * diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 02a8ea5..22d0cb2 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -450,7 +450,8 @@ static void free_trial_cpuset(struct cpuset *trial) * Return 0 if valid, -errno if not. 
*/ -static int validate_change(struct cpuset *cur, struct cpuset *trial) +static int validate_change(struct cpuset *cur, struct cpuset *trial, + bool remap) { struct cgroup_subsys_state *css; struct cpuset *c, *par; @@ -458,11 +459,13 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) rcu_read_lock(); - /* Each of our child cpusets must be a subset of us */ - ret = -EBUSY; - cpuset_for_each_child(c, css, cur) - if (!is_cpuset_subset(c, trial)) - goto out; + if (!remap) { + /* Each of our child cpusets must be a subset of us */ + ret = -EBUSY; + cpuset_for_each_child(c, css, cur) + if (!is_cpuset_subset(c, trial)) + goto out; + } /* Remaining checks don't apply to root cpuset */ ret = 0; @@ -925,11 +928,15 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) * @cs: the cpuset to consider * @trialcs: trial cpuset * @buf: buffer of cpu numbers written to this cpuset + * @remap: recursively remap all child nodes */ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, - const char *buf) + const char *buf, bool remap) { int retval; + struct cpuset *cp; + struct cgroup_subsys_state *pos_css; + struct cpumask tempmask; /* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */ if (cs == &top_cpuset) @@ -957,11 +964,25 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed)) return 0; - retval = validate_change(cs, trialcs); + retval = validate_change(cs, trialcs, remap); if (retval < 0) return retval; spin_lock_irq(&callback_lock); + if (remap) { + rcu_read_lock(); + cpuset_for_each_descendant_pre(cp, pos_css, cs) { + /* skip empty subtrees */ + if (cpumask_empty(cp->cpus_allowed)) { + pos_css = css_rightmost_descendant(pos_css); + continue; + } + cpumask_copy(&tempmask, cp->cpus_allowed); + cpumask_remap(cp->cpus_allowed, &tempmask, + cs->cpus_allowed, trialcs->cpus_allowed); + } + rcu_read_unlock(); + } 
cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); spin_unlock_irq(&callback_lock); @@ -1217,7 +1238,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, retval = 0; /* Too easy - nothing to do */ goto done; } - retval = validate_change(cs, trialcs); + retval = validate_change(cs, trialcs, false); if (retval < 0) goto done; @@ -1304,7 +1325,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, else clear_bit(bit, &trialcs->flags); - err = validate_change(cs, trialcs); + err = validate_change(cs, trialcs, false); if (err < 0) goto out; @@ -1563,6 +1584,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) typedef enum { FILE_MEMORY_MIGRATE, FILE_CPULIST, + FILE_REMAP_CPULIST, FILE_MEMLIST, FILE_EFFECTIVE_CPULIST, FILE_EFFECTIVE_MEMLIST, @@ -1695,7 +1717,10 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, switch (of_cft(of)->private) { case FILE_CPULIST: - retval = update_cpumask(cs, trialcs, buf); + retval = update_cpumask(cs, trialcs, buf, false); + break; + case FILE_REMAP_CPULIST: + retval = update_cpumask(cs, trialcs, buf, true); break; case FILE_MEMLIST: retval = update_nodemask(cs, trialcs, buf); @@ -1811,6 +1836,13 @@ static struct cftype files[] = { }, { + .name = "remap_cpus", + .write = cpuset_write_resmask, + .max_write_len = (100U + 6 * NR_CPUS), + .private = FILE_REMAP_CPULIST, + }, + + { .name = "mems", .seq_show = cpuset_common_seq_show, .write = cpuset_write_resmask, -- 2.1.4