2017-08-15 17:27:33

by Waiman Long

[permalink] [raw]
Subject: [PATCH] cpuset: Allow v2 behavior in v1 cgroup

Cpuset v2 has some valuable attributes that are not present in
v1 because of backward compatibility concern. One of that is the
restoration of the original cpu and memory node mask after a hot
removal and addition event sequence.

This patch adds a new kernel command line option "cpuset_v2_mode=" to
allow cpuset to have v2 behavior in a v1 cgroup when using a non-zero
value. This enables users to optionally enable cpuset v2 behavior if
they choose to.

Signed-off-by: Waiman Long <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 7 ++++
kernel/cgroup/cpuset.c | 46 ++++++++++++++++++-------
2 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 372cc66..7abca11 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -668,6 +668,13 @@
on every CPU online, such as boot, and resume from suspend.
Default: 10000

+ cpuset_v2_mode= [KNL] Enable cpuset v2 behavior in cpuset v1 cgroups.
+ In v2 mode, the cpus and mems can be restored to
+ their original values after a removal-addition
+ event sequence.
+ 0: default value, cpuset v1 keeps legacy behavior.
+ 1: cpuset v1 behaves like cpuset v2.
+
cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
Format:
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 9ed6a05..7e56383 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -300,6 +300,17 @@ static inline int is_spread_slab(const struct cpuset *cs)
static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);

/*
+ * Flag to make cpuset v1 behaves like v2 so that cpus and mems can be
+ * restored back to their original values after an offline-online sequence.
+ */
+static bool cpuset_v2_mode __read_mostly;
+
+static inline bool is_in_v2_mode(void)
+{
+ return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) || cpuset_v2_mode;
+}
+
+/*
* This is ugly, but preserves the userspace API for existing cpuset
* users. If someone tries to mount the "cpuset" filesystem, we
* silently switch it to mount "cgroup" instead
@@ -489,8 +500,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)

/* On legacy hiearchy, we must be a subset of our parent cpuset. */
ret = -EACCES;
- if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
- !is_cpuset_subset(trial, par))
+ if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
goto out;

/*
@@ -903,8 +913,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
* If it becomes empty, inherit the effective mask of the
* parent, which is guaranteed to have some CPUs.
*/
- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
- cpumask_empty(new_cpus))
+ if (is_in_v2_mode() && cpumask_empty(new_cpus))
cpumask_copy(new_cpus, parent->effective_cpus);

/* Skip the whole subtree if the cpumask remains the same. */
@@ -921,7 +930,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
cpumask_copy(cp->effective_cpus, new_cpus);
spin_unlock_irq(&callback_lock);

- WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+ WARN_ON(!is_in_v2_mode() &&
!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));

update_tasks_cpumask(cp);
@@ -1157,8 +1166,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
* If it becomes empty, inherit the effective mask of the
* parent, which is guaranteed to have some MEMs.
*/
- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
- nodes_empty(*new_mems))
+ if (is_in_v2_mode() && nodes_empty(*new_mems))
*new_mems = parent->effective_mems;

/* Skip the whole subtree if the nodemask remains the same. */
@@ -1175,7 +1183,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
cp->effective_mems = *new_mems;
spin_unlock_irq(&callback_lock);

- WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+ WARN_ON(!is_in_v2_mode() &&
!nodes_equal(cp->mems_allowed, cp->effective_mems));

update_tasks_nodemask(cp);
@@ -1467,7 +1475,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)

/* allow moving tasks into an empty cpuset if on default hierarchy */
ret = -ENOSPC;
- if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+ if (!is_in_v2_mode() &&
(cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
goto out_unlock;

@@ -1985,7 +1993,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
cpuset_inc();

spin_lock_irq(&callback_lock);
- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
+ if (is_in_v2_mode()) {
cpumask_copy(cs->effective_cpus, parent->effective_cpus);
cs->effective_mems = parent->effective_mems;
}
@@ -2062,7 +2070,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
mutex_lock(&cpuset_mutex);
spin_lock_irq(&callback_lock);

- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
+ if (is_in_v2_mode()) {
cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
top_cpuset.mems_allowed = node_possible_map;
} else {
@@ -2135,6 +2143,18 @@ int __init cpuset_init(void)
return 0;
}

+static int __init set_v2_mode(char *str)
+{
+ ssize_t ret, val;
+
+ if (!str)
+ return 0;
+ ret = kstrtol(str, 0, &val);
+ cpuset_v2_mode = !!val;
+ return ret;
+}
+__setup("cpuset_v2_mode=", set_v2_mode);
+
/*
* If CPU and/or memory hotplug handlers, below, unplug any CPUs
* or memory nodes, we need to walk over the cpuset hierarchy,
@@ -2256,7 +2276,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs)
cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
mems_updated = !nodes_equal(new_mems, cs->effective_mems);

- if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+ if (is_in_v2_mode())
hotplug_update_tasks(cs, &new_cpus, &new_mems,
cpus_updated, mems_updated);
else
@@ -2287,7 +2307,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
static cpumask_t new_cpus;
static nodemask_t new_mems;
bool cpus_updated, mems_updated;
- bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
+ bool on_dfl = is_in_v2_mode();

mutex_lock(&cpuset_mutex);

--
1.8.3.1


2017-08-16 01:20:57

by Zefan Li

[permalink] [raw]
Subject: Re: [PATCH] cpuset: Allow v2 behavior in v1 cgroup

On 2017/8/16 1:27, Waiman Long wrote:
> Cpuset v2 has some valuable attributes that are not present in
> v1 because of backward compatibility concern. One of that is the
> restoration of the original cpu and memory node mask after a hot
> removal and addition event sequence.
>
> This patch adds a new kernel command line option "cpuset_v2_mode=" to
> allow cpuset to have v2 behavior in a v1 cgroup when using a non-zero
> value. This enables users to optionally enable cpuset v2 behavior if
> they choose to.
>
> Signed-off-by: Waiman Long <[email protected]>

I have no strong objection. I have received quite a few requests for
this in the past, and some were from other departments in my company.

2017-08-16 14:29:33

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH] cpuset: Allow v2 behavior in v1 cgroup

Hello, Waiman.

On Tue, Aug 15, 2017 at 01:27:20PM -0400, Waiman Long wrote:
> + cpuset_v2_mode= [KNL] Enable cpuset v2 behavior in cpuset v1 cgroups.
> + In v2 mode, the cpus and mems can be restored to
> + their original values after a removal-addition
> + event sequence.
> + 0: default value, cpuset v1 keeps legacy behavior.
> + 1: cpuset v1 behaves like cpuset v2.
> +

It feels weird to make this a kernel boot param when all other options
are specified on mount time. Is there a reason why this can't be a
mount option too?

Thanks.

--
tejun

2017-08-16 14:34:08

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH] cpuset: Allow v2 behavior in v1 cgroup

On 08/16/2017 10:29 AM, Tejun Heo wrote:
> Hello, Waiman.
>
> On Tue, Aug 15, 2017 at 01:27:20PM -0400, Waiman Long wrote:
>> + cpuset_v2_mode= [KNL] Enable cpuset v2 behavior in cpuset v1 cgroups.
>> + In v2 mode, the cpus and mems can be restored to
>> + their original values after a removal-addition
>> + event sequence.
>> + 0: default value, cpuset v1 keeps legacy behavior.
>> + 1: cpuset v1 behaves like cpuset v2.
>> +
> It feels weird to make this a kernel boot param when all other options
> are specified on mount time. Is there a reason why this can't be a
> mount option too?
>
> Thanks.
>
Yes, we can certainly make this a mount option instead of a boot time
parameter. BTW, where do we usually document the mount options for cgroup?

Cheers,
Longman

2017-08-16 14:36:33

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH] cpuset: Allow v2 behavior in v1 cgroup

Hello,

On Wed, Aug 16, 2017 at 10:34:05AM -0400, Waiman Long wrote:
> > It feels weird to make this a kernel boot param when all other options
> > are specified on mount time. Is there a reason why this can't be a
> > mount option too?
> >
> Yes, we can certainly make this a mount option instead of a boot time
> parameter. BTW, where do we usually document the mount options for cgroup?

I don't think there's a central place. Each controller documents
theirs in their own file.

Thanks.

--
tejun

2017-08-16 14:39:25

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH] cpuset: Allow v2 behavior in v1 cgroup

On 08/16/2017 10:36 AM, Tejun Heo wrote:
> Hello,
>
> On Wed, Aug 16, 2017 at 10:34:05AM -0400, Waiman Long wrote:
>>> It feels weird to make this a kernel boot param when all other options
>>> are specified on mount time. Is there a reason why this can't be a
>>> mount option too?
>>>
>> Yes, we can certainly make this a mount option instead of a boot time
>> parameter. BTW, where do we usually document the mount options for cgroup?
> I don't think there's a central place. Each controller documents
> theirs in their own file.
>
> Thanks.
>
OK, I am going to update the patch by controlling cpuset behavior by
mount option instead.

Thanks,
Longman

2017-08-17 21:06:30

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] cpuset: Allow v2 behavior in v1 cgroup

Hi Waiman,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.13-rc5 next-20170817]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Waiman-Long/cpuset-Allow-v2-behavior-in-v1-cgroup/20170818-040416
config: i386-randconfig-n0-201733 (attached as .config)
compiler: gcc-4.8 (Debian 4.8.4-1) 4.8.4
reproduce:
# save the attached .config to linux build tree
make ARCH=i386

All warnings (new ones prefixed by >>):

kernel/cgroup/cpuset.c: In function 'set_v2_mode':
>> kernel/cgroup/cpuset.c:2145:2: warning: passing argument 3 of 'kstrtol' from incompatible pointer type [enabled by default]
ret = kstrtol(str, 0, &val);
^
In file included from include/linux/list.h:8:0,
from include/linux/kobject.h:20,
from include/linux/device.h:17,
from include/linux/node.h:17,
from include/linux/cpu.h:16,
from kernel/cgroup/cpuset.c:25:
include/linux/kernel.h:332:32: note: expected 'long int *' but argument is of type 'ssize_t *'
static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
^

vim +/kstrtol +2145 kernel/cgroup/cpuset.c

2138
2139 static int __init set_v2_mode(char *str)
2140 {
2141 ssize_t ret, val;
2142
2143 if (!str)
2144 return 0;
> 2145 ret = kstrtol(str, 0, &val);
2146 cpuset_v2_mode = !!val;
2147 return ret;
2148 }
2149 __setup("cpuset_v2_mode=", set_v2_mode);
2150

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (1.76 kB)
.config.gz (28.60 kB)
Download all attachments

2017-08-17 21:14:12

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] cpuset: Allow v2 behavior in v1 cgroup

Hi Waiman,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.13-rc5 next-20170817]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Waiman-Long/cpuset-Allow-v2-behavior-in-v1-cgroup/20170818-040416
config: i386-defconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386

All errors (new ones prefixed by >>):

kernel/cgroup/cpuset.c: In function 'set_v2_mode':
>> kernel/cgroup/cpuset.c:2145:24: error: passing argument 3 of 'kstrtol' from incompatible pointer type [-Werror=incompatible-pointer-types]
ret = kstrtol(str, 0, &val);
^
In file included from include/linux/list.h:8:0,
from include/linux/kobject.h:20,
from include/linux/device.h:17,
from include/linux/node.h:17,
from include/linux/cpu.h:16,
from kernel/cgroup/cpuset.c:25:
include/linux/kernel.h:332:32: note: expected 'long int *' but argument is of type 'ssize_t * {aka int *}'
static inline int __must_check kstrtol(const char *s, unsigned int base, long *res)
^~~~~~~
cc1: some warnings being treated as errors

vim +/kstrtol +2145 kernel/cgroup/cpuset.c

2138
2139 static int __init set_v2_mode(char *str)
2140 {
2141 ssize_t ret, val;
2142
2143 if (!str)
2144 return 0;
> 2145 ret = kstrtol(str, 0, &val);
2146 cpuset_v2_mode = !!val;
2147 return ret;
2148 }
2149 __setup("cpuset_v2_mode=", set_v2_mode);
2150

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (1.85 kB)
.config.gz (25.80 kB)
Download all attachments