2020-03-26 19:18:07

by Joel Fernandes

[permalink] [raw]
Subject: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

This deliberately changes the behavior of the per-cpuset
cpus file to not be effected by hotplug. When a cpu is offlined,
it will be removed from the cpuset/cpus file. When a cpu is onlined,
if the cpuset originally requested that that cpu was part of the cpuset,
that cpu will be restored to the cpuset. The cpus files still
have to be hierachical, but the ranges no longer have to be out of
the currently online cpus, just the physically present cpus.

To show the problem:
# echo '1-3' > cpuset.cpus
# cat cpuset.cpus
1-3
# echo 0 > /sys/devices/system/cpu/cpu2/online
# cat cpuset.cpus
1,3
# echo 1 > /sys/devices/system/cpu/cpu2/online
# cat cpuset.cpus
1,3

With patch, the last command outputs:
# cat cpuset.cpus
1-3

Cc: Dmitry Shmidt <[email protected]>
Cc: Amit Pundir <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Guenter Roeck <[email protected]>
Cc: Waiman Long <[email protected]>
Cc: Greg Kerr <[email protected]>
(Original idea from Riley Andrews <[email protected]> who has since
left Google).
(Joel: Forward ported from Android and ChromeOS trees to upstream,
adjusted slightly to handle the scheduling partitions work.)
Signed-off-by: Joel Fernandes (Google) <[email protected]>

---
This patch is in various kernel trees for > 3 years. Atleast 3
organizations using Linux need this patch to handle hotplug: Google's
Android and ChromeOS, DigitalOcean.

kernel/cgroup/cpuset.c | 45 +++++++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 58f5073acff7d..5eb1fb613d0a6 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -105,6 +105,7 @@ struct cpuset {

/* user-configured CPUs and Memory Nodes allow to tasks */
cpumask_var_t cpus_allowed;
+ cpumask_var_t cpus_requested;
nodemask_t mems_allowed;

/* effective CPUs and Memory Nodes allow to tasks */
@@ -443,7 +444,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,

static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
- return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
+ return cpumask_subset(p->cpus_requested, q->cpus_requested) &&
nodes_subset(p->mems_allowed, q->mems_allowed) &&
is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
is_mem_exclusive(p) <= is_mem_exclusive(q);
@@ -459,12 +460,13 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
*/
static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
{
- cpumask_var_t *pmask1, *pmask2, *pmask3;
+ cpumask_var_t *pmask1, *pmask2, *pmask3, *pmask4;

if (cs) {
pmask1 = &cs->cpus_allowed;
pmask2 = &cs->effective_cpus;
pmask3 = &cs->subparts_cpus;
+ pmask4 = &cs->cpus_requested;
} else {
pmask1 = &tmp->new_cpus;
pmask2 = &tmp->addmask;
@@ -480,8 +482,13 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
if (!zalloc_cpumask_var(pmask3, GFP_KERNEL))
goto free_two;

+ if (cs && !zalloc_cpumask_var(pmask4, GFP_KERNEL))
+ goto free_three;
+
return 0;

+free_three:
+ free_cpumask_var(*pmask3);
free_two:
free_cpumask_var(*pmask2);
free_one:
@@ -498,6 +505,7 @@ static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
{
if (cs) {
free_cpumask_var(cs->cpus_allowed);
+ free_cpumask_var(cs->cpus_requested);
free_cpumask_var(cs->effective_cpus);
free_cpumask_var(cs->subparts_cpus);
}
@@ -526,6 +534,7 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
}

cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
+ cpumask_copy(trial->cpus_requested, cs->cpus_requested);
cpumask_copy(trial->effective_cpus, cs->effective_cpus);
return trial;
}
@@ -594,7 +603,8 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
cpuset_for_each_child(c, css, par) {
if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
c != cur &&
- cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
+ cpumask_intersects(trial->cpus_requested,
+ c->cpus_requested))
goto out;
if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
c != cur &&
@@ -1056,10 +1066,11 @@ static void compute_effective_cpumask(struct cpumask *new_cpus,
if (parent->nr_subparts_cpus) {
cpumask_or(new_cpus, parent->effective_cpus,
parent->subparts_cpus);
- cpumask_and(new_cpus, new_cpus, cs->cpus_allowed);
+ cpumask_and(new_cpus, new_cpus, cs->cpus_requested);
cpumask_and(new_cpus, new_cpus, cpu_active_mask);
} else {
- cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus);
+ cpumask_and(new_cpus, cs->cpus_requested,
+ parent->effective_cpus);
}
}

@@ -1482,27 +1493,29 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
return -EACCES;

/*
- * An empty cpus_allowed is ok only if the cpuset has no tasks.
+ * An empty cpus_requested is ok only if the cpuset has no tasks.
* Since cpulist_parse() fails on an empty mask, we special case
* that parsing. The validate_change() call ensures that cpusets
* with tasks have cpus.
*/
if (!*buf) {
- cpumask_clear(trialcs->cpus_allowed);
+ cpumask_clear(trialcs->cpus_requested);
} else {
- retval = cpulist_parse(buf, trialcs->cpus_allowed);
+ retval = cpulist_parse(buf, trialcs->cpus_requested);
if (retval < 0)
return retval;
-
- if (!cpumask_subset(trialcs->cpus_allowed,
- top_cpuset.cpus_allowed))
- return -EINVAL;
}

+ if (!cpumask_subset(trialcs->cpus_requested, top_cpuset.cpus_requested))
+ return -EINVAL;
+
/* Nothing to do if the cpus didn't change */
- if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
+ if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
return 0;

+ cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested,
+ cpu_active_mask);
+
retval = validate_change(cs, trialcs);
if (retval < 0)
return retval;
@@ -1528,6 +1541,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,

spin_lock_irq(&callback_lock);
cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+ cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);

/*
* Make sure that subparts_cpus is a subset of cpus_allowed.
@@ -2409,7 +2423,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)

switch (type) {
case FILE_CPULIST:
- seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed));
+ seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
break;
case FILE_MEMLIST:
seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
@@ -2778,6 +2792,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
cs->mems_allowed = parent->mems_allowed;
cs->effective_mems = parent->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+ cpumask_copy(cs->cpus_requested, parent->cpus_requested);
cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
spin_unlock_irq(&callback_lock);
out_unlock:
@@ -2892,10 +2907,12 @@ int __init cpuset_init(void)
BUG_ON(percpu_init_rwsem(&cpuset_rwsem));

BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
+ BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));

cpumask_setall(top_cpuset.cpus_allowed);
+ cpumask_setall(top_cpuset.cpus_requested);
nodes_setall(top_cpuset.mems_allowed);
cpumask_setall(top_cpuset.effective_cpus);
nodes_setall(top_cpuset.effective_mems);
--
2.25.1.696.g5e7596f4ac-goog


2020-03-26 19:21:33

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Thu, Mar 26, 2020 at 03:16:23PM -0400, Joel Fernandes (Google) wrote:
> This deliberately changes the behavior of the per-cpuset
> cpus file to not be effected by hotplug. When a cpu is offlined,
> it will be removed from the cpuset/cpus file. When a cpu is onlined,
> if the cpuset originally requested that that cpu was part of the cpuset,
> that cpu will be restored to the cpuset. The cpus files still
> have to be hierachical, but the ranges no longer have to be out of
> the currently online cpus, just the physically present cpus.

This is already the behavior on cgroup2 and I don't think we want to
introduce this big a behavior change to cgroup1 cpuset at this point.

Thanks.

--
tejun

2020-03-26 19:45:29

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

Hi Tejun,

On Thu, Mar 26, 2020 at 03:20:35PM -0400, Tejun Heo wrote:
> On Thu, Mar 26, 2020 at 03:16:23PM -0400, Joel Fernandes (Google) wrote:
> > This deliberately changes the behavior of the per-cpuset
> > cpus file to not be effected by hotplug. When a cpu is offlined,
> > it will be removed from the cpuset/cpus file. When a cpu is onlined,
> > if the cpuset originally requested that that cpu was part of the cpuset,
> > that cpu will be restored to the cpuset. The cpus files still
> > have to be hierachical, but the ranges no longer have to be out of
> > the currently online cpus, just the physically present cpus.
>
> This is already the behavior on cgroup2 and I don't think we want to
> introduce this big a behavior change to cgroup1 cpuset at this point.

It is not really that big a change. Please go over the patch, we are not
changing anything with how ->cpus_allowed works and interacts with the rest
of the system and the scheduler. We have just introduced a new mask to keep
track of which CPUs were requested without them being affected by hotplug. On
CPU onlining, we restore the state of ->cpus_allowed as not be affected by
hotplug.

There's 3 companies that have this issue so that should tell you something.
We don't want to carry this patch forever. Many people consider the hotplug
behavior to be completely broken.

thanks,

- Joel

2020-03-26 19:50:01

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Thu, Mar 26, 2020 at 03:44:48PM -0400, Joel Fernandes wrote:
> It is not really that big a change. Please go over the patch, we are not
> changing anything with how ->cpus_allowed works and interacts with the rest
> of the system and the scheduler. We have just introduced a new mask to keep
> track of which CPUs were requested without them being affected by hotplug. On
> CPU onlining, we restore the state of ->cpus_allowed as not be affected by
> hotplug.

It's not the code. It's the behavior. I'm not flipping the behavior for
the existing cgroup1 users underneath them at this point. As-is, it's a
hard nack. If you really really really want it, put it behind a mount
option.

--
tejun

2020-03-26 19:59:41

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On 3/26/20 3:44 PM, Joel Fernandes wrote:
> Hi Tejun,
>
> On Thu, Mar 26, 2020 at 03:20:35PM -0400, Tejun Heo wrote:
>> On Thu, Mar 26, 2020 at 03:16:23PM -0400, Joel Fernandes (Google) wrote:
>>> This deliberately changes the behavior of the per-cpuset
>>> cpus file to not be effected by hotplug. When a cpu is offlined,
>>> it will be removed from the cpuset/cpus file. When a cpu is onlined,
>>> if the cpuset originally requested that that cpu was part of the cpuset,
>>> that cpu will be restored to the cpuset. The cpus files still
>>> have to be hierachical, but the ranges no longer have to be out of
>>> the currently online cpus, just the physically present cpus.
>> This is already the behavior on cgroup2 and I don't think we want to
>> introduce this big a behavior change to cgroup1 cpuset at this point.
> It is not really that big a change. Please go over the patch, we are not
> changing anything with how ->cpus_allowed works and interacts with the rest
> of the system and the scheduler. We have just introduced a new mask to keep
> track of which CPUs were requested without them being affected by hotplug. On
> CPU onlining, we restore the state of ->cpus_allowed as not be affected by
> hotplug.
>
> There's 3 companies that have this issue so that should tell you something.
> We don't want to carry this patch forever. Many people consider the hotplug
> behavior to be completely broken.
>
I think Tejun is concerned about a change in the default behavior of
cpuset v1.

There is a special v2 mode for cpuset that is enabled by the mount
option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
v2 behavior. I introduced this v2 mode a while back to address, I think,
a similar concern. Could you try that to see if it is able to address
your problem? If not, you can make some code adjustment within the
framework of the v2 mode. As long as it is an opt-in, I think we are
open to further change.

Cheers,
Longman

2020-03-26 20:06:10

by Sonny Rao

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Thu, Mar 26, 2020 at 12:57 PM Waiman Long <[email protected]> wrote:
>
> On 3/26/20 3:44 PM, Joel Fernandes wrote:
> > Hi Tejun,
> >
> > On Thu, Mar 26, 2020 at 03:20:35PM -0400, Tejun Heo wrote:
> >> On Thu, Mar 26, 2020 at 03:16:23PM -0400, Joel Fernandes (Google) wrote:
> >>> This deliberately changes the behavior of the per-cpuset
> >>> cpus file to not be effected by hotplug. When a cpu is offlined,
> >>> it will be removed from the cpuset/cpus file. When a cpu is onlined,
> >>> if the cpuset originally requested that that cpu was part of the cpuset,
> >>> that cpu will be restored to the cpuset. The cpus files still
> >>> have to be hierachical, but the ranges no longer have to be out of
> >>> the currently online cpus, just the physically present cpus.
> >> This is already the behavior on cgroup2 and I don't think we want to
> >> introduce this big a behavior change to cgroup1 cpuset at this point.
> > It is not really that big a change. Please go over the patch, we are not
> > changing anything with how ->cpus_allowed works and interacts with the rest
> > of the system and the scheduler. We have just introduced a new mask to keep
> > track of which CPUs were requested without them being affected by hotplug. On
> > CPU onlining, we restore the state of ->cpus_allowed as not be affected by
> > hotplug.
> >
> > There's 3 companies that have this issue so that should tell you something.
> > We don't want to carry this patch forever. Many people consider the hotplug
> > behavior to be completely broken.
> >
> I think Tejun is concerned about a change in the default behavior of
> cpuset v1.
>
> There is a special v2 mode for cpuset that is enabled by the mount
> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> v2 behavior. I introduced this v2 mode a while back to address, I think,
> a similar concern. Could you try that to see if it is able to address
> your problem? If not, you can make some code adjustment within the
> framework of the v2 mode. As long as it is an opt-in, I think we are
> open to further change.

I am surprised if anyone actually wants this behavior, we (Chrome OS)
found out about it accidentally, and then found that Android had been
carrying a patch to fix it. And if it were a desirable behavior then
why isn't it an option in v2?

>
> Cheers,
> Longman
>

2020-03-26 20:19:51

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Thu, Mar 26, 2020 at 01:05:04PM -0700, Sonny Rao wrote:
> I am surprised if anyone actually wants this behavior, we (Chrome OS)

The behavior is silly but consistent in that it doesn't allow empty active
cpusets and it has been like that for many many years now.

> found out about it accidentally, and then found that Android had been
> carrying a patch to fix it. And if it were a desirable behavior then
> why isn't it an option in v2?

Nobody is saying it's a good behavior (hence the change in cgroup2) and there
are situations where changing things like this is justifiable, but, here:

* The proposed change makes the interface inconsistent and does so
unconditionally on what is now a mostly legacy interface.

* There already is a newer version of the interface which includes the
desired behavior.

* I forgot but as Waiman pointed out, you can even opt-in to the new
behavior, which is more comprehensive without the inconsitencies,
while staying on cgroup1.

Thanks.

--
tejun

2020-03-26 20:24:45

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Thu, Mar 26, 2020 at 04:18:59PM -0400, Tejun Heo wrote:
> On Thu, Mar 26, 2020 at 01:05:04PM -0700, Sonny Rao wrote:
> > I am surprised if anyone actually wants this behavior, we (Chrome OS)
>
> The behavior is silly but consistent in that it doesn't allow empty active
> cpusets and it has been like that for many many years now.
>
> > found out about it accidentally, and then found that Android had been
> > carrying a patch to fix it. And if it were a desirable behavior then
> > why isn't it an option in v2?
>
> Nobody is saying it's a good behavior (hence the change in cgroup2) and there
> are situations where changing things like this is justifiable, but, here:
>
> * The proposed change makes the interface inconsistent and does so
> unconditionally on what is now a mostly legacy interface.
>
> * There already is a newer version of the interface which includes the
> desired behavior.
>
> * I forgot but as Waiman pointed out, you can even opt-in to the new
> behavior, which is more comprehensive without the inconsitencies,
> while staying on cgroup1.

Thank you Tejun, Waiman and Sonny. I confirmed the cgroup_v2_mode option
fixes cgroup v1's broken behavior.

We will use this mount option on our systems to fix it.

thanks,

- Joel

2020-03-26 21:48:27

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On 3/26/20 4:05 PM, Sonny Rao wrote:
>> I think Tejun is concerned about a change in the default behavior of
>> cpuset v1.
>>
>> There is a special v2 mode for cpuset that is enabled by the mount
>> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
>> v2 behavior. I introduced this v2 mode a while back to address, I think,
>> a similar concern. Could you try that to see if it is able to address
>> your problem? If not, you can make some code adjustment within the
>> framework of the v2 mode. As long as it is an opt-in, I think we are
>> open to further change.
> I am surprised if anyone actually wants this behavior, we (Chrome OS)
> found out about it accidentally, and then found that Android had been
> carrying a patch to fix it. And if it were a desirable behavior then
> why isn't it an option in v2?
>
I am a bit confused. The v2 mode make cpuset v1 behaves more like cpuset
v2. The original v1 behavior has some obvious issue that was fixed in
v2. So what v2 option are you talking about?

Regards,
Longman

2020-03-26 22:04:40

by Sonny Rao

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Thu, Mar 26, 2020 at 2:47 PM Waiman Long <[email protected]> wrote:
>
> On 3/26/20 4:05 PM, Sonny Rao wrote:
> >> I think Tejun is concerned about a change in the default behavior of
> >> cpuset v1.
> >>
> >> There is a special v2 mode for cpuset that is enabled by the mount
> >> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> >> v2 behavior. I introduced this v2 mode a while back to address, I think,
> >> a similar concern. Could you try that to see if it is able to address
> >> your problem? If not, you can make some code adjustment within the
> >> framework of the v2 mode. As long as it is an opt-in, I think we are
> >> open to further change.
> > I am surprised if anyone actually wants this behavior, we (Chrome OS)
> > found out about it accidentally, and then found that Android had been
> > carrying a patch to fix it. And if it were a desirable behavior then
> > why isn't it an option in v2?
> >
> I am a bit confused. The v2 mode make cpuset v1 behaves more like cpuset
> v2. The original v1 behavior has some obvious issue that was fixed in
> v2. So what v2 option are you talking about?

I was merely pointing out the behavior of the v1 implementation is so
undesirable that it wasn't kept at all in v2. IMHO, it's a bug that
should be fixed, and I think it's possible to keep the old behavior if
all cpus are offlined, but since you've added this option we can use
it instead.

>
> Regards,
> Longman
>

2020-03-27 01:28:55

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On 3/26/20 4:23 PM, Joel Fernandes wrote:
> On Thu, Mar 26, 2020 at 04:18:59PM -0400, Tejun Heo wrote: >> On Thu, Mar 26, 2020 at 01:05:04PM -0700, Sonny Rao wrote: >>> I am
surprised if anyone actually wants this behavior, we (Chrome >>> OS) >>
>> The behavior is silly but consistent in that it doesn't allow empty
>> active cpusets and it has been like that for many many years now. >>
>>> found out about it accidentally, and then found that Android had >>>
been carrying a patch to fix it. And if it were a desirable >>> behavior
then why isn't it an option in v2? >> >> Nobody is saying it's a good
behavior (hence the change in cgroup2) >> and there are situations where
changing things like this is >> justifiable, but, here: >> >> * The
proposed change makes the interface inconsistent and does so >>
unconditionally on what is now a mostly legacy interface. >> >> * There
already is a newer version of the interface which includes >> the
desired behavior. >> >> * I forgot but as Waiman pointed out, you can
even opt-in to the >> new behavior, which is more comprehensive without
the >> inconsitencies, while staying on cgroup1. > > Thank you Tejun,
Waiman and Sonny. I confirmed the cgroup_v2_mode > option fixes cgroup
v1's broken behavior. > > We will use this mount option on our systems
to fix it.
I am glad that it works for you.

I think the problem is that the v2_mode mount option is not that well
documented. Maybe I should send a patch to add some some description
about it in cgroup-v2.rst or cgroup-v1/cpusets.rst.

Cheers,
Longman



2020-03-27 03:33:56

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Thu, Mar 26, 2020 at 09:26:51PM -0400, Waiman Long wrote:
[...]
> I think the problem is that the v2_mode mount option is not that well
> documented. Maybe I should send a patch to add some some description
> about it in cgroup-v2.rst or cgroup-v1/cpusets.rst.

That is definitely worth it and I would fully support that.

thanks,

- Joel

2021-10-27 13:27:10

by Barry Song

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

> I think Tejun is concerned about a change in the default behavior of
> cpuset v1.
>
> There is a special v2 mode for cpuset that is enabled by the mount
> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> v2 behavior. I introduced this v2 mode a while back to address, I think,
> a similar concern. Could you try that to see if it is able to address
> your problem? If not, you can make some code adjustment within the
> framework of the v2 mode. As long as it is an opt-in, I think we are
> open to further change.

I am also able to reproduce on Ubuntu 21.04 LTS.

all docker will be put in this cgroups and its child cgroups:
/sys/fs/cgroup/cpuset/docker

disabling and enabling SMT by:
echo off > /sys/devices/system/cpu/smt/control
echo on > /sys/devices/system/cpu/smt/control

or unpluging and pluging CPUs by:
echo 0 > /sys/devices/system/cpu/cpuX/online
echo 1 > /sys/devices/system/cpu/cpuX/online

then all docker images will lose some CPUs.

So should we document the broken behaviours somewhere?

Thanks
barry

2021-10-27 14:51:53

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug


On 10/26/21 7:58 PM, Barry Song wrote:
>> I think Tejun is concerned about a change in the default behavior of
>> cpuset v1.
>>
>> There is a special v2 mode for cpuset that is enabled by the mount
>> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
>> v2 behavior. I introduced this v2 mode a while back to address, I think,
>> a similar concern. Could you try that to see if it is able to address
>> your problem? If not, you can make some code adjustment within the
>> framework of the v2 mode. As long as it is an opt-in, I think we are
>> open to further change.
> I am also able to reproduce on Ubuntu 21.04 LTS.
>
> all docker will be put in this cgroups and its child cgroups:
> /sys/fs/cgroup/cpuset/docker
>
> disabling and enabling SMT by:
> echo off > /sys/devices/system/cpu/smt/control
> echo on > /sys/devices/system/cpu/smt/control
>
> or unpluging and pluging CPUs by:
> echo 0 > /sys/devices/system/cpu/cpuX/online
> echo 1 > /sys/devices/system/cpu/cpuX/online
>
> then all docker images will lose some CPUs.
>
> So should we document the broken behaviours somewhere?

Is the special cpuset_v2_mode mount option able to fix the issue?

This mode is documented in

Documentation/admin-guide/cgroup-v1/cpuset.rst:

The cpuset.effective_cpus and cpuset.effective_mems files are
normally read-only copies of cpuset.cpus and cpuset.mems files
respectively.  If the cpuset cgroup filesystem is mounted with the
special "cpuset_v2_mode" option, the behavior of these files will become
similar to the corresponding files in cpuset v2.  In other words, hotplug
events will not change cpuset.cpus and cpuset.mems.  Those events will
only affect cpuset.effective_cpus and cpuset.effective_mems which show
the actual cpus and memory nodes that are currently used by this cpuset.
See Documentation/admin-guide/cgroup-v2.rst for more information about
cpuset v2 behavior.

Maybe we can make it more visible.

Cheers,
Longman

2021-10-27 15:41:57

by Barry Song

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Wed, Oct 27, 2021 at 2:06 PM Waiman Long <[email protected]> wrote:
>
>
> On 10/26/21 7:58 PM, Barry Song wrote:
> >> I think Tejun is concerned about a change in the default behavior of
> >> cpuset v1.
> >>
> >> There is a special v2 mode for cpuset that is enabled by the mount
> >> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> >> v2 behavior. I introduced this v2 mode a while back to address, I think,
> >> a similar concern. Could you try that to see if it is able to address
> >> your problem? If not, you can make some code adjustment within the
> >> framework of the v2 mode. As long as it is an opt-in, I think we are
> >> open to further change.
> > I am also able to reproduce on Ubuntu 21.04 LTS.
> >
> > all docker will be put in this cgroups and its child cgroups:
> > /sys/fs/cgroup/cpuset/docker
> >
> > disabling and enabling SMT by:
> > echo off > /sys/devices/system/cpu/smt/control
> > echo on > /sys/devices/system/cpu/smt/control
> >
> > or unpluging and pluging CPUs by:
> > echo 0 > /sys/devices/system/cpu/cpuX/online
> > echo 1 > /sys/devices/system/cpu/cpuX/online
> >
> > then all docker images will lose some CPUs.
> >
> > So should we document the broken behaviours somewhere?
>
> Is the special cpuset_v2_mode mount option able to fix the issue?
>
> This mode is documented in
>
> Documentation/admin-guide/cgroup-v1/cpuset.rst:
>
> The cpuset.effective_cpus and cpuset.effective_mems files are
> normally read-only copies of cpuset.cpus and cpuset.mems files
> respectively. If the cpuset cgroup filesystem is mounted with the
> special "cpuset_v2_mode" option, the behavior of these files will become
> similar to the corresponding files in cpuset v2. In other words, hotplug
> events will not change cpuset.cpus and cpuset.mems. Those events will
> only affect cpuset.effective_cpus and cpuset.effective_mems which show
> the actual cpus and memory nodes that are currently used by this cpuset.
> See Documentation/admin-guide/cgroup-v2.rst for more information about
> cpuset v2 behavior.
>
> Maybe we can make it more visible.

Is it possible to make cpuset_v2_mode true in default? not quite sure if
it will harm something.

>
> Cheers,
> Longman

Thanks
Barry

2021-10-27 15:48:29

by Waiman Long

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On 10/26/21 10:21 PM, Barry Song wrote:
> On Wed, Oct 27, 2021 at 2:06 PM Waiman Long <[email protected]> wrote:
>>
>> On 10/26/21 7:58 PM, Barry Song wrote:
>>>> I think Tejun is concerned about a change in the default behavior of
>>>> cpuset v1.
>>>>
>>>> There is a special v2 mode for cpuset that is enabled by the mount
>>>> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
>>>> v2 behavior. I introduced this v2 mode a while back to address, I think,
>>>> a similar concern. Could you try that to see if it is able to address
>>>> your problem? If not, you can make some code adjustment within the
>>>> framework of the v2 mode. As long as it is an opt-in, I think we are
>>>> open to further change.
>>> I am also able to reproduce on Ubuntu 21.04 LTS.
>>>
>>> all docker will be put in this cgroups and its child cgroups:
>>> /sys/fs/cgroup/cpuset/docker
>>>
>>> disabling and enabling SMT by:
>>> echo off > /sys/devices/system/cpu/smt/control
>>> echo on > /sys/devices/system/cpu/smt/control
>>>
>>> or unpluging and pluging CPUs by:
>>> echo 0 > /sys/devices/system/cpu/cpuX/online
>>> echo 1 > /sys/devices/system/cpu/cpuX/online
>>>
>>> then all docker images will lose some CPUs.
>>>
>>> So should we document the broken behaviours somewhere?
>> Is the special cpuset_v2_mode mount option able to fix the issue?
>>
>> This mode is documented in
>>
>> Documentation/admin-guide/cgroup-v1/cpuset.rst:
>>
>> The cpuset.effective_cpus and cpuset.effective_mems files are
>> normally read-only copies of cpuset.cpus and cpuset.mems files
>> respectively. If the cpuset cgroup filesystem is mounted with the
>> special "cpuset_v2_mode" option, the behavior of these files will become
>> similar to the corresponding files in cpuset v2. In other words, hotplug
>> events will not change cpuset.cpus and cpuset.mems. Those events will
>> only affect cpuset.effective_cpus and cpuset.effective_mems which show
>> the actual cpus and memory nodes that are currently used by this cpuset.
>> See Documentation/admin-guide/cgroup-v2.rst for more information about
>> cpuset v2 behavior.
>>
>> Maybe we can make it more visible.
> Is it possible to make cpuset_v2_mode true in default? not quite sure if
> it will harm something.

The cpuset_v2_mode is a change in v1 behavior and that is why it is an
opt-in as we don't want to break existing applications that have a
dependency on the current v1 behavior. If users switch to use cgroup v2,
they get the new behavior. Alternately, they can modify the system
startup script to use the v2 behavior by using the mount option. I don't
think we are going to change the v1 default behavior.

Cheers,
Longman

2021-10-27 16:05:56

by Barry Song

[permalink] [raw]
Subject: Re: [PATCH RFC] cpuset: Make cpusets get restored on hotplug

On Wed, Oct 27, 2021 at 3:35 PM Waiman Long <[email protected]> wrote:
>
> On 10/26/21 10:21 PM, Barry Song wrote:
> > On Wed, Oct 27, 2021 at 2:06 PM Waiman Long <[email protected]> wrote:
> >>
> >> On 10/26/21 7:58 PM, Barry Song wrote:
> >>>> I think Tejun is concerned about a change in the default behavior of
> >>>> cpuset v1.
> >>>>
> >>>> There is a special v2 mode for cpuset that is enabled by the mount
> >>>> option "cpuset_v2_mode". This causes the cpuset v1 to adopt some of the
> >>>> v2 behavior. I introduced this v2 mode a while back to address, I think,
> >>>> a similar concern. Could you try that to see if it is able to address
> >>>> your problem? If not, you can make some code adjustment within the
> >>>> framework of the v2 mode. As long as it is an opt-in, I think we are
> >>>> open to further change.
> >>> I am also able to reproduce on Ubuntu 21.04 LTS.
> >>>
> >>> all docker will be put in this cgroups and its child cgroups:
> >>> /sys/fs/cgroup/cpuset/docker
> >>>
> >>> disabling and enabling SMT by:
> >>> echo off > /sys/devices/system/cpu/smt/control
> >>> echo on > /sys/devices/system/cpu/smt/control
> >>>
> >>> or unpluging and pluging CPUs by:
> >>> echo 0 > /sys/devices/system/cpu/cpuX/online
> >>> echo 1 > /sys/devices/system/cpu/cpuX/online
> >>>
> >>> then all docker images will lose some CPUs.
> >>>
> >>> So should we document the broken behaviours somewhere?
> >> Is the special cpuset_v2_mode mount option able to fix the issue?
> >>
> >> This mode is documented in
> >>
> >> Documentation/admin-guide/cgroup-v1/cpuset.rst:
> >>
> >> The cpuset.effective_cpus and cpuset.effective_mems files are
> >> normally read-only copies of cpuset.cpus and cpuset.mems files
> >> respectively. If the cpuset cgroup filesystem is mounted with the
> >> special "cpuset_v2_mode" option, the behavior of these files will become
> >> similar to the corresponding files in cpuset v2. In other words, hotplug
> >> events will not change cpuset.cpus and cpuset.mems. Those events will
> >> only affect cpuset.effective_cpus and cpuset.effective_mems which show
> >> the actual cpus and memory nodes that are currently used by this cpuset.
> >> See Documentation/admin-guide/cgroup-v2.rst for more information about
> >> cpuset v2 behavior.
> >>
> >> Maybe we can make it more visible.
> > Is it possible to make cpuset_v2_mode true in default? not quite sure if
> > it will harm something.
>
> The cpuset_v2_mode is a change in v1 behavior and that is why it is an
> opt-in as we don't want to break existing applications that have a
> dependency on the current v1 behavior. If users switch to use cgroup v2,
> they get the new behavior. Alternately, they can modify the system
> startup script to use the v2 behavior by using the mount option. I don't
> think we are going to change the v1 default behavior.

ok, understood.

the old behaviour seems quite odd and linux distributions don't realize it.
for example, ubuntu is simply losing cpu for docker.

>
> Cheers,
> Longman

Thanks
barry