The default root is allocated and initialized at boot, so we
shouldn't destroy the default root when it's umounted, otherwise
it will lead to disaster.
Signed-off-by: Li Zefan <[email protected]>
---
kernel/cgroup.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a5f75ac..f73fe48 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1027,12 +1027,14 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
static void cgroup_get(struct cgroup *cgrp)
{
WARN_ON_ONCE(cgroup_is_dead(cgrp));
- css_get(&cgrp->self);
+ if (!(cgrp->self.flags & CSS_NO_REF))
+ css_get(&cgrp->self);
}
static void cgroup_put(struct cgroup *cgrp)
{
- css_put(&cgrp->self);
+ if (!(cgrp->self.flags & CSS_NO_REF))
+ css_put(&cgrp->self);
}
/**
@@ -1781,10 +1783,12 @@ static void cgroup_kill_sb(struct super_block *sb)
* This prevents new mounts by disabling percpu_ref_tryget_live().
* cgroup_mount() may wait for @root's release.
*/
- if (css_has_online_children(&root->cgrp.self))
+ if (css_has_online_children(&root->cgrp.self)) {
cgroup_put(&root->cgrp);
- else
- percpu_ref_kill(&root->cgrp.self.refcnt);
+ } else {
+ if (root != &cgrp_dfl_root)
+ percpu_ref_kill(&root->cgrp.self.refcnt);
+ }
kernfs_kill_sb(sb);
}
--
1.8.0.2
This fixes the failure path, so that we won't set the visible flag even
though the mount has failed.
Signed-off-by: Li Zefan <[email protected]>
---
kernel/cgroup.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dabc486..0b6b44e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1671,7 +1671,6 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
/* look for a matching existing root */
if (!opts.subsys_mask && !opts.none && !opts.name) {
- cgrp_dfl_root_visible = true;
root = &cgrp_dfl_root;
cgroup_get(&root->cgrp);
ret = 0;
@@ -1770,6 +1769,9 @@ out_free:
dentry = kernfs_mount(fs_type, flags, root->kf_root, &new_sb);
if (IS_ERR(dentry) || !new_sb)
cgroup_put(&root->cgrp);
+ else if (root == &cgrp_dfl_root)
+ cgrp_dfl_root_visible = true;
+
return dentry;
}
--
1.8.0.2
Before this patch (in a fresh system):
# cat /proc/$$/cgroup
# mount -t cgroup -o __DEVEL__sane_behavior xxx /cgroup
# umount /cgroup
# cat /proc/$$/cgroup
0:cpuset,cpu,cpuacct,memory,devices,freezer,net_cls,blkio,perf_event,net_prio,hugetlb:/
After this patch (in a fresh system):
# cat ...
# mount ...
# umount ...
# cat /proc/$$/cgroup
#
You won't see the default root after it's umounted.
Signed-off-by: Li Zefan <[email protected]>
---
kernel/cgroup.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f73fe48..dabc486 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1788,6 +1788,8 @@ static void cgroup_kill_sb(struct super_block *sb)
} else {
if (root != &cgrp_dfl_root)
percpu_ref_kill(&root->cgrp.self.refcnt);
+ else
+ cgrp_dfl_root_visible = false;
}
kernfs_kill_sb(sb);
--
1.8.0.2
Hello, Li.
On Tue, Jun 03, 2014 at 12:04:38PM +0800, Li Zefan wrote:
> static void cgroup_get(struct cgroup *cgrp)
> {
> WARN_ON_ONCE(cgroup_is_dead(cgrp));
> - css_get(&cgrp->self);
> + if (!(cgrp->self.flags & CSS_NO_REF))
> + css_get(&cgrp->self);
Hmmm? The same condition is tested by css_get(). Why should it be
tested again here?
> static void cgroup_put(struct cgroup *cgrp)
> {
> - css_put(&cgrp->self);
> + if (!(cgrp->self.flags & CSS_NO_REF))
> + css_put(&cgrp->self);
Ditto.
> @@ -1781,10 +1783,12 @@ static void cgroup_kill_sb(struct super_block *sb)
> * This prevents new mounts by disabling percpu_ref_tryget_live().
> * cgroup_mount() may wait for @root's release.
> */
> - if (css_has_online_children(&root->cgrp.self))
> + if (css_has_online_children(&root->cgrp.self)) {
> cgroup_put(&root->cgrp);
> - else
> - percpu_ref_kill(&root->cgrp.self.refcnt);
> + } else {
> + if (root != &cgrp_dfl_root)
> + percpu_ref_kill(&root->cgrp.self.refcnt);
> + }
As conceptually percpu_ref_kill() just puts the base ref and the
dfl_root's refcnt never reaches zero, it won't actually trigger.
Hmmm.... wouldn't the above leak a ref each time the default hierarchy
is unmounted tho? Shouldn't it be like the following?
if (root == &cgrp_dfl_root || css_has_online_children(...))
cgroup_put(&root->cgrp);
else
percpu_ref_kill(...);
Thanks.
--
tejun
On Tue, Jun 03, 2014 at 12:05:22PM +0800, Li Zefan wrote:
> Before this patch (in a fresh system):
>
> # cat /proc/$$/cgroup
> # mount -t cgroup -o __DEVEL__sane_behavior xxx /cgroup
> # umount /cgroup
> # cat /proc/$$/cgroup
> 0:cpuset,cpu,cpuacct,memory,devices,freezer,net_cls,blkio,perf_event,net_prio,hugetlb:/
>
> After this patch (in a fresh system):
>
> # cat ...
> # mount ...
> # umount ...
> # cat /proc/$$/cgroup
> #
>
> You won't see the default root after it's umounted.
Hmmmm... I intentionally left it visible tho. The only reason we gate
its visibility is to avoid disturbing userland which doesn't know about
and won't use the unified hierarchy. If the userland starts making
use of it, there's no reason to hide it again especially as that's
consistent with how other hierarchies behave too - they keep showing
up if they have lingering refs.
Thanks.
--
tejun
On Tue, Jun 03, 2014 at 12:05:59PM +0800, Li Zefan wrote:
> This fixes the failure path, so we won't set the visible flag though
> the mount is failed.
Same rationale. If the userland knows about it, there's no point in
hiding it.
Thanks.
--
tejun
On 2014/6/3 20:57, Tejun Heo wrote:
> Hello, Li.
>
> On Tue, Jun 03, 2014 at 12:04:38PM +0800, Li Zefan wrote:
>> static void cgroup_get(struct cgroup *cgrp)
>> {
>> WARN_ON_ONCE(cgroup_is_dead(cgrp));
>> - css_get(&cgrp->self);
>> + if (!(cgrp->self.flags & CSS_NO_REF))
>> + css_get(&cgrp->self);
>
> Hmmm? The same condition is tested by css_get(). Why should it be
> tested again here?
>
Oh, I completely ignored that.
>> static void cgroup_put(struct cgroup *cgrp)
>> {
>> - css_put(&cgrp->self);
>> + if (!(cgrp->self.flags & CSS_NO_REF))
>> + css_put(&cgrp->self);
>
> Ditto.
>
>> @@ -1781,10 +1783,12 @@ static void cgroup_kill_sb(struct super_block *sb)
>> * This prevents new mounts by disabling percpu_ref_tryget_live().
>> * cgroup_mount() may wait for @root's release.
>> */
>> - if (css_has_online_children(&root->cgrp.self))
>> + if (css_has_online_children(&root->cgrp.self)) {
>> cgroup_put(&root->cgrp);
>> - else
>> - percpu_ref_kill(&root->cgrp.self.refcnt);
>> + } else {
>> + if (root != &cgrp_dfl_root)
>> + percpu_ref_kill(&root->cgrp.self.refcnt);
>> + }
>
> As conceptually percpu_ref_kill() just puts the base ref and the
> dfl_root's refcnt never reaches zero, it won't actually trigger.
Yes it will, just try mount && umount.
I think it's because cgroup_get() is a no-op for CSS_NO_REF, so it has
only the base ref, so percpu_ref_kill() will actually schedule the
call to css_release().
> Hmmm.... wouldn't the above leak a ref each time the default hierarchy
> is unmounted tho? Shouldn't it be like the following?
>
cgroup_get() is a no-op for root cgroup of the default root, so there's
no leak, but still better to call cgroup_put().
I'll send an updated patch.
> if (root == &cgrp_dfl_root || css_has_online_children(...))
> cgroup_put(&root->cgrp);
> else
> percpu_ref_kill(...);
>
> Thanks.
>
On 2014/6/3 21:01, Tejun Heo wrote:
> On Tue, Jun 03, 2014 at 12:05:22PM +0800, Li Zefan wrote:
>> Before this patch (in a fresh system):
>>
>> # cat /proc/$$/cgroup
>> # mount -t cgroup -o __DEVEL__sane_behavior xxx /cgroup
>> # umount /cgroup
>> # cat /proc/$$/cgroup
>> 0:cpuset,cpu,cpuacct,memory,devices,freezer,net_cls,blkio,perf_event,net_prio,hugetlb:/
>>
>> After this patch (in a fresh system):
>>
>> # cat ...
>> # mount ...
>> # umount ...
>> # cat /proc/$$/cgroup
>> #
>>
>> You won't see the default root after it's umounted.
>
> Hmmmm... I intentionally left it visible tho. The only reason we gate
> its visibility is avoid disturbing userland which doesn't know about
> and won't use the unified hierarchy. If the userland starts making
> use of it, there's no reason to hide it again especially as that's
> consistent with how other hierarchies behave too - they keep showing
> up if they have lingering refs.
>
The example I gave produces the same result if sane_behavior is not specified,
so is this a behavioural change for the old interface?
Do we need a fix like this?
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3f46165..6f10cff 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1789,6 +1789,9 @@ static void cgroup_kill_sb(struct super_block *sb)
else
percpu_ref_kill(&root->cgrp.self.refcnt);
+ if (root == &cgrp_dfl_root && !cgroup_sane_behavior(&root->cgrp))
+ cgrp_dfl_root_visible = false;
+
kernfs_kill_sb(sb);
Hello,
On Wed, Jun 04, 2014 at 04:59:59PM +0800, Li Zefan wrote:
> The example I gave is the same result if sane_behavior is not specified,
> so this is a behavioural change for the old interface?
Hmmm? Either the userland knows about unified hierarchy or not and
there's no point in hiding it if we know that userland knows. It's
really a system-wide thing which happens once.
Thanks.
--
tejun
On 2014/6/5 9:20, Tejun Heo wrote:
> Hello,
>
> On Wed, Jun 04, 2014 at 04:59:59PM +0800, Li Zefan wrote:
>> The example I gave is the same result if sane_behavior is not specified,
>> so this is a behavioural change for the old interface?
>
> Hmmm? Either the userland knows about unified hierarchy or not and
> there's no point in hiding it if we know that userland knows. It's
> really a system-wide thing which happens once.
>
Yeah, it's reasonable.
I thought "mount -t cgroup xxx /cgroup" would mount the default hierarchy,
but I was wrong.