2016-04-17 20:04:44

by Serge Hallyn

[permalink] [raw]
Subject: Show virtualized dentry root in mountinfo for cgroupfs

With the current cgroup namespace patches, the root dentry path of a
mount as shown in /proc/self/mountinfo is the full global cgroup
path. It is common for userspace to use /proc/self/mountinfo to
search for cgroup mountpoints, and expect the root dentry path to
relate to the cgroup paths in /proc/self/cgroup. Patch 2 in this
set therefore virtualizes the root dentry path relative to the
reader's cgroup namespace root.

Patch 1 fixes a bug in kernfs_path_from_node_locked() which is
exposed by patch 2.


2016-04-17 20:04:43

by Serge Hallyn

[permalink] [raw]
Subject: [PATCH 1/2] kernfs_path_from_node_locked: don't overwrite nlen

From: Serge Hallyn <[email protected]>

We've calculated @len to be the bytes we need for '/..' entries from
@kn_from to the common ancestor, and calculated @nlen to be the extra
bytes we need to get from the common ancestor to @kn_to. We use them
as such at the end. But in the loop copying the actual entries, we
overwrite @nlen. Use a temporary variable for that instead.

Without this, the return length, when the buffer is large enough, is
wrong. (When the buffer is NULL or too small, the returned value is
correct. The buffer contents are also correct.)

Interestingly, no callers of this function are affected by this as of
yet. However the upcoming cgroup_show_path() will be.

Signed-off-by: Serge Hallyn <[email protected]>
---
fs/kernfs/dir.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 03b688d..37f9678 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -153,9 +153,9 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
p = buf + len + nlen;
*p = '\0';
for (kn = kn_to; kn != common; kn = kn->parent) {
- nlen = strlen(kn->name);
- p -= nlen;
- memcpy(p, kn->name, nlen);
+ size_t tmp = strlen(kn->name);
+ p -= tmp;
+ memcpy(p, kn->name, tmp);
*(--p) = '/';
}

--
2.7.4

2016-04-17 20:04:42

by Serge Hallyn

[permalink] [raw]
Subject: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

From: Serge Hallyn <[email protected]>

When showing a cgroupfs entry in mountinfo, show the
path of the mount root dentry relative to the reader's
cgroup namespace root.

Signed-off-by: Serge Hallyn <[email protected]>
---
fs/kernfs/mount.c | 14 ++++++++++++++
include/linux/kernfs.h | 2 ++
kernel/cgroup.c | 35 +++++++++++++++++++++++++++++++++++
3 files changed, 51 insertions(+)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index f73541f..3b78724 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/namei.h>
+#include <linux/seq_file.h>

#include "kernfs-internal.h"

@@ -40,6 +41,18 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
return 0;
}

+static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
+{
+ struct kernfs_node *node = dentry->d_fsdata;
+ struct kernfs_root *root = kernfs_root(node);
+ struct kernfs_syscall_ops *scops = root->syscall_ops;
+
+ if (scops && scops->show_path)
+ return scops->show_path(sf, node, root);
+
+ return seq_dentry(sf, dentry, " \t\n\\");
+}
+
const struct super_operations kernfs_sops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
@@ -47,6 +60,7 @@ const struct super_operations kernfs_sops = {

.remount_fs = kernfs_sop_remount_fs,
.show_options = kernfs_sop_show_options,
+ .show_path = kernfs_sop_show_path,
};

/**
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index c06c442..30f089e 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -152,6 +152,8 @@ struct kernfs_syscall_ops {
int (*rmdir)(struct kernfs_node *kn);
int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name);
+ int (*show_path)(struct seq_file *sf, struct kernfs_node *kn,
+ struct kernfs_root *root);
};

struct kernfs_root {
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 671dc05..9a0d7b3 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
return 0;
}

+static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
+ struct kernfs_root *kf_root)
+{
+ int len = 0, ret = 0;
+ char *buf = NULL;
+ struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
+ struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
+ struct cgroup *ns_cgroup;
+
+ mutex_lock(&cgroup_mutex);
+ spin_lock_bh(&css_set_lock);
+ ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot);
+ len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0);
+ if (len > 0)
+ buf = kmalloc(len + 1, GFP_ATOMIC);
+ if (buf)
+ ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + 1);
+
+ spin_unlock_bh(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
+
+ if (len <= 0)
+ return len;
+ if (!buf)
+ return -ENOMEM;
+ if (ret == len) {
+ seq_escape(sf, buf, " \t\n\\");
+ ret = 0;
+ } else if (ret >= 0)
+ ret = -EINVAL;
+ kfree(buf);
+ return ret;
+}
+
static int cgroup_show_options(struct seq_file *seq,
struct kernfs_root *kf_root)
{
@@ -5430,6 +5464,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
.mkdir = cgroup_mkdir,
.rmdir = cgroup_rmdir,
.rename = cgroup_rename,
+ .show_path = cgroup_show_path,
};

static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
--
2.7.4

2016-04-18 04:11:33

by Serge Hallyn

[permalink] [raw]
Subject: Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

On Sun, Apr 17, 2016 at 03:04:32PM -0500, [email protected] wrote:
> From: Serge Hallyn <[email protected]>
>
> When showing a cgroupfs entry in mountinfo, show the
> path of the mount root dentry relative to the reader's
> cgroup namespace root.
>
> Signed-off-by: Serge Hallyn <[email protected]>
> ---
> fs/kernfs/mount.c | 14 ++++++++++++++
> include/linux/kernfs.h | 2 ++
> kernel/cgroup.c | 35 +++++++++++++++++++++++++++++++++++
> 3 files changed, 51 insertions(+)
>
> diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
> index f73541f..3b78724 100644
> --- a/fs/kernfs/mount.c
> +++ b/fs/kernfs/mount.c
> @@ -15,6 +15,7 @@
> #include <linux/slab.h>
> #include <linux/pagemap.h>
> #include <linux/namei.h>
> +#include <linux/seq_file.h>
>
> #include "kernfs-internal.h"
>
> @@ -40,6 +41,18 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
> return 0;
> }
>
> +static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
> +{
> + struct kernfs_node *node = dentry->d_fsdata;
> + struct kernfs_root *root = kernfs_root(node);
> + struct kernfs_syscall_ops *scops = root->syscall_ops;
> +
> + if (scops && scops->show_path)
> + return scops->show_path(sf, node, root);
> +
> + return seq_dentry(sf, dentry, " \t\n\\");
> +}
> +
> const struct super_operations kernfs_sops = {
> .statfs = simple_statfs,
> .drop_inode = generic_delete_inode,
> @@ -47,6 +60,7 @@ const struct super_operations kernfs_sops = {
>
> .remount_fs = kernfs_sop_remount_fs,
> .show_options = kernfs_sop_show_options,
> + .show_path = kernfs_sop_show_path,
> };
>
> /**
> diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
> index c06c442..30f089e 100644
> --- a/include/linux/kernfs.h
> +++ b/include/linux/kernfs.h
> @@ -152,6 +152,8 @@ struct kernfs_syscall_ops {
> int (*rmdir)(struct kernfs_node *kn);
> int (*rename)(struct kernfs_node *kn, struct kernfs_node *new_parent,
> const char *new_name);
> + int (*show_path)(struct seq_file *sf, struct kernfs_node *kn,
> + struct kernfs_root *root);
> };
>
> struct kernfs_root {
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 671dc05..9a0d7b3 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
> return 0;
> }
>
> +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
> + struct kernfs_root *kf_root)
> +{
> + int len = 0, ret = 0;
> + char *buf = NULL;
> + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
> + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
> + struct cgroup *ns_cgroup;
> +
> + mutex_lock(&cgroup_mutex);

Hm, I can't grab the cgroup mutex here because I already have the
namespace_sem. But that's required by cset_cgroup_from_root(). Can
I just call that under rcu_read_lock() instead? (Not without
changing the lockdep_assert_held()). Is there another way to get the
info needed here?

> + spin_lock_bh(&css_set_lock);
> + ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot);
> + len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0);
> + if (len > 0)
> + buf = kmalloc(len + 1, GFP_ATOMIC);
> + if (buf)
> + ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + 1);
> +
> + spin_unlock_bh(&css_set_lock);
> + mutex_unlock(&cgroup_mutex);
> +
> + if (len <= 0)
> + return len;
> + if (!buf)
> + return -ENOMEM;
> + if (ret == len) {
> + seq_escape(sf, buf, " \t\n\\");
> + ret = 0;
> + } else if (ret >= 0)
> + ret = -EINVAL;
> + kfree(buf);
> + return ret;
> +}
> +
> static int cgroup_show_options(struct seq_file *seq,
> struct kernfs_root *kf_root)
> {
> @@ -5430,6 +5464,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
> .mkdir = cgroup_mkdir,
> .rmdir = cgroup_rmdir,
> .rename = cgroup_rename,
> + .show_path = cgroup_show_path,
> };
>
> static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
> --
> 2.7.4
>
> _______________________________________________
> Containers mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/containers

2016-04-18 05:29:10

by Serge E. Hallyn

[permalink] [raw]
Subject: [PATCH 3/2] cgroup_show_path: use a new helper to get current cgns css_set

Since we're getting current's cgroup namespace info, and are not
modifying it, we can use rcu_read_lock() instead of cgroup_mutex.

Signed-off-by: Serge Hallyn <[email protected]>
---
kernel/cgroup.c | 40 ++++++++++++++++++++++++++++++++++++----
1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9a0d7b3..cd8269e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1215,6 +1215,41 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_free_root(root);
}

+/*
+ * look up cgroup associated with current task's cgroup namespace on the
+ * specified hierarchy
+ */
+static struct cgroup *
+current_cgns_cgroup_from_root(struct cgroup_root *root)
+{
+ struct cgroup *res = NULL;
+ struct css_set *css;
+
+ lockdep_assert_held(&css_set_lock);
+
+ rcu_read_lock();
+
+ css = current->nsproxy->cgroup_ns->root_cset;
+ if (cset == &init_css_set) {
+ res = &root->cgrp;
+ } else {
+ struct cgrp_cset_link *link;
+
+ list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+ struct cgroup *c = link->cgrp;
+
+ if (c->root == root) {
+ res = c;
+ break;
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ BUG_ON(!res);
+ return res;
+}
+
/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
@@ -1598,13 +1633,11 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
{
int len = 0, ret = 0;
char *buf = NULL;
- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
struct cgroup *ns_cgroup;

- mutex_lock(&cgroup_mutex);
spin_lock_bh(&css_set_lock);
- ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot);
+ ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0);
if (len > 0)
buf = kmalloc(len + 1, GFP_ATOMIC);
@@ -1612,7 +1645,6 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + 1);

spin_unlock_bh(&css_set_lock);
- mutex_unlock(&cgroup_mutex);

if (len <= 0)
return len;
--
2.7.4

2016-04-19 01:23:11

by Eric W. Biederman

[permalink] [raw]
Subject: Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

"Serge E. Hallyn" <[email protected]> writes:

>> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
>> index 671dc05..9a0d7b3 100644
>> --- a/kernel/cgroup.c
>> +++ b/kernel/cgroup.c
>> @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
>> return 0;
>> }
>>
>> +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
>> + struct kernfs_root *kf_root)
>> +{
>> + int len = 0, ret = 0;
>> + char *buf = NULL;
>> + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
>> + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
>> + struct cgroup *ns_cgroup;
>> +
>> + mutex_lock(&cgroup_mutex);
>
> Hm, I can't grab the cgroup mutex here because I already have the
> namespace_sem. But that's required by cset_cgroup_from_root(). Can
> I just call that under rcu_read_lock() instead? (Not without
> changing the lockdep_assert_help()). Is there another way to get the
> info needed here?

Do we need the current cgroup namespace information at all?

Could we not get the relevant cgroup namespace from the mount of
cgroupfs?

In general the better path is not to have the contents of files depend on
who is reading the file.

Eric


2016-04-19 01:44:24

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

Quoting Eric W. Biederman ([email protected]):
> "Serge E. Hallyn" <[email protected]> writes:
>
> >> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> >> index 671dc05..9a0d7b3 100644
> >> --- a/kernel/cgroup.c
> >> +++ b/kernel/cgroup.c
> >> @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
> >> return 0;
> >> }
> >>
> >> +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
> >> + struct kernfs_root *kf_root)
> >> +{
> >> + int len = 0, ret = 0;
> >> + char *buf = NULL;
> >> + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
> >> + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
> >> + struct cgroup *ns_cgroup;
> >> +
> >> + mutex_lock(&cgroup_mutex);
> >
> > Hm, I can't grab the cgroup mutex here because I already have the
> > namespace_sem. But that's required by cset_cgroup_from_root(). Can
> > I just call that under rcu_read_lock() instead? (Not without
> > changing the lockdep_assert_help()). Is there another way to get the
> > info needed here?
>
> Do we need the current cgroup namespace information at all?
>
> Could we not get the relevant cgroup namespace from the mount of
> cgroupfs?

I don't think so. That was my first inclination. But at show_path()
all we have is the vfsmount->mnt_root. Since all cgroup namespaces
for a hierarchy share the same dentry tree and superblock, there's
no way to tell where the mount's namespace root is supposed to be.

whether we did

# enter new cgroup namespace rooted at cgroup /user.slice/user-1000.slice
mount -t cgroup -o freezer freezer /mnt

or

mount --bind /sys/fs/cgroup/freezer/user.slice/user-1000.slice /mnt

the mountinfo entry will be the same.

> In general the better path is not to have the contents of files depend on
> who is reading the file.

-serge

2016-04-19 04:05:51

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

Quoting Serge E. Hallyn ([email protected]):
> Quoting Eric W. Biederman ([email protected]):
> > "Serge E. Hallyn" <[email protected]> writes:
> >
> > >> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> > >> index 671dc05..9a0d7b3 100644
> > >> --- a/kernel/cgroup.c
> > >> +++ b/kernel/cgroup.c
> > >> @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
> > >> return 0;
> > >> }
> > >>
> > >> +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
> > >> + struct kernfs_root *kf_root)
> > >> +{
> > >> + int len = 0, ret = 0;
> > >> + char *buf = NULL;
> > >> + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
> > >> + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
> > >> + struct cgroup *ns_cgroup;
> > >> +
> > >> + mutex_lock(&cgroup_mutex);
> > >
> > > Hm, I can't grab the cgroup mutex here because I already have the
> > > namespace_sem. But that's required by cset_cgroup_from_root(). Can
> > > I just call that under rcu_read_lock() instead? (Not without
> > > changing the lockdep_assert_help()). Is there another way to get the
> > > info needed here?
> >
> > Do we need the current cgroup namespace information at all?
> >
> > Could we not get the relevant cgroup namespace from the mount of
> > cgroupfs?
>
> I don't think so. That was my first inclination. But at show_path()
> all we have is the vfsmunt->mnt_root. Since all cgroup namespaces
> for a hierarchy share the same dentry tree and superblock, there's
> no way to tell where the mount's namespace root is supposed to be.
>
> whether we did
>
> # enter new cgroup namespace rooted at cgroup /user.slice/user-1000.slice
> mount -t cgroup -o freezer freezer /mnt
>
> or
>
> mount --bind /sys/fs/cgroup/freezer/user.slice/user-1000.slice /mnt
>
> the mountinfo entry will be the same.
>
> > In general the better path is not to have the contents of files depend on
> > who is reading the file.

And actually, while as I said above this was my first inclination, I now
think that's wrong. /proc/$$/cgroup is virtualized per the reader. The
point of this patch is to make mountinfo virtualized analogously to
/proc/$$/cgroup, so that we can be certain how a particular cgroup dentry
relates to a task's actual cgroup. So the mountinfo dentry root path
should in fact depend on the reader.

Looking at it another way... The value we're talking about shows us
the path of the root dentry of a cgroup mount. If a task in cgns2
rooted at /a/b/c mounts a cgroupfs, it will see '/' as the root dentry.
If a task in cgns1 rooted at /a/b looks at that mountinfo, '/' would
be misleading. It really should be '/c'.

If there were security implications those might override this. But there
is no security benefit to this. (The usual security argument is about
the opener vs the reader, not the mounter versus the reader, but in either
case I maintain there is no security benefit to virtualizing these paths)

2016-04-20 19:43:19

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH 1/2] kernfs_path_from_node_locked: don't overwrite nlen

On Sun, Apr 17, 2016 at 03:04:31PM -0500, [email protected] wrote:
> From: Serge Hallyn <[email protected]>
>
> We've calculated @len to be the bytes we need for '/..' entries from
> @kn_from to the common ancestor, and calculated @nlen to be the extra
> bytes we need to get from the common ancestor to @kn_to. We use them
> as such at the end. But in the loop copying the actual entries, we
> overwrite @nlen. Use a temporary variable for that instead.
>
> Without this, the return length, when the buffer is large enough, is
> wrong. (When the buffer is NULL or too small, the returned value is
> correct. The buffer contents are also correct.)
>
> Interestingly, no callers of this function are affected by this as of
> yet. However the upcoming cgroup_show_path() will be.
>
> Signed-off-by: Serge Hallyn <[email protected]>

Acked-by: Tejun Heo <[email protected]>

Greg, can you please pick this one up for v4.6?

Thanks.

--
tejun

2016-04-26 02:42:13

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

Quoting Serge E. Hallyn ([email protected]):
> Quoting Serge E. Hallyn ([email protected]):
> > Quoting Eric W. Biederman ([email protected]):
> > > "Serge E. Hallyn" <[email protected]> writes:
> > >
> > > >> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> > > >> index 671dc05..9a0d7b3 100644
> > > >> --- a/kernel/cgroup.c
> > > >> +++ b/kernel/cgroup.c
> > > >> @@ -1593,6 +1593,40 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
> > > >> return 0;
> > > >> }
> > > >>
> > > >> +static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
> > > >> + struct kernfs_root *kf_root)
> > > >> +{
> > > >> + int len = 0, ret = 0;
> > > >> + char *buf = NULL;
> > > >> + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
> > > >> + struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
> > > >> + struct cgroup *ns_cgroup;
> > > >> +
> > > >> + mutex_lock(&cgroup_mutex);
> > > >
> > > > Hm, I can't grab the cgroup mutex here because I already have the
> > > > namespace_sem. But that's required by cset_cgroup_from_root(). Can
> > > > I just call that under rcu_read_lock() instead? (Not without
> > > > changing the lockdep_assert_help()). Is there another way to get the
> > > > info needed here?
> > >
> > > Do we need the current cgroup namespace information at all?
> > >
> > > Could we not get the relevant cgroup namespace from the mount of
> > > cgroupfs?
> >
> > I don't think so. That was my first inclination. But at show_path()
> > all we have is the vfsmunt->mnt_root. Since all cgroup namespaces
> > for a hierarchy share the same dentry tree and superblock, there's
> > no way to tell where the mount's namespace root is supposed to be.
> >
> > whether we did
> >
> > # enter new cgroup namespace rooted at cgroup /user.slice/user-1000.slice
> > mount -t cgroup -o freezer freezer /mnt
> >
> > or
> >
> > mount --bind /sys/fs/cgroup/freezer/user.slice/user-1000.slice /mnt
> >
> > the mountinfo entry will be the same.
> >
> > > In general the better path is not to have the contents of files depend on
> > > who is reading the file.
>
> And actually, while as i said above this was my first inclination, I now
> think that's wrong. /proc/$$/cgroup is virtualized per the reader. The
> point of this patch is to make mountinfo virtualized analogously to
> /proc/$$/cgroup, so that we can be certain how a particular cgroup dentry
> relates to a task's actual cgroup. So the mountinfo dentry root path
> should in fact depend on the reader.
>
> Looking at it another way... The value we're talking about shows us
> the path of the root dentry of a cgroup mount. If a task in cgns2
> rooted at /a/b/c mounts a cgroupfs, it will see '/' as the root dentry.
> If a task in cgns1 rooted at /a/b looks at that mountinfo, '/' would
> be misleading. It really should be '/c'.

So I think that for cgroup mount entries in mountinfo to be useful (i.e.
to criu) we either need the root dentry path to be given as relative to
the reader's cgroup namespace (as I have it in this patchset), or else
we need to add another piece of information in the mountinfo entry, such
as the nsfd inode number of the cgroup namespace in which it was
mounted.

-serge

2016-04-26 10:29:30

by Karel Zak

[permalink] [raw]
Subject: Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

On Mon, Apr 25, 2016 at 09:42:07PM -0500, Serge E. Hallyn wrote:
> > Looking at it another way... The value we're talking about shows us
> > the path of the root dentry of a cgroup mount. If a task in cgns2
> > rooted at /a/b/c mounts a cgroupfs, it will see '/' as the root dentry.
> > If a task in cgns1 rooted at /a/b looks at that mountinfo, '/' would
> > be misleading. It really should be '/c'.
>
> So I think that for cgroup mount entries in mountinfo to be useful (i.e.
> to criu) we either need the root dentry path to be given as relative to
> the reader's cgroup namespace (as I have it in this patchset), or else
> we need to add another piece of information in the mountinfo entry, such
> as the nsfd inode number of the cgroup namespace in which it was
> mounted.

In the ideal world there is no mountinfo file, but /proc/self/mountinfo/<id>/
directory with individual files, so every subsystem and filesystem has
absolute freedom to store there all relevant information. The result will
be also lucky kernel that does not have to always generate entire huge
mountinfo file for all mountpoints... etc. :-)

Karel

--
Karel Zak <[email protected]>
http://karelzak.blogspot.com

2016-04-26 14:36:29

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [PATCH 2/2] mountinfo: implement show_path for kernfs and cgroup

On Tue, Apr 26, 2016 at 12:29:25PM +0200, Karel Zak wrote:
> On Mon, Apr 25, 2016 at 09:42:07PM -0500, Serge E. Hallyn wrote:
> > > Looking at it another way... The value we're talking about shows us
> > > the path of the root dentry of a cgroup mount. If a task in cgns2
> > > rooted at /a/b/c mounts a cgroupfs, it will see '/' as the root dentry.
> > > If a task in cgns1 rooted at /a/b looks at that mountinfo, '/' would
> > > be misleading. It really should be '/c'.
> >
> > So I think that for cgroup mount entries in mountinfo to be useful (i.e.
> > to criu) we either need the root dentry path to be given as relative to
> > the reader's cgroup namespace (as I have it in this patchset), or else
> > we need to add another piece of information in the mountinfo entry, such
> > as the nsfd inode number of the cgroup namespace in which it was
> > mounted.
>
> In the ideal world there is no mountinfo file, but /proc/self/mountinfo/<id>/
> directory with individual files, so every subsystem and filesystem has
> absolute freedom to store there all relevant information. The result will
> be also lucky kernel that does not have to always generate entire huge
> mountinfo file for all mountpoins... etc. :-)

Yeah mountinfo does seem like a big stick to swing around every time I want
one little piece of information. Also might be good to have per-fstype
directories so we can just look under /proc/self/mountsdir/cgroupfs/ for
only cgroupfs <id>s.

There we might also find open fds for source and mountdir, kinda fitting
in with previous discussions of separating bdev_open() and mountat().

BTW, assuming this would in fact report source and mountpoint location
with fds, these would really (through realpath) be reported relative to
the reader's namespace, as I'm doing and advocating here.

-serge

2016-04-29 21:43:00

by Serge E. Hallyn

[permalink] [raw]
Subject: Re: [PATCH 3/2] cgroup_show_path: use a new helper to get current cgns css_set

[ I'm sorry, apparently I had sent the wrong version of this patch. Here
is the correct one from
https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=2016-04-17/kernfs.show&id=79f784037098fddae92258579ef494537a12f1b2
Thanks Michael for pointing that out
]

Since we're getting current's cgroup namespace info, and are
not modifying it, we don't need the cgroup_mutex.

Signed-off-by: Serge Hallyn <[email protected]>
---
kernel/cgroup.c | 40 ++++++++++++++++++++++++++++++++++++----
1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9a0d7b3..1243e3e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1215,6 +1215,41 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_free_root(root);
}

+/*
+ * look up cgroup associated with current task's cgroup namespace on the
+ * specified hierarchy
+ */
+static struct cgroup *
+current_cgns_cgroup_from_root(struct cgroup_root *root)
+{
+ struct cgroup *res = NULL;
+ struct css_set *cset;
+
+ lockdep_assert_held(&css_set_lock);
+
+ rcu_read_lock();
+
+ cset = current->nsproxy->cgroup_ns->root_cset;
+ if (cset == &init_css_set) {
+ res = &root->cgrp;
+ } else {
+ struct cgrp_cset_link *link;
+
+ list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
+ struct cgroup *c = link->cgrp;
+
+ if (c->root == root) {
+ res = c;
+ break;
+ }
+ }
+ }
+ rcu_read_unlock();
+
+ BUG_ON(!res);
+ return res;
+}
+
/* look up cgroup associated with given css_set on the specified hierarchy */
static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
struct cgroup_root *root)
@@ -1598,13 +1633,11 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
{
int len = 0, ret = 0;
char *buf = NULL;
- struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
struct cgroup *ns_cgroup;

- mutex_lock(&cgroup_mutex);
spin_lock_bh(&css_set_lock);
- ns_cgroup = cset_cgroup_from_root(ns->root_cset, kf_cgroot);
+ ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
len = kernfs_path_from_node(kf_node, ns_cgroup->kn, NULL, 0);
if (len > 0)
buf = kmalloc(len + 1, GFP_ATOMIC);
@@ -1612,7 +1645,6 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
ret = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, len + 1);

spin_unlock_bh(&css_set_lock);
- mutex_unlock(&cgroup_mutex);

if (len <= 0)
return len;
--
2.7.4