In cgroup_create(), if alloc_css_id() returns failure, the errno
is not propagated to userspace, so mkdir will fail silently.
To trigger this bug, we mount blkio (or memory subsystem), and
create more then 65534 cgroups. (The number of cgroups is limited
to 65535 if a subsystem has use_id == 1)
# mount -t cgroup -o blkio xxx /mnt
# for ((i = 0; i < 65534; i++)); do mkdir /mnt/$i; done
# mkdir /mnt/65534
(should return ENOSPC)
#
Signed-off-by: Li Zefan <[email protected]>
---
cgroup.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
--- a/kernel/cgroup.c.orig 2010-01-19 16:37:37.000000000 +0800
+++ a/kernel/cgroup.c 2010-01-19 16:39:07.000000000 +0800
@@ -3279,14 +3279,17 @@ static long cgroup_create(struct cgroup
for_each_subsys(root, ss) {
struct cgroup_subsys_state *css = ss->create(ss, cgrp);
+
if (IS_ERR(css)) {
err = PTR_ERR(css);
goto err_destroy;
}
init_cgroup_css(css, ss, cgrp);
- if (ss->use_id)
- if (alloc_css_id(ss, parent, cgrp))
+ if (ss->use_id) {
+ err = alloc_css_id(ss, parent, cgrp);
+ if (err)
goto err_destroy;
+ }
/* At error, ->destroy() callback has to free assigned ID. */
}
Don't Call get_pid_ns() before we locate/alloc the ns.
Signed-off-by: Li Zefan <[email protected]>
---
cgroup.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
--- a/kernel/cgroup.c.orig 2010-01-26 14:24:29.000000000 +0800
+++ a/kernel/cgroup.c 2010-01-26 14:24:44.000000000 +0800
@@ -2643,7 +2643,8 @@ static struct cgroup_pidlist *cgroup_pid
{
struct cgroup_pidlist *l;
/* don't need task_nsproxy() if we're looking at ourself */
- struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);
+ struct pid_namespace *ns = current->nsproxy->pid_ns;
+
/*
* We can't drop the pidlist_mutex before taking the l->mutex in case
* the last ref-holder is trying to remove l from the list at the same
@@ -2653,8 +2654,6 @@ static struct cgroup_pidlist *cgroup_pid
mutex_lock(&cgrp->pidlist_mutex);
list_for_each_entry(l, &cgrp->pidlists, links) {
if (l->key.type == type && l->key.ns == ns) {
- /* found a matching list - drop the extra refcount */
- put_pid_ns(ns);
/* make sure l doesn't vanish out from under us */
down_write(&l->mutex);
mutex_unlock(&cgrp->pidlist_mutex);
@@ -2665,13 +2664,12 @@ static struct cgroup_pidlist *cgroup_pid
l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
if (!l) {
mutex_unlock(&cgrp->pidlist_mutex);
- put_pid_ns(ns);
return l;
}
init_rwsem(&l->mutex);
down_write(&l->mutex);
l->key.type = type;
- l->key.ns = ns;
+ l->key.ns = get_pid_ns(ns);
l->use_count = 0; /* don't increment here */
l->list = NULL;
l->owner = cgrp;
Quoting Li Zefan ([email protected]):
> In cgroup_create(), if alloc_css_id() returns failure, the errno
> is not propagated to userspace, so mkdir will fail silently.
>
> To trigger this bug, we mount blkio (or memory subsystem), and
> create more then 65534 cgroups. (The number of cgroups is limited
> to 65535 if a subsystem has use_id == 1)
>
> # mount -t cgroup -o blkio xxx /mnt
> # for ((i = 0; i < 65534; i++)); do mkdir /mnt/$i; done
> # mkdir /mnt/65534
> (should return ENOSPC)
> #
>
> Signed-off-by: Li Zefan <[email protected]>
Yup.
Acked-by: Serge Hallyn <[email protected]>
> ---
> cgroup.c | 7 +++++--
> 1 file changed, 5 insertions(+), 2 deletions(-)
>
> --- a/kernel/cgroup.c.orig 2010-01-19 16:37:37.000000000 +0800
> +++ a/kernel/cgroup.c 2010-01-19 16:39:07.000000000 +0800
> @@ -3279,14 +3279,17 @@ static long cgroup_create(struct cgroup
>
> for_each_subsys(root, ss) {
> struct cgroup_subsys_state *css = ss->create(ss, cgrp);
> +
> if (IS_ERR(css)) {
> err = PTR_ERR(css);
> goto err_destroy;
> }
> init_cgroup_css(css, ss, cgrp);
> - if (ss->use_id)
> - if (alloc_css_id(ss, parent, cgrp))
> + if (ss->use_id) {
> + err = alloc_css_id(ss, parent, cgrp);
> + if (err)
> goto err_destroy;
> + }
> /* At error, ->destroy() callback has to free assigned ID. */
> }
>
> _______________________________________________
> Containers mailing list
> [email protected]
> https://lists.linux-foundation.org/mailman/listinfo/containers
On Tue, Jan 26, 2010 at 12:16 AM, Li Zefan <[email protected]> wrote:
>
> Signed-off-by: Li Zefan <[email protected]>
Acked-by: Paul Menage <[email protected]>
> ---
> ?cgroup.c | ? ?7 +++++--
> ?1 file changed, 5 insertions(+), 2 deletions(-)
>
> --- a/kernel/cgroup.c.orig ? ? ?2010-01-19 16:37:37.000000000 +0800
> +++ a/kernel/cgroup.c ? 2010-01-19 16:39:07.000000000 +0800
> @@ -3279,14 +3279,17 @@ static long cgroup_create(struct cgroup
>
> ? ? ? ?for_each_subsys(root, ss) {
> ? ? ? ? ? ? ? ?struct cgroup_subsys_state *css = ss->create(ss, cgrp);
> +
> ? ? ? ? ? ? ? ?if (IS_ERR(css)) {
> ? ? ? ? ? ? ? ? ? ? ? ?err = PTR_ERR(css);
> ? ? ? ? ? ? ? ? ? ? ? ?goto err_destroy;
> ? ? ? ? ? ? ? ?}
> ? ? ? ? ? ? ? ?init_cgroup_css(css, ss, cgrp);
> - ? ? ? ? ? ? ? if (ss->use_id)
> - ? ? ? ? ? ? ? ? ? ? ? if (alloc_css_id(ss, parent, cgrp))
> + ? ? ? ? ? ? ? if (ss->use_id) {
> + ? ? ? ? ? ? ? ? ? ? ? err = alloc_css_id(ss, parent, cgrp);
> + ? ? ? ? ? ? ? ? ? ? ? if (err)
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?goto err_destroy;
> + ? ? ? ? ? ? ? }
> ? ? ? ? ? ? ? ?/* At error, ->destroy() callback has to free assigned ID. */
> ? ? ? ?}
>
>
On Tue, Jan 26, 2010 at 12:17 AM, Li Zefan <[email protected]> wrote:
> Don't Call get_pid_ns() before we locate/alloc the ns.
>
> Signed-off-by: Li Zefan <[email protected]>
Acked-by: Paul Menage <[email protected]>
> ---
> ?cgroup.c | ? ?8 +++-----
> ?1 file changed, 3 insertions(+), 5 deletions(-)
>
> --- a/kernel/cgroup.c.orig ? ? ?2010-01-26 14:24:29.000000000 +0800
> +++ a/kernel/cgroup.c ? 2010-01-26 14:24:44.000000000 +0800
> @@ -2643,7 +2643,8 @@ static struct cgroup_pidlist *cgroup_pid
> ?{
> ? ? ? ?struct cgroup_pidlist *l;
> ? ? ? ?/* don't need task_nsproxy() if we're looking at ourself */
> - ? ? ? struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns);
> + ? ? ? struct pid_namespace *ns = current->nsproxy->pid_ns;
> +
> ? ? ? ?/*
> ? ? ? ? * We can't drop the pidlist_mutex before taking the l->mutex in case
> ? ? ? ? * the last ref-holder is trying to remove l from the list at the same
> @@ -2653,8 +2654,6 @@ static struct cgroup_pidlist *cgroup_pid
> ? ? ? ?mutex_lock(&cgrp->pidlist_mutex);
> ? ? ? ?list_for_each_entry(l, &cgrp->pidlists, links) {
> ? ? ? ? ? ? ? ?if (l->key.type == type && l->key.ns == ns) {
> - ? ? ? ? ? ? ? ? ? ? ? /* found a matching list - drop the extra refcount */
> - ? ? ? ? ? ? ? ? ? ? ? put_pid_ns(ns);
> ? ? ? ? ? ? ? ? ? ? ? ?/* make sure l doesn't vanish out from under us */
> ? ? ? ? ? ? ? ? ? ? ? ?down_write(&l->mutex);
> ? ? ? ? ? ? ? ? ? ? ? ?mutex_unlock(&cgrp->pidlist_mutex);
> @@ -2665,13 +2664,12 @@ static struct cgroup_pidlist *cgroup_pid
> ? ? ? ?l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
> ? ? ? ?if (!l) {
> ? ? ? ? ? ? ? ?mutex_unlock(&cgrp->pidlist_mutex);
> - ? ? ? ? ? ? ? put_pid_ns(ns);
> ? ? ? ? ? ? ? ?return l;
> ? ? ? ?}
> ? ? ? ?init_rwsem(&l->mutex);
> ? ? ? ?down_write(&l->mutex);
> ? ? ? ?l->key.type = type;
> - ? ? ? l->key.ns = ns;
> + ? ? ? l->key.ns = get_pid_ns(ns);
> ? ? ? ?l->use_count = 0; /* don't increment here */
> ? ? ? ?l->list = NULL;
> ? ? ? ?l->owner = cgrp;
>
On Tue, 26 Jan 2010 15:01:26 -0800
Paul Menage <[email protected]> wrote:
> On Tue, Jan 26, 2010 at 12:16 AM, Li Zefan <[email protected]> wrote:
> >
> > Signed-off-by: Li Zefan <[email protected]>
>
> Acked-by: Paul Menage <[email protected]>
>
Thank you.
Acked-by: KAMEZAWA Hiroyuki <[email protected]>
> > ---
> > cgroup.c | 7 +++++--
> > 1 file changed, 5 insertions(+), 2 deletions(-)
> >
> > --- a/kernel/cgroup.c.orig 2010-01-19 16:37:37.000000000 +0800
> > +++ a/kernel/cgroup.c 2010-01-19 16:39:07.000000000 +0800
> > @@ -3279,14 +3279,17 @@ static long cgroup_create(struct cgroup
> >
> > for_each_subsys(root, ss) {
> > struct cgroup_subsys_state *css = ss->create(ss, cgrp);
> > +
> > if (IS_ERR(css)) {
> > err = PTR_ERR(css);
> > goto err_destroy;
> > }
> > init_cgroup_css(css, ss, cgrp);
> > - if (ss->use_id)
> > - if (alloc_css_id(ss, parent, cgrp))
> > + if (ss->use_id) {
> > + err = alloc_css_id(ss, parent, cgrp);
> > + if (err)
> > goto err_destroy;
> > + }
> > /* At error, ->destroy() callback has to free assigned ID. */
> > }
> >
> >
>