2013-03-27 08:36:58

by Michal Hocko

[permalink] [raw]
Subject: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

As cgroup supports rename, it's unsafe to dereference dentry->d_name
without proper vfs locks. Fix this by using cgroup_name() rather than
dentry directly.

Also open code memcg_cache_name because it is called only from
kmem_cache_dup which frees the returned name right after
kmem_cache_create_memcg makes a copy of it. Such a short-lived
allocation doesn't make too much sense. So replace it by a static
buffer as kmem_cache_dup is called with memcg_cache_mutex.

Signed-off-by: Li Zefan <[email protected]>
Signed-off-by: Michal Hocko <[email protected]>
Acked-by: Glauber Costa <[email protected]>
---
mm/memcontrol.c | 64 ++++++++++++++++++++++++++++---------------------------
1 file changed, 33 insertions(+), 31 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f608546..b30547b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3364,52 +3364,54 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
schedule_work(&cachep->memcg_params->destroy);
}

-static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
-{
- char *name;
- struct dentry *dentry;
-
- rcu_read_lock();
- dentry = rcu_dereference(memcg->css.cgroup->dentry);
- rcu_read_unlock();
-
- BUG_ON(dentry == NULL);
-
- name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
- memcg_cache_id(memcg), dentry->d_name.name);
-
- return name;
-}
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allow them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);

+/*
+ * Called with memcg_cache_mutex held
+ */
static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
struct kmem_cache *s)
{
- char *name;
struct kmem_cache *new;
+ static char *tmp_name = NULL;

- name = memcg_cache_name(memcg, s);
- if (!name)
- return NULL;
+ lockdep_assert_held(&memcg_cache_mutex);
+
+ /*
+ * kmem_cache_create_memcg duplicates the given name and
+ * cgroup_name for this name requires RCU context.
+ * This static temporary buffer is used to prevent from
+ * pointless shortliving allocation.
+ */
+ if (!tmp_name) {
+ tmp_name = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ WARN_ON_ONCE(!tmp_name);
+ if (!tmp_name)
+ return NULL;
+ }
+
+ rcu_read_lock();
+ snprintf(tmp_name, PAGE_SIZE, "%s(%d:%s)", s->name,
+ memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup));
+ rcu_read_unlock();

- new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+ new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
(s->flags & ~SLAB_PANIC), s->ctor, s);

if (new)
new->allocflags |= __GFP_KMEMCG;

- kfree(name);
return new;
}

-/*
- * This lock protects updaters, not readers. We want readers to be as fast as
- * they can, and they will either see NULL or a valid cache value. Our model
- * allow them to see NULL, in which case the root memcg will be selected.
- *
- * We need this lock because multiple allocations to the same cache from a non
- * will span more than one worker. Only one of them can create the cache.
- */
-static DEFINE_MUTEX(memcg_cache_mutex);
static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
struct kmem_cache *cachep)
{
--
1.7.10.4


2013-03-27 14:59:41

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
> As cgroup supports rename, it's unsafe to dereference dentry->d_name
> without proper vfs locks. Fix this by using cgroup_name() rather than
> dentry directly.
>
> Also open code memcg_cache_name because it is called only from
> kmem_cache_dup which frees the returned name right after
> kmem_cache_create_memcg makes a copy of it. Such a short-lived
> allocation doesn't make too much sense. So replace it by a static
> buffer as kmem_cache_dup is called with memcg_cache_mutex.
>
> Signed-off-by: Li Zefan <[email protected]>
> Signed-off-by: Michal Hocko <[email protected]>
> Acked-by: Glauber Costa <[email protected]>
> ---
> mm/memcontrol.c | 64 ++++++++++++++++++++++++++++---------------------------
> 1 file changed, 33 insertions(+), 31 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index f608546..b30547b 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3364,52 +3364,54 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
> schedule_work(&cachep->memcg_params->destroy);
> }
>
> -static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
> -{
> - char *name;
> - struct dentry *dentry;
> -
> - rcu_read_lock();
> - dentry = rcu_dereference(memcg->css.cgroup->dentry);
> - rcu_read_unlock();
> -
> - BUG_ON(dentry == NULL);
> -
> - name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
> - memcg_cache_id(memcg), dentry->d_name.name);
> -
> - return name;
> -}
> +/*
> + * This lock protects updaters, not readers. We want readers to be as fast as
> + * they can, and they will either see NULL or a valid cache value. Our model
> + * allow them to see NULL, in which case the root memcg will be selected.
> + *
> + * We need this lock because multiple allocations to the same cache from a non
> + * will span more than one worker. Only one of them can create the cache.
> + */
> +static DEFINE_MUTEX(memcg_cache_mutex);
>
> +/*
> + * Called with memcg_cache_mutex held
> + */
> static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
> struct kmem_cache *s)
> {
> - char *name;
> struct kmem_cache *new;
> + static char *tmp_name = NULL;
>
> - name = memcg_cache_name(memcg, s);
> - if (!name)
> - return NULL;
> + lockdep_assert_held(&memcg_cache_mutex);
> +
> + /*
> + * kmem_cache_create_memcg duplicates the given name and
> + * cgroup_name for this name requires RCU context.
> + * This static temporary buffer is used to prevent from
> + * pointless shortliving allocation.
> + */
> + if (!tmp_name) {
> + tmp_name = kmalloc(PAGE_SIZE, GFP_KERNEL);
> + WARN_ON_ONCE(!tmp_name);

Just use the page allocator directly and get a free allocation failure
warning. Then again, order-0 pages are considered cheap enough that
they never even fail in our current implementation.

Which brings me to my other point: why not just a simple single-page
allocation? This just seems a little overelaborate. I think this
path would be taken predominantly after cgroup creation and fork where
we do a bunch of allocations anyway. And it happens asynchroneously
from userspace, so it's not even really performance critical.

2013-03-27 15:11:08

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed 27-03-13 10:58:25, Johannes Weiner wrote:
> On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
[...]
> > + /*
> > + * kmem_cache_create_memcg duplicates the given name and
> > + * cgroup_name for this name requires RCU context.
> > + * This static temporary buffer is used to prevent from
> > + * pointless shortliving allocation.
> > + */
> > + if (!tmp_name) {
> > + tmp_name = kmalloc(PAGE_SIZE, GFP_KERNEL);
> > + WARN_ON_ONCE(!tmp_name);
>
> Just use the page allocator directly and get a free allocation failure
> warning.

WARN_ON_ONCE is probably pointless.

> Then again, order-0 pages are considered cheap enough that they never
> even fail in our current implementation.
>
> Which brings me to my other point: why not just a simple single-page
> allocation?

No objection from me. I was previously thinking about the "proper"
size for something that is a file name. So I originally wanted to use
PATH_MAX instead but ended up with PAGE_SIZE for reasons I do not
remember now. Maybe we can use NAME_MAX instead. I just do not like to
use page allocator directly when allocatating something like strings
etc...

To be honest, I do not care much which way to go.

> This just seems a little overelaborate. I think this path would be
> taken predominantly after cgroup creation and fork where we do a bunch
> of allocations anyway. And it happens asynchroneously from userspace,
> so it's not even really performance critical.

--
Michal Hocko
SUSE Labs

2013-03-27 15:19:25

by Glauber Costa

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On 03/27/2013 07:11 PM, Michal Hocko wrote:
> On Wed 27-03-13 10:58:25, Johannes Weiner wrote:
>> On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
> [...]
>>> + /*
>>> + * kmem_cache_create_memcg duplicates the given name and
>>> + * cgroup_name for this name requires RCU context.
>>> + * This static temporary buffer is used to prevent from
>>> + * pointless shortliving allocation.
>>> + */
>>> + if (!tmp_name) {
>>> + tmp_name = kmalloc(PAGE_SIZE, GFP_KERNEL);
>>> + WARN_ON_ONCE(!tmp_name);
>>
>> Just use the page allocator directly and get a free allocation failure
>> warning.
>
> WARN_ON_ONCE is probably pointless.
>
>> Then again, order-0 pages are considered cheap enough that they never
>> even fail in our current implementation.
>>
>> Which brings me to my other point: why not just a simple single-page
>> allocation?
>
> No objection from me. I was previously thinking about the "proper"
> size for something that is a file name. So I originally wanted to use
> PATH_MAX instead but ended up with PAGE_SIZE for reasons I do not
> remember now.

theoretically, this is PATH_MAX + max cache name.

2013-03-27 15:32:25

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed 27-03-13 19:19:58, Glauber Costa wrote:
> On 03/27/2013 07:11 PM, Michal Hocko wrote:
> > On Wed 27-03-13 10:58:25, Johannes Weiner wrote:
> >> On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
> > [...]
> >>> + /*
> >>> + * kmem_cache_create_memcg duplicates the given name and
> >>> + * cgroup_name for this name requires RCU context.
> >>> + * This static temporary buffer is used to prevent from
> >>> + * pointless shortliving allocation.
> >>> + */
> >>> + if (!tmp_name) {
> >>> + tmp_name = kmalloc(PAGE_SIZE, GFP_KERNEL);
> >>> + WARN_ON_ONCE(!tmp_name);
> >>
> >> Just use the page allocator directly and get a free allocation failure
> >> warning.
> >
> > WARN_ON_ONCE is probably pointless.
> >
> >> Then again, order-0 pages are considered cheap enough that they never
> >> even fail in our current implementation.
> >>
> >> Which brings me to my other point: why not just a simple single-page
> >> allocation?
> >
> > No objection from me. I was previously thinking about the "proper"
> > size for something that is a file name. So I originally wanted to use
> > PATH_MAX instead but ended up with PAGE_SIZE for reasons I do not
> > remember now.
>
> theoretically, this is PATH_MAX + max cache name.

So do you prefer kmalloc(PATH_MAX) or the page allocator directly as
Johannes suggests? I agree tha kamlloc(PAGE_SIZE) looks weird.

--
Michal Hocko
SUSE Labs

2013-03-27 15:33:53

by Johannes Weiner

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed, Mar 27, 2013 at 04:11:04PM +0100, Michal Hocko wrote:
> On Wed 27-03-13 10:58:25, Johannes Weiner wrote:
> > On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
> [...]
> > > + /*
> > > + * kmem_cache_create_memcg duplicates the given name and
> > > + * cgroup_name for this name requires RCU context.
> > > + * This static temporary buffer is used to prevent from
> > > + * pointless shortliving allocation.
> > > + */
> > > + if (!tmp_name) {
> > > + tmp_name = kmalloc(PAGE_SIZE, GFP_KERNEL);
> > > + WARN_ON_ONCE(!tmp_name);
> >
> > Just use the page allocator directly and get a free allocation failure
> > warning.
>
> WARN_ON_ONCE is probably pointless.
>
> > Then again, order-0 pages are considered cheap enough that they never
> > even fail in our current implementation.
> >
> > Which brings me to my other point: why not just a simple single-page
> > allocation?
>
> No objection from me. I was previously thinking about the "proper"
> size for something that is a file name. So I originally wanted to use
> PATH_MAX instead but ended up with PAGE_SIZE for reasons I do not
> remember now. Maybe we can use NAME_MAX instead. I just do not like to
> use page allocator directly when allocatating something like strings
> etc...

Don't grep for GFP_TEMPORARY then, we do it in a couple places :-)
NAME_MAX is just for single dentry names, not path names, no? Might
be a little short.

2013-03-27 15:47:05

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed 27-03-13 11:32:26, Johannes Weiner wrote:
> On Wed, Mar 27, 2013 at 04:11:04PM +0100, Michal Hocko wrote:
> > On Wed 27-03-13 10:58:25, Johannes Weiner wrote:
> > > On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
> > [...]
> > > > + /*
> > > > + * kmem_cache_create_memcg duplicates the given name and
> > > > + * cgroup_name for this name requires RCU context.
> > > > + * This static temporary buffer is used to prevent from
> > > > + * pointless shortliving allocation.
> > > > + */
> > > > + if (!tmp_name) {
> > > > + tmp_name = kmalloc(PAGE_SIZE, GFP_KERNEL);
> > > > + WARN_ON_ONCE(!tmp_name);
> > >
> > > Just use the page allocator directly and get a free allocation failure
> > > warning.
> >
> > WARN_ON_ONCE is probably pointless.
> >
> > > Then again, order-0 pages are considered cheap enough that they never
> > > even fail in our current implementation.
> > >
> > > Which brings me to my other point: why not just a simple single-page
> > > allocation?
> >
> > No objection from me. I was previously thinking about the "proper"
> > size for something that is a file name. So I originally wanted to use
> > PATH_MAX instead but ended up with PAGE_SIZE for reasons I do not
> > remember now. Maybe we can use NAME_MAX instead. I just do not like to
> > use page allocator directly when allocatating something like strings
> > etc...
>
> Don't grep for GFP_TEMPORARY then, we do it in a couple places :-)

This is what Li suggested in the beginning and right, I didn't like it.

> NAME_MAX is just for single dentry names, not path names, no? Might
> be a little short.

Yes, Glauber confirmed that MAX_PATH is needed.

--
Michal Hocko
SUSE Labs

2013-03-27 16:15:34

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
> +/*
> + * Called with memcg_cache_mutex held
> + */
> static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
> struct kmem_cache *s)

Maybe the name could signify it's part of memcg?

Thanks.

--
tejun

2013-03-27 16:19:10

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed 27-03-13 09:15:27, Tejun Heo wrote:
> On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
> > +/*
> > + * Called with memcg_cache_mutex held
> > + */
> > static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
> > struct kmem_cache *s)
>
> Maybe the name could signify it's part of memcg?

kmem_ prefix is used for all CONFIG_MEMCG_KMEM functions. I understand
it clashes with sl?b naming but this is out of scope of this patch IMO.

--
Michal Hocko
SUSE Labs

2013-03-27 16:21:06

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed, Mar 27, 2013 at 9:19 AM, Michal Hocko <[email protected]> wrote:
>> Maybe the name could signify it's part of memcg?
>
> kmem_ prefix is used for all CONFIG_MEMCG_KMEM functions. I understand
> it clashes with sl?b naming but this is out of scope of this patch IMO.

Oh, it's not using kmemcg? I see. Maybe we can rename later.

Thanks.

--
tejun

2013-03-27 16:27:10

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed 27-03-13 09:21:02, Tejun Heo wrote:
> On Wed, Mar 27, 2013 at 9:19 AM, Michal Hocko <[email protected]> wrote:
> >> Maybe the name could signify it's part of memcg?
> >
> > kmem_ prefix is used for all CONFIG_MEMCG_KMEM functions. I understand
> > it clashes with sl?b naming but this is out of scope of this patch IMO.
>
> Oh, it's not using kmemcg? I see. Maybe we can rename later.

Some parts use memcg_kmem_* other kmem_. A cleanup would be nice.
Glauber?

--
Michal Hocko
SUSE Labs

2013-03-27 17:32:28

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed 27-03-13 16:32:20, Michal Hocko wrote:
> On Wed 27-03-13 19:19:58, Glauber Costa wrote:
> > On 03/27/2013 07:11 PM, Michal Hocko wrote:
> > > On Wed 27-03-13 10:58:25, Johannes Weiner wrote:
> > >> On Wed, Mar 27, 2013 at 09:36:39AM +0100, Michal Hocko wrote:
> > > [...]
> > >>> + /*
> > >>> + * kmem_cache_create_memcg duplicates the given name and
> > >>> + * cgroup_name for this name requires RCU context.
> > >>> + * This static temporary buffer is used to prevent from
> > >>> + * pointless shortliving allocation.
> > >>> + */
> > >>> + if (!tmp_name) {
> > >>> + tmp_name = kmalloc(PAGE_SIZE, GFP_KERNEL);
> > >>> + WARN_ON_ONCE(!tmp_name);
> > >>
> > >> Just use the page allocator directly and get a free allocation failure
> > >> warning.
> > >
> > > WARN_ON_ONCE is probably pointless.
> > >
> > >> Then again, order-0 pages are considered cheap enough that they never
> > >> even fail in our current implementation.
> > >>
> > >> Which brings me to my other point: why not just a simple single-page
> > >> allocation?
> > >
> > > No objection from me. I was previously thinking about the "proper"
> > > size for something that is a file name. So I originally wanted to use
> > > PATH_MAX instead but ended up with PAGE_SIZE for reasons I do not
> > > remember now.
> >
> > theoretically, this is PATH_MAX + max cache name.
>
> So do you prefer kmalloc(PATH_MAX) or the page allocator directly as
> Johannes suggests? I agree tha kamlloc(PAGE_SIZE) looks weird.

Removed WARN_ON_ONCE as suggested by Johannes and kept kmalloc with
PATH_MAX used instead of PAGE_SIZE. I've kept Glauber's acked-by but I
can remove it.
---
>From 982470e1dc60c84b89a00dae22ef209628887b98 Mon Sep 17 00:00:00 2001
From: Michal Hocko <[email protected]>
Date: Wed, 27 Mar 2013 18:28:42 +0100
Subject: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

As cgroup supports rename, it's unsafe to dereference dentry->d_name
without proper vfs locks. Fix this by using cgroup_name() rather than
dentry directly.

Also open code memcg_cache_name because it is called only from
kmem_cache_dup which frees the returned name right after
kmem_cache_create_memcg makes a copy of it. Such a short-lived
allocation doesn't make too much sense. So replace it by a static
buffer as kmem_cache_dup is called with memcg_cache_mutex.

Signed-off-by: Li Zefan <[email protected]>
Signed-off-by: Michal Hocko <[email protected]>
Acked-by: Glauber Costa <[email protected]>
---
mm/memcontrol.c | 63 ++++++++++++++++++++++++++++---------------------------
1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 53b8201..db81f2a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3214,52 +3214,53 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
schedule_work(&cachep->memcg_params->destroy);
}

-static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
-{
- char *name;
- struct dentry *dentry;
-
- rcu_read_lock();
- dentry = rcu_dereference(memcg->css.cgroup->dentry);
- rcu_read_unlock();
-
- BUG_ON(dentry == NULL);
-
- name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
- memcg_cache_id(memcg), dentry->d_name.name);
-
- return name;
-}
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allow them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);

+/*
+ * Called with memcg_cache_mutex held
+ */
static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
struct kmem_cache *s)
{
- char *name;
struct kmem_cache *new;
+ static char *tmp_name = NULL;

- name = memcg_cache_name(memcg, s);
- if (!name)
- return NULL;
+ lockdep_assert_held(&memcg_cache_mutex);
+
+ /*
+ * kmem_cache_create_memcg duplicates the given name and
+ * cgroup_name for this name requires RCU context.
+ * This static temporary buffer is used to prevent from
+ * pointless shortliving allocation.
+ */
+ if (!tmp_name) {
+ tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!tmp_name)
+ return NULL;
+ }
+
+ rcu_read_lock();
+ snprintf(tmp_name, PAGE_SIZE, "%s(%d:%s)", s->name,
+ memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup));
+ rcu_read_unlock();

- new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+ new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
(s->flags & ~SLAB_PANIC), s->ctor, s);

if (new)
new->allocflags |= __GFP_KMEMCG;

- kfree(name);
return new;
}

-/*
- * This lock protects updaters, not readers. We want readers to be as fast as
- * they can, and they will either see NULL or a valid cache value. Our model
- * allow them to see NULL, in which case the root memcg will be selected.
- *
- * We need this lock because multiple allocations to the same cache from a non
- * will span more than one worker. Only one of them can create the cache.
- */
-static DEFINE_MUTEX(memcg_cache_mutex);
static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
struct kmem_cache *cachep)
{
--
1.7.10.4

--
Michal Hocko
SUSE Labs

2013-03-28 07:21:51

by Glauber Costa

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On 03/27/2013 08:27 PM, Michal Hocko wrote:
> On Wed 27-03-13 09:21:02, Tejun Heo wrote:
>> On Wed, Mar 27, 2013 at 9:19 AM, Michal Hocko <[email protected]> wrote:
>>>> Maybe the name could signify it's part of memcg?
>>>
>>> kmem_ prefix is used for all CONFIG_MEMCG_KMEM functions. I understand
>>> it clashes with sl?b naming but this is out of scope of this patch IMO.
>>
>> Oh, it's not using kmemcg? I see. Maybe we can rename later.
>
> Some parts use memcg_kmem_* other kmem_. A cleanup would be nice.
> Glauber?
>
I have been using kmem_ only in functions that will deal directly with
the slab caches and with the single purpose of operating them.

kmem_cache_destroy_work_func => worker interface to kmem_cache_destroy
kmem_cache_destroy_memcg_children => cache destructor iterator
kmem_cache_dup => interface to kmem_cache_create

All the other functions start with memcg_
Analogously, all slab-side functions that deal with memcg *ends* with
_memcg. except the functions that are only there to operate memcg data
structures:

memcg_update_all_caches.

In general, those functions could very well live in the other file (slab
or memcg), but they need to take locks or manipulate data structures
that are internal to slab/memcg.

I believe this is a sound convention.

2013-03-28 07:48:18

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed 27-03-13 18:32:23, Michal Hocko wrote:
[...]
> Removed WARN_ON_ONCE as suggested by Johannes and kept kmalloc with
> PATH_MAX used instead of PAGE_SIZE. I've kept Glauber's acked-by but I
> can remove it.

And hopefully the last version. I forgot to s/PAGE_SIZE/MAX_PATH/ in
snprintf.
---
>From 551d7b5960904503da8f050faa533278a1d1bc6c Mon Sep 17 00:00:00 2001
From: Michal Hocko <[email protected]>
Date: Thu, 28 Mar 2013 08:46:49 +0100
Subject: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

As cgroup supports rename, it's unsafe to dereference dentry->d_name
without proper vfs locks. Fix this by using cgroup_name() rather than
dentry directly.

Also open code memcg_cache_name because it is called only from
kmem_cache_dup which frees the returned name right after
kmem_cache_create_memcg makes a copy of it. Such a short-lived
allocation doesn't make too much sense. So replace it by a static
buffer as kmem_cache_dup is called with memcg_cache_mutex.

Signed-off-by: Li Zefan <[email protected]>
Signed-off-by: Michal Hocko <[email protected]>
Acked-by: Glauber Costa <[email protected]>
---
mm/memcontrol.c | 63 ++++++++++++++++++++++++++++---------------------------
1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 53b8201..9715c0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3214,52 +3214,53 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
schedule_work(&cachep->memcg_params->destroy);
}

-static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
-{
- char *name;
- struct dentry *dentry;
-
- rcu_read_lock();
- dentry = rcu_dereference(memcg->css.cgroup->dentry);
- rcu_read_unlock();
-
- BUG_ON(dentry == NULL);
-
- name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
- memcg_cache_id(memcg), dentry->d_name.name);
-
- return name;
-}
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allow them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);

+/*
+ * Called with memcg_cache_mutex held
+ */
static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
struct kmem_cache *s)
{
- char *name;
struct kmem_cache *new;
+ static char *tmp_name = NULL;

- name = memcg_cache_name(memcg, s);
- if (!name)
- return NULL;
+ lockdep_assert_held(&memcg_cache_mutex);
+
+ /*
+ * kmem_cache_create_memcg duplicates the given name and
+ * cgroup_name for this name requires RCU context.
+ * This static temporary buffer is used to prevent from
+ * pointless shortliving allocation.
+ */
+ if (!tmp_name) {
+ tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (!tmp_name)
+ return NULL;
+ }
+
+ rcu_read_lock();
+ snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name,
+ memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup));
+ rcu_read_unlock();

- new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+ new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
(s->flags & ~SLAB_PANIC), s->ctor, s);

if (new)
new->allocflags |= __GFP_KMEMCG;

- kfree(name);
return new;
}

-/*
- * This lock protects updaters, not readers. We want readers to be as fast as
- * they can, and they will either see NULL or a valid cache value. Our model
- * allow them to see NULL, in which case the root memcg will be selected.
- *
- * We need this lock because multiple allocations to the same cache from a non
- * will span more than one worker. Only one of them can create the cache.
- */
-static DEFINE_MUTEX(memcg_cache_mutex);
static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
struct kmem_cache *cachep)
{
--
1.7.10.4

--
Michal Hocko
SUSE Labs

2013-04-02 08:26:57

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

Tejun,
could you take this one please?

On Thu 28-03-13 08:48:14, Michal Hocko wrote:
> On Wed 27-03-13 18:32:23, Michal Hocko wrote:
> [...]
> > Removed WARN_ON_ONCE as suggested by Johannes and kept kmalloc with
> > PATH_MAX used instead of PAGE_SIZE. I've kept Glauber's acked-by but I
> > can remove it.
>
> And hopefully the last version. I forgot to s/PAGE_SIZE/MAX_PATH/ in
> snprintf.
> ---
> From 551d7b5960904503da8f050faa533278a1d1bc6c Mon Sep 17 00:00:00 2001
> From: Michal Hocko <[email protected]>
> Date: Thu, 28 Mar 2013 08:46:49 +0100
> Subject: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()
>
> As cgroup supports rename, it's unsafe to dereference dentry->d_name
> without proper vfs locks. Fix this by using cgroup_name() rather than
> dentry directly.
>
> Also open code memcg_cache_name because it is called only from
> kmem_cache_dup which frees the returned name right after
> kmem_cache_create_memcg makes a copy of it. Such a short-lived
> allocation doesn't make too much sense. So replace it by a static
> buffer as kmem_cache_dup is called with memcg_cache_mutex.
>
> Signed-off-by: Li Zefan <[email protected]>
> Signed-off-by: Michal Hocko <[email protected]>
> Acked-by: Glauber Costa <[email protected]>
> ---
> mm/memcontrol.c | 63 ++++++++++++++++++++++++++++---------------------------
> 1 file changed, 32 insertions(+), 31 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 53b8201..9715c0c 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3214,52 +3214,53 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
> schedule_work(&cachep->memcg_params->destroy);
> }
>
> -static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
> -{
> - char *name;
> - struct dentry *dentry;
> -
> - rcu_read_lock();
> - dentry = rcu_dereference(memcg->css.cgroup->dentry);
> - rcu_read_unlock();
> -
> - BUG_ON(dentry == NULL);
> -
> - name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
> - memcg_cache_id(memcg), dentry->d_name.name);
> -
> - return name;
> -}
> +/*
> + * This lock protects updaters, not readers. We want readers to be as fast as
> + * they can, and they will either see NULL or a valid cache value. Our model
> + * allow them to see NULL, in which case the root memcg will be selected.
> + *
> + * We need this lock because multiple allocations to the same cache from a non
> + * will span more than one worker. Only one of them can create the cache.
> + */
> +static DEFINE_MUTEX(memcg_cache_mutex);
>
> +/*
> + * Called with memcg_cache_mutex held
> + */
> static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
> struct kmem_cache *s)
> {
> - char *name;
> struct kmem_cache *new;
> + static char *tmp_name = NULL;
>
> - name = memcg_cache_name(memcg, s);
> - if (!name)
> - return NULL;
> + lockdep_assert_held(&memcg_cache_mutex);
> +
> + /*
> + * kmem_cache_create_memcg duplicates the given name and
> + * cgroup_name for this name requires RCU context.
> + * This static temporary buffer is used to prevent from
> + * pointless shortliving allocation.
> + */
> + if (!tmp_name) {
> + tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
> + if (!tmp_name)
> + return NULL;
> + }
> +
> + rcu_read_lock();
> + snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name,
> + memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup));
> + rcu_read_unlock();
>
> - new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
> + new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
> (s->flags & ~SLAB_PANIC), s->ctor, s);
>
> if (new)
> new->allocflags |= __GFP_KMEMCG;
>
> - kfree(name);
> return new;
> }
>
> -/*
> - * This lock protects updaters, not readers. We want readers to be as fast as
> - * they can, and they will either see NULL or a valid cache value. Our model
> - * allow them to see NULL, in which case the root memcg will be selected.
> - *
> - * We need this lock because multiple allocations to the same cache from a non
> - * will span more than one worker. Only one of them can create the cache.
> - */
> -static DEFINE_MUTEX(memcg_cache_mutex);
> static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
> struct kmem_cache *cachep)
> {
> --
> 1.7.10.4
>
> --
> Michal Hocko
> SUSE Labs
> --
> To unsubscribe from this list: send the line "unsubscribe cgroups" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

--
Michal Hocko
SUSE Labs

2013-04-03 21:33:43

by Tejun Heo

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Tue, Apr 02, 2013 at 10:26:48AM +0200, Michal Hocko wrote:
> Tejun,
> could you take this one please?

Aye aye, applied to cgroup/for-3.10.

Thanks.

--
tejun

2013-04-04 07:07:04

by Michal Hocko

[permalink] [raw]
Subject: Re: [PATCH] memcg: fix memcg_cache_name() to use cgroup_name()

On Wed 03-04-13 14:33:34, Tejun Heo wrote:
> On Tue, Apr 02, 2013 at 10:26:48AM +0200, Michal Hocko wrote:
> > Tejun,
> > could you take this one please?
>
> Aye aye, applied to cgroup/for-3.10.

Thanks!
--
Michal Hocko
SUSE Labs