From: Pekka Enberg <[email protected]>
As suggested by Mel Gorman, add out-of-memory diagnostics to the SLUB allocator
to make debugging OOM conditions easier. This patch helped hunt down a nasty
OOM issue that popped up every now and then, caused by SLUB debugging code which
forced 4096 byte allocations to use order 1 pages even in the fallback case.
An example print out looks like this:
<snip page allocator out-of-memory message>
SLUB: Unable to allocate memory on node -1 (gfp=20)
cache: kmalloc-4096, object size: 4096, buffer size: 4168, default order: 3, min order: 1
node 0: slabs: 95, objs: 665, free: 0
Cc: Christoph Lameter <[email protected]>
Acked-by: Mel Gorman <[email protected]>
Tested-by: Larry Finger <[email protected]>
Signed-off-by: Pekka Enberg <[email protected]>
---
mm/slub.c | 70 ++++++++++++++++++++++++++++++++++++++++++++----------------
1 files changed, 51 insertions(+), 19 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 65ffda5..2bbacfc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1484,6 +1484,56 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
return 1;
}
+static int count_free(struct page *page)
+{
+ return page->objects - page->inuse;
+}
+
+static unsigned long count_partial(struct kmem_cache_node *n,
+ int (*get_count)(struct page *))
+{
+ unsigned long flags;
+ unsigned long x = 0;
+ struct page *page;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->partial, lru)
+ x += get_count(page);
+ spin_unlock_irqrestore(&n->list_lock, flags);
+ return x;
+}
+
+static noinline void
+slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
+{
+ int node;
+
+ printk(KERN_WARNING
+ "SLUB: Unable to allocate memory on node %d (gfp=%x)\n",
+ nid, gfpflags);
+ printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
+ "default order: %d, min order: %d\n", s->name, s->objsize,
+ s->size, oo_order(s->oo), oo_order(s->min));
+
+ for_each_online_node(node) {
+ struct kmem_cache_node *n = get_node(s, node);
+ unsigned long nr_slabs;
+ unsigned long nr_objs;
+ unsigned long nr_free;
+
+ if (!n)
+ continue;
+
+ nr_slabs = atomic_long_read(&n->nr_slabs);
+ nr_objs = atomic_long_read(&n->total_objects);
+ nr_free = count_partial(n, count_free);
+
+ printk(KERN_WARNING
+ " node %d: slabs: %ld, objs: %ld, free: %ld\n",
+ node, nr_slabs, nr_objs, nr_free);
+ }
+}
+
/*
* Slow path. The lockless freelist is empty or we need to perform
* debugging duties.
@@ -1565,6 +1615,7 @@ new_slab:
c->page = new;
goto load_freelist;
}
+ slab_out_of_memory(s, gfpflags, node);
return NULL;
debug:
if (!alloc_debug_processing(s, c->page, object, addr))
@@ -3318,20 +3369,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
}
#ifdef CONFIG_SLUB_DEBUG
-static unsigned long count_partial(struct kmem_cache_node *n,
- int (*get_count)(struct page *))
-{
- unsigned long flags;
- unsigned long x = 0;
- struct page *page;
-
- spin_lock_irqsave(&n->list_lock, flags);
- list_for_each_entry(page, &n->partial, lru)
- x += get_count(page);
- spin_unlock_irqrestore(&n->list_lock, flags);
- return x;
-}
-
static int count_inuse(struct page *page)
{
return page->inuse;
@@ -3342,11 +3379,6 @@ static int count_total(struct page *page)
return page->objects;
}
-static int count_free(struct page *page)
-{
- return page->objects - page->inuse;
-}
-
static int validate_slab(struct kmem_cache *s, struct page *page,
unsigned long *map)
{
--
1.6.0.4

On Thu, Jun 11, 2009 at 11:43:46AM +0300, Pekka J Enberg wrote:
> From: Pekka Enberg <[email protected]>
>
> As suggested by Mel Gorman, add out-of-memory diagnostics to the SLUB allocator
> to make debugging OOM conditions easier.
Picky - make debugging page allocation failures easier. OOM in this
context might be conflated with the OOM-killer.
> This patch helped hunt down a nasty
> OOM issue that popped up every now and then, caused by SLUB debugging code which
> forced 4096 byte allocations to use order 1 pages even in the fallback case.
>
> An example print out looks like this:
>
> <snip page allocator out-of-memory message>
> SLUB: Unable to allocate memory on node -1 (gfp=20)
node -1 is an implementation detail. Can it print "current" instead? No
biggie, I know what it means and I suppose anyone debugging an allocation
failure will know too.
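If it did change, something like this (untested sketch) is all I mean:

	if (nid == -1)
		printk(KERN_WARNING
			"SLUB: Unable to allocate memory on current node (gfp=%x)\n",
			gfpflags);
	else
		printk(KERN_WARNING
			"SLUB: Unable to allocate memory on node %d (gfp=%x)\n",
			nid, gfpflags);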
gfp is in hex, right? So gfp=0x20? gfp=20 might have someone thinking it's
the decimal value.
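i.e. just (untested):

	printk(KERN_WARNING
		"SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
		nid, gfpflags);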
> cache: kmalloc-4096, object size: 4096, buffer size: 4168, default order: 3, min order: 1
> node 0: slabs: 95, objs: 665, free: 0
>
That looks grand
Thanks
> <snip rest of patch>
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab

Hi Mel,
On Thu, 2009-06-11 at 10:47 +0100, Mel Gorman wrote:
> On Thu, Jun 11, 2009 at 11:43:46AM +0300, Pekka J Enberg wrote:
> > From: Pekka Enberg <[email protected]>
> >
> > As suggested by Mel Gorman, add out-of-memory diagnostics to the SLUB allocator
> > to make debugging OOM conditions easier.
>
> Picky - make debugging page allocation failures easier. OOM in this
> context might be conflated with the OOM-killer.
>
> > This patch helped hunt down a nasty
> > OOM issue that popped up every now and then, caused by SLUB debugging code which
> > forced 4096 byte allocations to use order 1 pages even in the fallback case.
> >
> > An example print out looks like this:
> >
> > <snip page allocator out-of-memory message>
> > SLUB: Unable to allocate memory on node -1 (gfp=20)
>
> node -1 is an implementation detail. Can it print "current" instead? No
> biggie, I know what it means and I suppose anyone debugging an allocation
> failure will know too.
I'd rather keep it as-is because, as you say, anyone debugging an
allocation failure will know what it means.
On Thu, 2009-06-11 at 10:47 +0100, Mel Gorman wrote:
> gfp is in hex, right? So gfp=0x20? gfp=20 might have someone thinking it's
> the decimal value.
Fixed. Thanks!
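With the format string switched to "gfp=0x%x", the example message above
becomes something like:

  SLUB: Unable to allocate memory on node -1 (gfp=0x20)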
Pekka

Acked-by: Christoph Lameter <[email protected]>
On Thu, 11 Jun 2009, Pekka J Enberg wrote:
> diff --git a/mm/slub.c b/mm/slub.c
> index 65ffda5..2bbacfc 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1484,6 +1484,56 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
> return 1;
> }
>
> +static int count_free(struct page *page)
> +{
> + return page->objects - page->inuse;
> +}
> +
> +static unsigned long count_partial(struct kmem_cache_node *n,
> + int (*get_count)(struct page *))
> +{
> + unsigned long flags;
> + unsigned long x = 0;
> + struct page *page;
> +
> + spin_lock_irqsave(&n->list_lock, flags);
> + list_for_each_entry(page, &n->partial, lru)
> + x += get_count(page);
> + spin_unlock_irqrestore(&n->list_lock, flags);
> + return x;
> +}
> +
> +static noinline void
> +slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
> +{
> + int node;
> +
> + printk(KERN_WARNING
> + "SLUB: Unable to allocate memory on node %d (gfp=%x)\n",
> + nid, gfpflags);
> + printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, "
> + "default order: %d, min order: %d\n", s->name, s->objsize,
> + s->size, oo_order(s->oo), oo_order(s->min));
> +
> + for_each_online_node(node) {
> + struct kmem_cache_node *n = get_node(s, node);
> + unsigned long nr_slabs;
> + unsigned long nr_objs;
> + unsigned long nr_free;
> +
> + if (!n)
> + continue;
> +
> + nr_slabs = atomic_long_read(&n->nr_slabs);
> + nr_objs = atomic_long_read(&n->total_objects);
This won't compile unless CONFIG_SLUB_DEBUG is enabled.
Perhaps slab_out_of_memory()'s partial list scan should only be declared
for CONFIG_SLUB_DEBUG? Otherwise, nr_slabs and nr_objs will always be 0
since we don't increment them and nr_free will appear out of sync. I
suspect debuggers will be asking if CONFIG_SLUB_DEBUG is enabled often in
such scenarios.
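e.g. something like this (untested) would at least keep it compiling, though
as noted the counters would just read 0 without CONFIG_SLUB_DEBUG:

	unsigned long nr_slabs = 0;
	unsigned long nr_objs = 0;

#ifdef CONFIG_SLUB_DEBUG
	/* these counters only exist and are only maintained with SLUB debugging */
	nr_slabs = atomic_long_read(&n->nr_slabs);
	nr_objs = atomic_long_read(&n->total_objects);
#endif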
> + nr_free = count_partial(n, count_free);
> +
> + printk(KERN_WARNING
> + " node %d: slabs: %ld, objs: %ld, free: %ld\n",
> + node, nr_slabs, nr_objs, nr_free);
> + }
> +}
> +
> /*
> * Slow path. The lockless freelist is empty or we need to perform
> * debugging duties.
> @@ -1565,6 +1615,7 @@ new_slab:
> c->page = new;
> goto load_freelist;
> }
> + slab_out_of_memory(s, gfpflags, node);
I assume this is being added to __slab_alloc() and not new_slab() because
you need to reenable irqs before using count_partial()?
Do you really need to move count_partial() and count_free() out from under
CONFIG_SLUB_DEBUG? Couldn't you just add this to new_slab():
out:
	if (!page)
		slab_out_of_memory(s, flags, node);
	return page;
and then add your own partial list scanner in slab_out_of_memory():
	nr_free = 0;
	spin_lock(&n->list_lock);
	list_for_each_entry(page, &n->partial, lru)
		nr_free += page->objects - page->inuse;
	spin_unlock(&n->list_lock);