This patch triggers slab defragmentation from memory reclaim.

The logical point for this is after slab shrinking has been performed in
vmscan.c. At that point the fragmentation ratio of the slab caches has
increased because objects were freed via the LRUs. So we call
kmem_cache_defrag() from there.

shrink_slab() in vmscan.c is called in some contexts to do global
shrinking of slabs and in others to do shrinking for a particular zone.
Pass the zone to shrink_slab() so that it can call kmem_cache_defrag()
and restrict the defragmentation to the node that is under memory
pressure.

Reviewed-by: Rik van Riel <[email protected]>
Signed-off-by: Christoph Lameter <[email protected]>
---
fs/drop_caches.c | 2 +-
include/linux/mm.h | 2 +-
include/linux/slab.h | 1 +
mm/vmscan.c | 26 +++++++++++++++++++-------
4 files changed, 22 insertions(+), 9 deletions(-)
Index: linux-2.6/fs/drop_caches.c
===================================================================
--- linux-2.6.orig/fs/drop_caches.c 2007-08-29 19:30:53.000000000 -0700
+++ linux-2.6/fs/drop_caches.c 2007-11-06 12:53:40.000000000 -0800
@@ -50,7 +50,7 @@ void drop_slab(void)
int nr_objects;
do {
- nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+ nr_objects = shrink_slab(1000, GFP_KERNEL, 1000, NULL);
} while (nr_objects > 10);
}
Index: linux-2.6/include/linux/mm.h
===================================================================
--- linux-2.6.orig/include/linux/mm.h 2007-11-06 12:33:55.000000000 -0800
+++ linux-2.6/include/linux/mm.h 2007-11-06 12:54:11.000000000 -0800
@@ -1118,7 +1118,7 @@ int in_gate_area_no_task(unsigned long a
int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
- unsigned long lru_pages);
+ unsigned long lru_pages, struct zone *z);
void drop_pagecache(void);
void drop_slab(void);
Index: linux-2.6/include/linux/slab.h
===================================================================
--- linux-2.6.orig/include/linux/slab.h 2007-11-06 12:37:51.000000000 -0800
+++ linux-2.6/include/linux/slab.h 2007-11-06 12:53:40.000000000 -0800
@@ -63,6 +63,7 @@ void kmem_cache_free(struct kmem_cache *
unsigned int kmem_cache_size(struct kmem_cache *);
const char *kmem_cache_name(struct kmem_cache *);
int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
+int kmem_cache_defrag(int node);
/*
* Please use this macro to create slab caches. Simply specify the
Index: linux-2.6/mm/vmscan.c
===================================================================
--- linux-2.6.orig/mm/vmscan.c 2007-10-25 18:28:41.000000000 -0700
+++ linux-2.6/mm/vmscan.c 2007-11-06 12:55:25.000000000 -0800
@@ -150,10 +150,18 @@ EXPORT_SYMBOL(unregister_shrinker);
* are eligible for the caller's allocation attempt. It is used for balancing
* slab reclaim versus page reclaim.
*
+ * zone is the zone for which we are shrinking the slabs. If the intent
+ * is to do a global shrink then zone may be NULL. Specification of a
+ * zone is currently only used to limit slab defragmentation to a NUMA node.
+ * The performance of shrink_slab would be better (in particular under NUMA)
+ * if it could be targeted as a whole to the zone that is under memory
+ * pressure, but the VFS infrastructure does not allow that at the present
+ * time.
+ *
* Returns the number of slab objects which we shrunk.
*/
unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
- unsigned long lru_pages)
+ unsigned long lru_pages, struct zone *zone)
{
struct shrinker *shrinker;
unsigned long ret = 0;
@@ -210,6 +218,8 @@ unsigned long shrink_slab(unsigned long
shrinker->nr += total_scan;
}
up_read(&shrinker_rwsem);
+ if (gfp_mask & __GFP_FS)
+ kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);
return ret;
}
@@ -1241,7 +1251,7 @@ unsigned long try_to_free_pages(struct z
if (!priority)
disable_swap_token();
nr_reclaimed += shrink_zones(priority, zones, &sc);
- shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
+ shrink_slab(sc.nr_scanned, gfp_mask, lru_pages, NULL);
if (reclaim_state) {
nr_reclaimed += reclaim_state->reclaimed_slab;
reclaim_state->reclaimed_slab = 0;
@@ -1419,7 +1429,7 @@ loop_again:
nr_reclaimed += shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
- lru_pages);
+ lru_pages, zone);
nr_reclaimed += reclaim_state->reclaimed_slab;
total_scanned += sc.nr_scanned;
if (zone_is_all_unreclaimable(zone))
@@ -1658,7 +1668,7 @@ unsigned long shrink_all_memory(unsigned
/* If slab caches are huge, it's better to hit them first */
while (nr_slab >= lru_pages) {
reclaim_state.reclaimed_slab = 0;
- shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
+ shrink_slab(nr_pages, sc.gfp_mask, lru_pages, NULL);
if (!reclaim_state.reclaimed_slab)
break;
@@ -1696,7 +1706,7 @@ unsigned long shrink_all_memory(unsigned
reclaim_state.reclaimed_slab = 0;
shrink_slab(sc.nr_scanned, sc.gfp_mask,
- count_lru_pages());
+ count_lru_pages(), NULL);
ret += reclaim_state.reclaimed_slab;
if (ret >= nr_pages)
goto out;
@@ -1713,7 +1723,8 @@ unsigned long shrink_all_memory(unsigned
if (!ret) {
do {
reclaim_state.reclaimed_slab = 0;
- shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
+ shrink_slab(nr_pages, sc.gfp_mask,
+ count_lru_pages(), NULL);
ret += reclaim_state.reclaimed_slab;
} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
}
@@ -1875,7 +1886,8 @@ static int __zone_reclaim(struct zone *z
* Note that shrink_slab will free memory on all zones and may
* take a long time.
*/
- while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
+ while (shrink_slab(sc.nr_scanned, gfp_mask, order,
+ zone) &&
zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
slab_reclaimable - nr_pages)
;
--
Hi Christoph,
On Tue, Nov 06, 2007 at 05:11:42PM -0800, Christoph Lameter wrote:
> Index: linux-2.6/include/linux/slab.h
> ===================================================================
> --- linux-2.6.orig/include/linux/slab.h 2007-11-06 12:37:51.000000000 -0800
> +++ linux-2.6/include/linux/slab.h 2007-11-06 12:53:40.000000000 -0800
> @@ -63,6 +63,7 @@ void kmem_cache_free(struct kmem_cache *
> unsigned int kmem_cache_size(struct kmem_cache *);
> const char *kmem_cache_name(struct kmem_cache *);
> int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
> +int kmem_cache_defrag(int node);
The definition in slab.c always returns 0. Wouldn't a static inline
function in the header be better?
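For illustration, a sketch of what such a stub could look like (whether the
real implementation sits behind a config option like CONFIG_SLUB is an
assumption here, not something from the patch):

	#ifdef CONFIG_SLUB
	int kmem_cache_defrag(int node);
	#else
	/* No defrag support: report that nothing was reclaimed. */
	static inline int kmem_cache_defrag(int node)
	{
		return 0;
	}
	#endif
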
> * Returns the number of slab objects which we shrunk.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> */
> unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
> - unsigned long lru_pages)
> + unsigned long lru_pages, struct zone *zone)
> {
> struct shrinker *shrinker;
> unsigned long ret = 0;
> @@ -210,6 +218,8 @@ unsigned long shrink_slab(unsigned long
> shrinker->nr += total_scan;
> }
> up_read(&shrinker_rwsem);
> + if (gfp_mask & __GFP_FS)
> + kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);
> return ret;
> }
What about the objects that kmem_cache_defrag() releases? Shouldn't
they be counted too?
ret += kmem_cache_defrag(...)
Or am I overlooking something here?
Hannes
On Wed, 7 Nov 2007, Johannes Weiner wrote:
> > @@ -210,6 +218,8 @@ unsigned long shrink_slab(unsigned long
> > shrinker->nr += total_scan;
> > }
> > up_read(&shrinker_rwsem);
> > + if (gfp_mask & __GFP_FS)
> > + kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);
> > return ret;
> > }
>
> What about the objects that kmem_cache_defrag() releases? Shouldn't
> they be counted too?
>
> ret += kmem_cache_defrag(...)
>
> Or am I overlooking something here?
kmem_cache_defrag returns the number of pages that were released by defrag
actions.
shrink_slab returns the number of objects released by the shrinkers.
kmem_cache_defrag has no way of knowing how many objects were released by
the kick methods. The kick method may also have chosen to reallocate the
object rather than free it.
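Restating the mismatch as prototypes (the annotations are added here for
clarity, they are not part of the patch):

	unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
				unsigned long lru_pages, struct zone *zone);
				/* returns the number of objects shrunk */

	int kmem_cache_defrag(int node);
				/* returns the number of pages freed by defrag */

So simply adding the defrag result to ret would mix object counts with
page counts.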
On (06/11/07 17:11), Christoph Lameter didst pronounce:
> @@ -210,6 +218,8 @@ unsigned long shrink_slab(unsigned long
> shrinker->nr += total_scan;
> }
> up_read(&shrinker_rwsem);
> + if (gfp_mask & __GFP_FS)
> + kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);
Does this make an assumption that only filesystem-related slabs may be
targeted for reclaim? What if there is a slab that can free its objects
without ever caring about a filesystem?
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
On Thu, 8 Nov 2007, Mel Gorman wrote:
> > up_read(&shrinker_rwsem);
> > + if (gfp_mask & __GFP_FS)
> > + kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);
>
> Does this make an assumption that only filesystem-related slabs may be
> targeted for reclaim? What if there is a slab that can free its objects
> without ever caring about a filesystem?
Correct. Currently only filesystem-related slabs support slab defrag.
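For reference, a non-filesystem cache would first have to provide the
get()/kick() callbacks from the earlier patches in this series before
kmem_cache_defrag() could do anything with it. A hypothetical sketch (the
kmem_cache_setup_defrag() name and signature are assumed from the rest of
the series, and the my_* names are made up for illustration):

	static struct kmem_cache *my_cache;

	/* Hypothetical refcount helpers for the objects in my_cache. */
	extern void my_object_get(void *object);
	extern void my_object_put(void *object);

	/* Pin the objects so they cannot be freed while we work on them. */
	static void *my_get(struct kmem_cache *s, int nr, void **v)
	{
		int i;

		for (i = 0; i < nr; i++)
			my_object_get(v[i]);
		return NULL;	/* no private state to hand to kick() */
	}

	/* Drop the references; objects that become unused are freed and
	 * the slab page can then be returned to the page allocator. */
	static void my_kick(struct kmem_cache *s, int nr, void **v,
							void *private)
	{
		int i;

		for (i = 0; i < nr; i++)
			my_object_put(v[i]);
	}

	/* At cache creation time: */
	kmem_cache_setup_defrag(my_cache, my_get, my_kick);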