Date: Thu, 8 Nov 2007 15:12:49 +0000
From: Mel Gorman <mel@skynet.ie>
To: Christoph Lameter
Cc: akpm@linux-foundation.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [patch 12/23] SLUB: Trigger defragmentation from memory reclaim
Message-ID: <20071108151249.GE2591@skynet.ie>
References: <20071107011130.382244340@sgi.com> <20071107011229.423714790@sgi.com>
In-Reply-To: <20071107011229.423714790@sgi.com>

On (06/11/07 17:11), Christoph Lameter didst pronounce:
> This patch triggers slab defragmentation from memory reclaim.
> The logical point for this is after slab shrinking has been performed
> in vmscan.c. At that point the fragmentation ratio of a slab has
> increased because objects were freed via the LRUs, so we call
> kmem_cache_defrag() from there.
>
> shrink_slab() in vmscan.c is called in some contexts to do
> global shrinking of slabs and in others to do shrinking for
> a particular zone. Pass the zone to shrink_slab() so that it
> can call kmem_cache_defrag() and restrict the defragmentation to
> the node that is under memory pressure.
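
If I'm reading the series right, the shape of the change is as follows
(a paraphrased sketch of the hunks quoted below, not the patch itself):

	/*
	 * Zone-targeted reclaim (e.g. balance_pgdat()) passes the zone
	 * so defragmentation is confined to the node under pressure ...
	 */
	nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, lru_pages, zone);

	/* ... while global reclaim passes NULL and defragments all nodes. */
	shrink_slab(sc.nr_scanned, gfp_mask, lru_pages, NULL);

	/*
	 * Inside shrink_slab(), once the shrinkers have emptied the LRUs
	 * and fragmentation is at its worst, -1 means "every node":
	 */
	kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);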
>
> Reviewed-by: Rik van Riel
> Signed-off-by: Christoph Lameter
> ---
>  fs/drop_caches.c     |    2 +-
>  include/linux/mm.h   |    2 +-
>  include/linux/slab.h |    1 +
>  mm/vmscan.c          |   26 +++++++++++++++++++-------
>  4 files changed, 22 insertions(+), 9 deletions(-)
>
> Index: linux-2.6/fs/drop_caches.c
> ===================================================================
> --- linux-2.6.orig/fs/drop_caches.c	2007-08-29 19:30:53.000000000 -0700
> +++ linux-2.6/fs/drop_caches.c	2007-11-06 12:53:40.000000000 -0800
> @@ -50,7 +50,7 @@ void drop_slab(void)
>  	int nr_objects;
>
>  	do {
> -		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
> +		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000, NULL);
>  	} while (nr_objects > 10);
>  }
>
> Index: linux-2.6/include/linux/mm.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm.h	2007-11-06 12:33:55.000000000 -0800
> +++ linux-2.6/include/linux/mm.h	2007-11-06 12:54:11.000000000 -0800
> @@ -1118,7 +1118,7 @@ int in_gate_area_no_task(unsigned long a
>  int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
>  			void __user *, size_t *, loff_t *);
>  unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
> -			unsigned long lru_pages);
> +			unsigned long lru_pages, struct zone *z);
>  void drop_pagecache(void);
>  void drop_slab(void);
>
> Index: linux-2.6/include/linux/slab.h
> ===================================================================
> --- linux-2.6.orig/include/linux/slab.h	2007-11-06 12:37:51.000000000 -0800
> +++ linux-2.6/include/linux/slab.h	2007-11-06 12:53:40.000000000 -0800
> @@ -63,6 +63,7 @@ void kmem_cache_free(struct kmem_cache *
>  unsigned int kmem_cache_size(struct kmem_cache *);
>  const char *kmem_cache_name(struct kmem_cache *);
>  int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr);
> +int kmem_cache_defrag(int node);
>
>  /*
>   * Please use this macro to create slab caches. Simply specify the
> Index: linux-2.6/mm/vmscan.c
> ===================================================================
> --- linux-2.6.orig/mm/vmscan.c	2007-10-25 18:28:41.000000000 -0700
> +++ linux-2.6/mm/vmscan.c	2007-11-06 12:55:25.000000000 -0800
> @@ -150,10 +150,18 @@ EXPORT_SYMBOL(unregister_shrinker);
>   * are eligible for the caller's allocation attempt. It is used for balancing
>   * slab reclaim versus page reclaim.
>   *
> + * zone is the zone for which we are shrinking the slabs. If the intent
> + * is to do a global shrink then zone may be NULL. Specification of a
> + * zone is currently only used to limit slab defragmentation to a NUMA node.
> + * The performance of shrink_slab would be better (in particular under NUMA)
> + * if it could be targeted as a whole to the zone that is under memory
> + * pressure but the VFS infrastructure does not allow that at the present
> + * time.
> + *
>   * Returns the number of slab objects which we shrunk.
>   */
>  unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
> -			unsigned long lru_pages)
> +			unsigned long lru_pages, struct zone *zone)
>  {
>  	struct shrinker *shrinker;
>  	unsigned long ret = 0;
> @@ -210,6 +218,8 @@ unsigned long shrink_slab(unsigned long
>  		shrinker->nr += total_scan;
>  	}
>  	up_read(&shrinker_rwsem);
> +	if (gfp_mask & __GFP_FS)
> +		kmem_cache_defrag(zone ? zone_to_nid(zone) : -1);

Does this make an assumption that only filesystem-related slabs may be
targeted for reclaim? What if there is a slab that can free its objects
without ever caring about a filesystem?
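
To make that concrete (hypothetical cache name, purely for illustration):

	/*
	 * A cache whose objects can be freed without re-entering any
	 * filesystem -- a hypothetical driver "buf_cache", say -- still
	 * never gets defragmented while reclaim runs with GFP_NOFS:
	 */
	shrink_slab(sc.nr_scanned, GFP_NOFS, lru_pages, zone);
	/*
	 * -> (gfp_mask & __GFP_FS) is 0, so the kmem_cache_defrag() call
	 *    above is skipped, no matter how fragmented buf_cache is.
	 */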
>  	return ret;
>  }
>
> @@ -1241,7 +1251,7 @@ unsigned long try_to_free_pages(struct z
>  		if (!priority)
>  			disable_swap_token();
>  		nr_reclaimed += shrink_zones(priority, zones, &sc);
> -		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
> +		shrink_slab(sc.nr_scanned, gfp_mask, lru_pages, NULL);
>  		if (reclaim_state) {
>  			nr_reclaimed += reclaim_state->reclaimed_slab;
>  			reclaim_state->reclaimed_slab = 0;
> @@ -1419,7 +1429,7 @@ loop_again:
>  			nr_reclaimed += shrink_zone(priority, zone, &sc);
>  			reclaim_state->reclaimed_slab = 0;
>  			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
> -						lru_pages);
> +						lru_pages, zone);
>  			nr_reclaimed += reclaim_state->reclaimed_slab;
>  			total_scanned += sc.nr_scanned;
>  			if (zone_is_all_unreclaimable(zone))
> @@ -1658,7 +1668,7 @@ unsigned long shrink_all_memory(unsigned
>  	/* If slab caches are huge, it's better to hit them first */
>  	while (nr_slab >= lru_pages) {
>  		reclaim_state.reclaimed_slab = 0;
> -		shrink_slab(nr_pages, sc.gfp_mask, lru_pages);
> +		shrink_slab(nr_pages, sc.gfp_mask, lru_pages, NULL);
>  		if (!reclaim_state.reclaimed_slab)
>  			break;
>
> @@ -1696,7 +1706,7 @@ unsigned long shrink_all_memory(unsigned
>
>  		reclaim_state.reclaimed_slab = 0;
>  		shrink_slab(sc.nr_scanned, sc.gfp_mask,
> -				count_lru_pages());
> +				count_lru_pages(), NULL);
>  		ret += reclaim_state.reclaimed_slab;
>  		if (ret >= nr_pages)
>  			goto out;
> @@ -1713,7 +1723,8 @@ unsigned long shrink_all_memory(unsigned
>  	if (!ret) {
>  		do {
>  			reclaim_state.reclaimed_slab = 0;
> -			shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
> +			shrink_slab(nr_pages, sc.gfp_mask,
> +					count_lru_pages(), NULL);
>  			ret += reclaim_state.reclaimed_slab;
>  		} while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
>  	}
> @@ -1875,7 +1886,8 @@ static int __zone_reclaim(struct zone *z
>  	 * Note that shrink_slab will free memory on all zones and may
>  	 * take a long time.
>  	 */
> -	while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
> +	while (shrink_slab(sc.nr_scanned, gfp_mask, order,
> +			zone) &&
>  		zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
>  			slab_reclaimable - nr_pages)
>  			;
>
> --

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab