Message-Id: <20070806103658.603735000@chello.nl>
References: <20070806102922.907530000@chello.nl>
User-Agent: quilt/0.45-1
Date: Mon, 06 Aug 2007 12:29:26 +0200
From: Peter Zijlstra
To: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Cc: Peter Zijlstra, David Miller, Andrew Morton, Daniel Phillips,
    Pekka Enberg, Christoph Lameter, Matt Mackall, Lee Schermerhorn,
    Steve Dickson
Subject: [PATCH 04/10] mm: slub: add knowledge of reserve pages
Content-Disposition: inline; filename=reserve-slab.patch

Restrict objects from reserve slabs (ALLOC_NO_WATERMARKS) to allocation
contexts that are entitled to them.

Care is taken to touch only the SLUB slow path.

Because the reserve threshold is system-wide (by virtue of the previous
patches), a single kmem_cache-wide state suffices.

Signed-off-by: Peter Zijlstra
Cc: Christoph Lameter
---
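As a reading aid, here is a condensed sketch of what the modified
__slab_alloc() slow path does. This is illustrative only, not the patch:
the helpers refill_from_cpu_or_partial(), try_reserve_slab(),
stash_reserve_slab() and alloc_object() are invented names for logic the
patch open-codes under the try_reserve/got_reserve/use_reserve labels.

/*
 * Illustrative sketch of the modified __slab_alloc() slow path; the
 * helper names are invented, the patch open-codes these steps.
 */
static void *slow_path_sketch(struct kmem_cache *s, gfp_t gfpflags, int node)
{
	int reserve = 0;
	struct page *page = refill_from_cpu_or_partial(s, node);

	if (page)
		return alloc_object(s, page);	/* the normal refill path */

	/* Only entitled contexts may eat from the reserve slab. */
	if (gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS) {
		page = try_reserve_slab(s);	/* takes reserve_lock */
		if (page)
			return alloc_object(s, page);
	}

	page = new_slab(s, gfpflags, node, &reserve);
	if (!page)
		return NULL;

	if (reserve) {
		/*
		 * The page allocator had to dip below the watermarks for
		 * this slab: keep it as the single cache-wide reserve slab
		 * rather than installing it as a cpu slab, so ordinary
		 * allocations cannot drain it.
		 */
		stash_reserve_slab(s, page);
	}
	return alloc_object(s, page);
}

Note that the reserve slab is only ever consulted after the regular
cpu-slab/partial-list refill has failed, so the fast path stays untouched.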
 include/linux/slub_def.h |    2 +
 mm/slub.c                |   75 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 70 insertions(+), 7 deletions(-)

Index: linux-2.6-2/include/linux/slub_def.h
===================================================================
--- linux-2.6-2.orig/include/linux/slub_def.h
+++ linux-2.6-2/include/linux/slub_def.h
@@ -50,6 +50,8 @@ struct kmem_cache {
 	struct kobject kobj;	/* For sysfs */
 #endif
 
+	struct page *reserve_slab;
+
 #ifdef CONFIG_NUMA
 	int defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
Index: linux-2.6-2/mm/slub.c
===================================================================
--- linux-2.6-2.orig/mm/slub.c
+++ linux-2.6-2/mm/slub.c
@@ -20,11 +20,13 @@
 #include
 #include
 #include
+#include "internal.h"
 
 /*
  * Lock order:
- *   1. slab_lock(page)
- *   2. slab->list_lock
+ *   1. reserve_lock
+ *   2. slab_lock(page)
+ *   3. node->list_lock
  *
  * The slab_lock protects operations on the object of a particular
  * slab and its metadata in the page struct. If the slab lock
@@ -258,6 +260,8 @@ static inline int sysfs_slab_alias(struc
 static inline void sysfs_slab_remove(struct kmem_cache *s) {}
 #endif
 
+static DEFINE_SPINLOCK(reserve_lock);
+
 /********************************************************************
  *			Core slab cache functions
  *******************************************************************/
@@ -1069,7 +1073,7 @@ static void setup_object(struct kmem_cac
 		s->ctor(object, s, 0);
 }
 
-static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *reserve)
 {
 	struct page *page;
 	struct kmem_cache_node *n;
@@ -1087,6 +1091,7 @@ static struct page *new_slab(struct kmem
 	if (!page)
 		goto out;
 
+	*reserve = page->reserve;
 	n = get_node(s, page_to_nid(page));
 	if (n)
 		atomic_long_inc(&n->nr_slabs);
@@ -1457,6 +1462,7 @@ static void *__slab_alloc(struct kmem_ca
 {
 	void **object;
 	int cpu = smp_processor_id();
+	int reserve = 0;
 
 	if (!page)
 		goto new_slab;
@@ -1486,10 +1492,25 @@ new_slab:
 	if (page) {
 		s->cpu_slab[cpu] = page;
 		goto load_freelist;
-	}
+	} else if (unlikely(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS))
+		goto try_reserve;
 
-	page = new_slab(s, gfpflags, node);
-	if (page) {
+alloc_slab:
+	page = new_slab(s, gfpflags, node, &reserve);
+	if (page && !reserve) {
+		if (unlikely(s->reserve_slab)) {
+			struct page *reserve;
+
+			spin_lock(&reserve_lock);
+			reserve = s->reserve_slab;
+			s->reserve_slab = NULL;
+			spin_unlock(&reserve_lock);
+
+			if (reserve) {
+				slab_lock(reserve);
+				unfreeze_slab(s, reserve);
+			}
+		}
 		cpu = smp_processor_id();
 		if (s->cpu_slab[cpu]) {
 			/*
@@ -1517,6 +1538,18 @@ new_slab:
 		SetSlabFrozen(page);
 		s->cpu_slab[cpu] = page;
 		goto load_freelist;
+	} else if (page) {
+		spin_lock(&reserve_lock);
+		if (s->reserve_slab) {
+			discard_slab(s, page);
+			page = s->reserve_slab;
+			goto got_reserve;
+		}
+		slab_lock(page);
+		SetSlabFrozen(page);
+		s->reserve_slab = page;
+		spin_unlock(&reserve_lock);
+		goto use_reserve;
 	}
 	return NULL;
 debug:
@@ -1528,6 +1561,31 @@ debug:
 	page->freelist = object[page->offset];
 	slab_unlock(page);
 	return object;
+
+try_reserve:
+	spin_lock(&reserve_lock);
+	page = s->reserve_slab;
+	if (!page) {
+		spin_unlock(&reserve_lock);
+		goto alloc_slab;
+	}
+
+got_reserve:
+	slab_lock(page);
+	if (!page->freelist) {
+		s->reserve_slab = NULL;
+		spin_unlock(&reserve_lock);
+		unfreeze_slab(s, page);
+		goto alloc_slab;
+	}
+	spin_unlock(&reserve_lock);
+
+use_reserve:
+	object = page->freelist;
+	page->inuse++;
+	page->freelist = object[page->offset];
+	slab_unlock(page);
+	return object;
 }
 
 /*
@@ -1872,10 +1930,11 @@ static struct kmem_cache_node * __init e
 {
 	struct page *page;
 	struct kmem_cache_node *n;
+	int reserve;
 
 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-	page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+	page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &reserve);
 
 	BUG_ON(!page);
 	n = page->freelist;
@@ -2091,6 +2150,8 @@ static int kmem_cache_open(struct kmem_c
 	s->defrag_ratio = 100;
 #endif
 
+	s->reserve_slab = NULL;
+
 	if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
 		return 1;
 error:
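As an aside on what "entitled" means in practice: gfp_to_alloc_flags()
grants ALLOC_NO_WATERMARKS to, among others, tasks running with
PF_MEMALLOC set. A hypothetical caller (illustrative only, not part of
this series) could therefore look like:

#include <linux/sched.h>
#include <linux/slab.h>

/* Hypothetical example of an entitled allocation context. */
static void *alloc_in_emergency_context(struct kmem_cache *s)
{
	unsigned int pflags = current->flags;
	void *obj;

	current->flags |= PF_MEMALLOC;	/* makes this context entitled */
	obj = kmem_cache_alloc(s, GFP_ATOMIC);
	current->flags = pflags;	/* restore the old flags */

	return obj;
}

Everything else is denied access to s->reserve_slab and stays on the
normal watermark-obeying path, which is the restriction the changelog
above describes.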