This takes a somewhat different tack than the last version provided
by Mathieu.
Instead of using a cmpxchg, we keep a state variable in the per-cpu structure
that is incremented when we enter the hot path. This lets us detect that
a thread is already in the fastpath. For recursive calling scenarios we can
fall back to alternate allocation / free techniques that bypass fastpath caching.
A disadvantage is that we have to disable preemption in the fastpath. But if
preemption is disabled in the kernel configuration (as on most kernels that I run),
the preempt_disable()/preempt_enable() pair essentially compiles away and the
hotpath becomes very efficient.
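
To make the scheme easier to see outside the kernel, here is a minimal
userspace sketch of the recursion detection, using hypothetical names
(struct cpu_cache, fast_alloc(), slow_alloc()); it only models the active
counter, not the real SLUB paths. The counter starts at -1, entering the
fast path increments it, and anything other than zero after the increment
means another fast path is already live on this "cpu" (say, from an
interrupt), so we fall back to an allocation that never touches the cached
freelist:

/*
 * Minimal userspace sketch of the recursion detection, with
 * hypothetical names.  Not the SLUB code itself; it only models
 * the "active" counter scheme from the patch.
 */
#include <stdio.h>
#include <stdlib.h>

struct cpu_cache {
	void *freelist;		/* first free object; next pointer stored in its first word */
	int active;		/* -1 idle, 0 while one fast path runs, > 0 when nested */
};

/* Fallback that bypasses the per-cpu cache entirely. */
static void *slow_alloc(size_t size)
{
	return malloc(size);
}

static void *fast_alloc(struct cpu_cache *c, size_t size)
{
	void *object;

	c->active++;			/* announce: fast path in progress */
	object = c->freelist;

	if (!object || c->active != 0) {
		/* Cache empty or fast path re-entered: take the slow route. */
		object = slow_alloc(size);
	} else {
		/* Pop the object; the next free object is stored inside it. */
		c->freelist = *(void **)object;
	}

	c->active--;			/* leave the fast path */
	return object;
}

int main(void)
{
	struct cpu_cache cache = { .freelist = NULL, .active = -1 };

	/* Seed the cache with one object so the fast path has work to do. */
	void *seed = malloc(64);
	*(void **)seed = NULL;
	cache.freelist = seed;

	void *a = fast_alloc(&cache, 64);	/* served from the freelist */
	void *b = fast_alloc(&cache, 64);	/* cache now empty -> slow path */

	printf("a=%p b=%p active=%d\n", a, b, cache.active);
	free(a);
	free(b);
	return 0;
}

Starting active at -1 (as alloc_kmem_cache_cpus() does in the patch below)
means the common non-nested case reads back exactly zero, so the fast path
needs only a single compare to decide whether the per-cpu freelist is safe
to use.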
WARNING: Very experimental
It would be good to compare this against an update of Mathieu's latest
version, which implemented pointer versioning to avoid disabling preemption
at all.
Cc: Mathieu Desnoyers <[email protected]>
Cc: Pekka Enberg <[email protected]>
Signed-off-by: Christoph Lameter <[email protected]>
---
include/linux/slub_def.h | 1 +
mm/slub.c | 91 +++++++++++++++++++++++++++++++++++++----------
2 files changed, 74 insertions(+), 18 deletions(-)
Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2009-10-01 15:53:15.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h 2009-10-01 15:53:15.000000000 -0500
@@ -38,6 +38,7 @@ struct kmem_cache_cpu {
void **freelist; /* Pointer to first free per cpu object */
struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */
+ int active; /* Active fastpaths */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2009-10-01 15:53:15.000000000 -0500
+++ linux-2.6/mm/slub.c 2009-10-01 15:53:15.000000000 -0500
@@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
unsigned long addr)
{
void **object;
- struct page *page = __this_cpu_read(s->cpu_slab->page);
+ struct page *page;
+ unsigned long flags;
+ int hotpath;
+
+ local_irq_save(flags);
+ preempt_enable(); /* Get rid of count */
+ hotpath = __this_cpu_read(s->cpu_slab->active) != 0;
+ page = __this_cpu_read(s->cpu_slab->page);
/* We handle __GFP_ZERO in the caller */
gfpflags &= ~__GFP_ZERO;
@@ -1626,13 +1633,21 @@ load_freelist:
goto another_slab;
if (unlikely(SLABDEBUG && PageSlubDebug(page)))
goto debug;
-
- __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
- page->inuse = page->objects;
- page->freelist = NULL;
- __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
+ if (unlikely(hotpath)) {
+ /* Object on second free list available and hotpath busy */
+ page->inuse++;
+ page->freelist = get_freepointer(s, object);
+ } else {
+ /* Prepare new list of objects for hotpath */
+ __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
+ page->inuse = page->objects;
+ page->freelist = NULL;
+ __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
+ }
unlock_out:
+ __this_cpu_dec(s->cpu_slab->active);
slab_unlock(page);
+ local_irq_restore(flags);
stat(s, ALLOC_SLOWPATH);
return object;
@@ -1642,8 +1657,12 @@ another_slab:
new_slab:
page = get_partial(s, gfpflags, node);
if (page) {
- __this_cpu_write(s->cpu_slab->page, page);
stat(s, ALLOC_FROM_PARTIAL);
+
+ if (hotpath)
+ goto hot_lock;
+
+ __this_cpu_write(s->cpu_slab->page, page);
goto load_freelist;
}
@@ -1657,6 +1676,10 @@ new_slab:
if (page) {
stat(s, ALLOC_SLAB);
+
+ if (hotpath)
+ goto hot_no_lock;
+
if (__this_cpu_read(s->cpu_slab->page))
flush_slab(s, __this_cpu_ptr(s->cpu_slab));
slab_lock(page);
@@ -1664,6 +1687,10 @@ new_slab:
__this_cpu_write(s->cpu_slab->page, page);
goto load_freelist;
}
+
+ __this_cpu_dec(s->cpu_slab->active);
+ local_irq_restore(flags);
+
if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
slab_out_of_memory(s, gfpflags, node);
return NULL;
@@ -1675,6 +1702,19 @@ debug:
page->freelist = get_freepointer(s, object);
__this_cpu_write(s->cpu_slab->node, -1);
goto unlock_out;
+
+ /*
+ * Hotpath is busy and we need to avoid touching
+ * hotpath variables
+ */
+hot_no_lock:
+ slab_lock(page);
+hot_lock:
+ __ClearPageSlubFrozen(page);
+ if (get_freepointer(s, page->freelist))
+ /* Cannot put page into the hotpath. Instead back to partial */
+ add_partial(get_node(s, page_to_nid(page)), page, 0);
+ goto load_freelist;
}
/*
@@ -1691,7 +1731,6 @@ static __always_inline void *slab_alloc(
gfp_t gfpflags, int node, unsigned long addr)
{
void **object;
- unsigned long flags;
gfpflags &= gfp_allowed_mask;
@@ -1701,19 +1740,21 @@ static __always_inline void *slab_alloc(
if (should_failslab(s->objsize, gfpflags))
return NULL;
- local_irq_save(flags);
+ preempt_disable();
+ irqsafe_cpu_inc(s->cpu_slab->active);
object = __this_cpu_read(s->cpu_slab->freelist);
- if (unlikely(!object || !node_match(s, node)))
+ if (unlikely(!object || !node_match(s, node) ||
+ __this_cpu_read(s->cpu_slab->active)))
object = __slab_alloc(s, gfpflags, node, addr);
else {
__this_cpu_write(s->cpu_slab->freelist,
get_freepointer(s, object));
+ irqsafe_cpu_dec(s->cpu_slab->active);
+ preempt_enable();
stat(s, ALLOC_FASTPATH);
}
- local_irq_restore(flags);
-
if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, s->objsize);
@@ -1777,6 +1818,11 @@ static void __slab_free(struct kmem_cach
{
void *prior;
void **object = (void *)x;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ preempt_enable(); /* Fix up count */
+ __this_cpu_dec(s->cpu_slab->active);
stat(s, FREE_SLOWPATH);
slab_lock(page);
@@ -1809,6 +1855,7 @@ checks_ok:
out_unlock:
slab_unlock(page);
+ local_irq_restore(flags);
return;
slab_empty:
@@ -1820,6 +1867,7 @@ slab_empty:
stat(s, FREE_REMOVE_PARTIAL);
}
slab_unlock(page);
+ local_irq_restore(flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
return;
@@ -1845,24 +1893,26 @@ static __always_inline void slab_free(st
struct page *page, void *x, unsigned long addr)
{
void **object = (void *)x;
- unsigned long flags;
kmemleak_free_recursive(x, s->flags);
- local_irq_save(flags);
kmemcheck_slab_free(s, object, s->objsize);
debug_check_no_locks_freed(object, s->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(object, s->objsize);
+ preempt_disable();
+ irqsafe_cpu_inc(s->cpu_slab->active);
if (likely(page == __this_cpu_read(s->cpu_slab->page) &&
- __this_cpu_read(s->cpu_slab->node) >= 0)) {
- set_freepointer(s, object, __this_cpu_read(s->cpu_slab->freelist));
+ __this_cpu_read(s->cpu_slab->node) >= 0) &&
+ !__this_cpu_read(s->cpu_slab->active)) {
+ set_freepointer(s, object,
+ __this_cpu_read(s->cpu_slab->freelist));
__this_cpu_write(s->cpu_slab->freelist, object);
+ irqsafe_cpu_dec(s->cpu_slab->active);
+ preempt_enable();
stat(s, FREE_FASTPATH);
} else
__slab_free(s, page, x, addr);
-
- local_irq_restore(flags);
}
void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2064,6 +2114,8 @@ static DEFINE_PER_CPU(struct kmem_cache_
static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
{
+ int cpu;
+
if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
/*
* Boot time creation of the kmalloc array. Use static per cpu data
@@ -2073,6 +2125,9 @@ static inline int alloc_kmem_cache_cpus(
else
s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(s->cpu_slab, cpu)->active = -1;
+
if (!s->cpu_slab)
return 0;
--