2009-10-06 23:44:24

by Christoph Lameter

Subject: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

This is a bit of a different tack on things than the last version provided
by Matheiu.

Instead of using a cmpxchg we keep a state variable in the per cpu structure
that is incremented when we enter the hot path. We can then detect that
a thread is in the fastpath and fall back to alternate allocation / free
technique that bypasses fastpath caching.

A disadvantage is that we have to disable preempt. But if preempt is disabled
(like on most kernels that I run) then the hotpath becomes very efficient.
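To make the scheme concrete before the diff, here is a toy user-space model
of the detection idea. Everything in it (the names, the printf stand-ins, the
simulated nesting) is invented for illustration; it only mirrors the shape of
the real code, which uses irqsafe_cpu_inc()/irqsafe_cpu_dec() on the per cpu
structure and falls back to the locked slow path instead of printing:

#include <stdio.h>

static int active = -1;			/* per cpu and initialized to -1 in the patch */

static void alloc_object(void)
{
	active++;			/* irqsafe_cpu_inc() in the patch */
	if (active != 0)
		printf("nested entry: bypass the per cpu cache\n");
	else
		printf("fastpath: take an object off the per cpu freelist\n");
	active--;			/* irqsafe_cpu_dec() */
}

int main(void)
{
	alloc_object();			/* normal call: -1 -> 0, fastpath */

	active++;			/* pretend we are in the middle of the fastpath ... */
	alloc_object();			/* ... and an interrupt allocates: nesting detected */
	active--;

	return 0;
}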

Cc: Mathieu Desnoyers <[email protected]>
Cc: Pekka Enberg <[email protected]>
Signed-off-by: Christoph Lameter <[email protected]>


---
include/linux/slub_def.h | 1
mm/slub.c | 91 +++++++++++++++++++++++++++++++++++++----------
2 files changed, 74 insertions(+), 18 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2009-10-01 15:53:15.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h 2009-10-01 15:53:15.000000000 -0500
@@ -38,6 +38,7 @@ struct kmem_cache_cpu {
void **freelist; /* Pointer to first free per cpu object */
struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */
+ int active; /* Active fastpaths */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2009-10-01 15:53:15.000000000 -0500
+++ linux-2.6/mm/slub.c 2009-10-01 15:53:15.000000000 -0500
@@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
unsigned long addr)
{
void **object;
- struct page *page = __this_cpu_read(s->cpu_slab->page);
+ struct page *page;
+ unsigned long flags;
+ int hotpath;
+
+ local_irq_save(flags);
+ preempt_enable(); /* Get rid of count */
+ hotpath = __this_cpu_read(s->cpu_slab->active) != 0;
+ page = __this_cpu_read(s->cpu_slab->page);

/* We handle __GFP_ZERO in the caller */
gfpflags &= ~__GFP_ZERO;
@@ -1626,13 +1633,21 @@ load_freelist:
goto another_slab;
if (unlikely(SLABDEBUG && PageSlubDebug(page)))
goto debug;
-
- __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
- page->inuse = page->objects;
- page->freelist = NULL;
- __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
+ if (unlikely(hotpath)) {
+ /* Object on second free list available and hotpath busy */
+ page->inuse++;
+ page->freelist = get_freepointer(s, object);
+ } else {
+ /* Prepare new list of objects for hotpath */
+ __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
+ page->inuse = page->objects;
+ page->freelist = NULL;
+ __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
+ }
unlock_out:
+ __this_cpu_dec(s->cpu_slab->active);
slab_unlock(page);
+ local_irq_restore(flags);
stat(s, ALLOC_SLOWPATH);
return object;

@@ -1642,8 +1657,12 @@ another_slab:
new_slab:
page = get_partial(s, gfpflags, node);
if (page) {
- __this_cpu_write(s->cpu_slab->page, page);
stat(s, ALLOC_FROM_PARTIAL);
+
+ if (hotpath)
+ goto hot_lock;
+
+ __this_cpu_write(s->cpu_slab->page, page);
goto load_freelist;
}

@@ -1657,6 +1676,10 @@ new_slab:

if (page) {
stat(s, ALLOC_SLAB);
+
+ if (hotpath)
+ goto hot_no_lock;
+
if (__this_cpu_read(s->cpu_slab->page))
flush_slab(s, __this_cpu_ptr(s->cpu_slab));
slab_lock(page);
@@ -1664,6 +1687,10 @@ new_slab:
__this_cpu_write(s->cpu_slab->page, page);
goto load_freelist;
}
+
+ __this_cpu_dec(s->cpu_slab->active);
+ local_irq_restore(flags);
+
if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
slab_out_of_memory(s, gfpflags, node);
return NULL;
@@ -1675,6 +1702,19 @@ debug:
page->freelist = get_freepointer(s, object);
__this_cpu_write(s->cpu_slab->node, -1);
goto unlock_out;
+
+ /*
+ * Hotpath is busy and we need to avoid touching
+ * hotpath variables
+ */
+hot_no_lock:
+ slab_lock(page);
+hot_lock:
+ __ClearPageSlubFrozen(page);
+ if (get_freepointer(s, page->freelist))
+ /* Cannot put page into the hotpath. Instead back to partial */
+ add_partial(get_node(s, page_to_nid(page)), page, 0);
+ goto load_freelist;
}

/*
@@ -1691,7 +1731,6 @@ static __always_inline void *slab_alloc(
gfp_t gfpflags, int node, unsigned long addr)
{
void **object;
- unsigned long flags;

gfpflags &= gfp_allowed_mask;

@@ -1701,19 +1740,21 @@ static __always_inline void *slab_alloc(
if (should_failslab(s->objsize, gfpflags))
return NULL;

- local_irq_save(flags);
+ preempt_disable();
+ irqsafe_cpu_inc(s->cpu_slab->active);
object = __this_cpu_read(s->cpu_slab->freelist);
- if (unlikely(!object || !node_match(s, node)))
+ if (unlikely(!object || !node_match(s, node) ||
+ __this_cpu_read(s->cpu_slab->active)))

object = __slab_alloc(s, gfpflags, node, addr);

else {
__this_cpu_write(s->cpu_slab->freelist,
get_freepointer(s, object));
+ irqsafe_cpu_dec(s->cpu_slab->active);
+ preempt_enable();
stat(s, ALLOC_FASTPATH);
}
- local_irq_restore(flags);
-
if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, s->objsize);

@@ -1777,6 +1818,11 @@ static void __slab_free(struct kmem_cach
{
void *prior;
void **object = (void *)x;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ preempt_enable(); /* Fix up count */
+ __this_cpu_dec(s->cpu_slab->active);

stat(s, FREE_SLOWPATH);
slab_lock(page);
@@ -1809,6 +1855,7 @@ checks_ok:

out_unlock:
slab_unlock(page);
+ local_irq_restore(flags);
return;

slab_empty:
@@ -1820,6 +1867,7 @@ slab_empty:
stat(s, FREE_REMOVE_PARTIAL);
}
slab_unlock(page);
+ local_irq_restore(flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
return;
@@ -1845,24 +1893,26 @@ static __always_inline void slab_free(st
struct page *page, void *x, unsigned long addr)
{
void **object = (void *)x;
- unsigned long flags;

kmemleak_free_recursive(x, s->flags);
- local_irq_save(flags);
kmemcheck_slab_free(s, object, s->objsize);
debug_check_no_locks_freed(object, s->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(object, s->objsize);

+ preempt_disable();
+ irqsafe_cpu_inc(s->cpu_slab->active);
if (likely(page == __this_cpu_read(s->cpu_slab->page) &&
- __this_cpu_read(s->cpu_slab->node) >= 0)) {
- set_freepointer(s, object, __this_cpu_read(s->cpu_slab->freelist));
+ __this_cpu_read(s->cpu_slab->node) >= 0) &&
+ !__this_cpu_read(s->cpu_slab->active)) {
+ set_freepointer(s, object,
+ __this_cpu_read(s->cpu_slab->freelist));
__this_cpu_write(s->cpu_slab->freelist, object);
+ irqsafe_cpu_dec(s->cpu_slab->active);
+ preempt_enable();
stat(s, FREE_FASTPATH);
} else
__slab_free(s, page, x, addr);
-
- local_irq_restore(flags);
}

void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2064,6 +2114,8 @@ static DEFINE_PER_CPU(struct kmem_cache_

static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
{
+ int cpu;
+
if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
/*
* Boot time creation of the kmalloc array. Use static per cpu data
@@ -2073,6 +2125,9 @@ static inline int alloc_kmem_cache_cpus(
else
s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);

+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(s->cpu_slab, cpu)->active = -1;
+
if (!s->cpu_slab)
return 0;


--


2009-10-07 02:55:25

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* [email protected] ([email protected]) wrote:
> This is a bit of a different tack on things than the last version provided
> by Matheiu.
>

Hi Christoph,

Btw, my name is "Math-ieu" ;) I'm not offended by you fat-fingering my
name, it's just rather funny. :)

Please see comments below,

> Instead of using a cmpxchg we keep a state variable in the per cpu structure
> that is incremented when we enter the hot path. We can then detect that
> a thread is in the fastpath and fall back to alternate allocation / free
> technique that bypasses fastpath caching.
>
> A disadvantage is that we have to disable preempt. But if preempt is disabled
> (like on most kernels that I run) then the hotpath becomes very efficient.
>
> Cc: Mathieu Desnoyers <[email protected]>
> Cc: Pekka Enberg <[email protected]>
> Signed-off-by: Christoph Lameter <[email protected]>
>
>
> ---
> include/linux/slub_def.h | 1
> mm/slub.c | 91 +++++++++++++++++++++++++++++++++++++----------
> 2 files changed, 74 insertions(+), 18 deletions(-)
>
> Index: linux-2.6/include/linux/slub_def.h
> ===================================================================
> --- linux-2.6.orig/include/linux/slub_def.h 2009-10-01 15:53:15.000000000 -0500
> +++ linux-2.6/include/linux/slub_def.h 2009-10-01 15:53:15.000000000 -0500
> @@ -38,6 +38,7 @@ struct kmem_cache_cpu {
> void **freelist; /* Pointer to first free per cpu object */
> struct page *page; /* The slab from which we are allocating */
> int node; /* The node of the page (or -1 for debug) */
> + int active; /* Active fastpaths */
> #ifdef CONFIG_SLUB_STATS
> unsigned stat[NR_SLUB_STAT_ITEMS];
> #endif
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c 2009-10-01 15:53:15.000000000 -0500
> +++ linux-2.6/mm/slub.c 2009-10-01 15:53:15.000000000 -0500
> @@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
> unsigned long addr)
> {
> void **object;
> - struct page *page = __this_cpu_read(s->cpu_slab->page);
> + struct page *page;
> + unsigned long flags;
> + int hotpath;
> +
> + local_irq_save(flags);
> + preempt_enable(); /* Get rid of count */

Ugh ? Is that legit ?

Checking preempt in irq-off context might have weird side-effects on
scheduler real-time behavior (at least). You end up quitting a preempt-off
section in irq-off mode, so the sched check is skipped, and later you only
re-enable interrupts, which depends on having had a timer interrupt
waiting on the interrupt line to ensure scheduler timings. But if the
timer interrupt arrived while you were in the preempt-off section, you're
doomed: the scheduler will not be woken up at interrupt enable.

> + hotpath = __this_cpu_read(s->cpu_slab->active) != 0;
> + page = __this_cpu_read(s->cpu_slab->page);
>
> /* We handle __GFP_ZERO in the caller */
> gfpflags &= ~__GFP_ZERO;
> @@ -1626,13 +1633,21 @@ load_freelist:
> goto another_slab;
> if (unlikely(SLABDEBUG && PageSlubDebug(page)))
> goto debug;
> -
> - __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
> - page->inuse = page->objects;
> - page->freelist = NULL;
> - __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
> + if (unlikely(hotpath)) {
> + /* Object on second free list available and hotpath busy */
> + page->inuse++;
> + page->freelist = get_freepointer(s, object);
> + } else {
> + /* Prepare new list of objects for hotpath */
> + __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
> + page->inuse = page->objects;
> + page->freelist = NULL;
> + __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
> + }
> unlock_out:
> + __this_cpu_dec(s->cpu_slab->active);
> slab_unlock(page);
> + local_irq_restore(flags);
> stat(s, ALLOC_SLOWPATH);
> return object;
>
> @@ -1642,8 +1657,12 @@ another_slab:
> new_slab:
> page = get_partial(s, gfpflags, node);
> if (page) {
> - __this_cpu_write(s->cpu_slab->page, page);
> stat(s, ALLOC_FROM_PARTIAL);
> +
> + if (hotpath)
> + goto hot_lock;
> +
> + __this_cpu_write(s->cpu_slab->page, page);
> goto load_freelist;
> }
>
> @@ -1657,6 +1676,10 @@ new_slab:
>
> if (page) {
> stat(s, ALLOC_SLAB);
> +
> + if (hotpath)
> + goto hot_no_lock;
> +
> if (__this_cpu_read(s->cpu_slab->page))
> flush_slab(s, __this_cpu_ptr(s->cpu_slab));
> slab_lock(page);
> @@ -1664,6 +1687,10 @@ new_slab:
> __this_cpu_write(s->cpu_slab->page, page);
> goto load_freelist;
> }
> +
> + __this_cpu_dec(s->cpu_slab->active);
> + local_irq_restore(flags);
> +
> if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
> slab_out_of_memory(s, gfpflags, node);
> return NULL;
> @@ -1675,6 +1702,19 @@ debug:
> page->freelist = get_freepointer(s, object);
> __this_cpu_write(s->cpu_slab->node, -1);
> goto unlock_out;
> +
> + /*
> + * Hotpath is busy and we need to avoid touching
> + * hotpath variables
> + */
> +hot_no_lock:
> + slab_lock(page);
> +hot_lock:
> + __ClearPageSlubFrozen(page);
> + if (get_freepointer(s, page->freelist))
> + /* Cannot put page into the hotpath. Instead back to partial */
> + add_partial(get_node(s, page_to_nid(page)), page, 0);
> + goto load_freelist;
> }
>
> /*
> @@ -1691,7 +1731,6 @@ static __always_inline void *slab_alloc(
> gfp_t gfpflags, int node, unsigned long addr)
> {
> void **object;
> - unsigned long flags;
>
> gfpflags &= gfp_allowed_mask;
>
> @@ -1701,19 +1740,21 @@ static __always_inline void *slab_alloc(
> if (should_failslab(s->objsize, gfpflags))
> return NULL;
>
> - local_irq_save(flags);
> + preempt_disable();
> + irqsafe_cpu_inc(s->cpu_slab->active);
> object = __this_cpu_read(s->cpu_slab->freelist);
> - if (unlikely(!object || !node_match(s, node)))
> + if (unlikely(!object || !node_match(s, node) ||
> + __this_cpu_read(s->cpu_slab->active)))

Interesting approach ! I just wonder about the impact of the
irq off / preempt enable dance.

Mathieu

>
> object = __slab_alloc(s, gfpflags, node, addr);
>
> else {
> __this_cpu_write(s->cpu_slab->freelist,
> get_freepointer(s, object));
> + irqsafe_cpu_dec(s->cpu_slab->active);
> + preempt_enable();
> stat(s, ALLOC_FASTPATH);
> }
> - local_irq_restore(flags);
> -
> if (unlikely((gfpflags & __GFP_ZERO) && object))
> memset(object, 0, s->objsize);
>
> @@ -1777,6 +1818,11 @@ static void __slab_free(struct kmem_cach
> {
> void *prior;
> void **object = (void *)x;
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + preempt_enable(); /* Fix up count */
> + __this_cpu_dec(s->cpu_slab->active);
>
> stat(s, FREE_SLOWPATH);
> slab_lock(page);
> @@ -1809,6 +1855,7 @@ checks_ok:
>
> out_unlock:
> slab_unlock(page);
> + local_irq_restore(flags);
> return;
>
> slab_empty:
> @@ -1820,6 +1867,7 @@ slab_empty:
> stat(s, FREE_REMOVE_PARTIAL);
> }
> slab_unlock(page);
> + local_irq_restore(flags);
> stat(s, FREE_SLAB);
> discard_slab(s, page);
> return;
> @@ -1845,24 +1893,26 @@ static __always_inline void slab_free(st
> struct page *page, void *x, unsigned long addr)
> {
> void **object = (void *)x;
> - unsigned long flags;
>
> kmemleak_free_recursive(x, s->flags);
> - local_irq_save(flags);
> kmemcheck_slab_free(s, object, s->objsize);
> debug_check_no_locks_freed(object, s->objsize);
> if (!(s->flags & SLAB_DEBUG_OBJECTS))
> debug_check_no_obj_freed(object, s->objsize);
>
> + preempt_disable();
> + irqsafe_cpu_inc(s->cpu_slab->active);
> if (likely(page == __this_cpu_read(s->cpu_slab->page) &&
> - __this_cpu_read(s->cpu_slab->node) >= 0)) {
> - set_freepointer(s, object, __this_cpu_read(s->cpu_slab->freelist));
> + __this_cpu_read(s->cpu_slab->node) >= 0) &&
> + !__this_cpu_read(s->cpu_slab->active)) {
> + set_freepointer(s, object,
> + __this_cpu_read(s->cpu_slab->freelist));
> __this_cpu_write(s->cpu_slab->freelist, object);
> + irqsafe_cpu_dec(s->cpu_slab->active);
> + preempt_enable();
> stat(s, FREE_FASTPATH);
> } else
> __slab_free(s, page, x, addr);
> -
> - local_irq_restore(flags);
> }
>
> void kmem_cache_free(struct kmem_cache *s, void *x)
> @@ -2064,6 +2114,8 @@ static DEFINE_PER_CPU(struct kmem_cache_
>
> static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
> {
> + int cpu;
> +
> if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
> /*
> * Boot time creation of the kmalloc array. Use static per cpu data
> @@ -2073,6 +2125,9 @@ static inline int alloc_kmem_cache_cpus(
> else
> s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
>
> + for_each_possible_cpu(cpu)
> + per_cpu_ptr(s->cpu_slab, cpu)->active = -1;
> +
> if (!s->cpu_slab)
> return 0;
>
>
> --

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-07 09:09:47

by Peter Zijlstra

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Tue, 2009-10-06 at 22:54 -0400, Mathieu Desnoyers wrote:
> > + local_irq_save(flags);
> > + preempt_enable(); /* Get rid of count */
>
> Ugh ? Is that legit ?

Yeah, it reads rather awkward, and the comment doesn't make it any
better, but what he's doing is:

slab_alloc()
preempt_disable();
__slab_alloc()
local_irq_save(flags);
preempt_enable();

2009-10-07 12:47:08

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Peter Zijlstra ([email protected]) wrote:
> On Tue, 2009-10-06 at 22:54 -0400, Mathieu Desnoyers wrote:
> > > + local_irq_save(flags);
> > > + preempt_enable(); /* Get rid of count */
> >
> > Ugh ? Is that legit ?
>
> Yeah, it reads rather awkward, and the comment doesn't make it any
> better, but what he's doing is:
>
> slab_alloc()
> preempt_disable();
> __slab_alloc()
> local_irq_save(flags);
> preempt_enable();
>

Yes, I understood this is what he was doing, but I wonder about the
impact on the scheduler. If we have:

* Jiffy 1 -- timer interrupt

* preempt disable
* Jiffy 2 -- timer interrupt
-> here, the scheduler is disabled, so the timer interrupt is skipped.
The scheduler depends on preempt_check_resched() at preempt_enable()
to execute in a bounded amount of time.
* local_irq_save
* preempt_enable
-> interrupts are disabled, scheduler execution is skipped.
* local_irq_restore
-> the interrupt line is low. The scheduler won't be called. There is
no preempt_check_resched() call.

* Jiffy 3 -- timer interrupt
-> Scheduler finally gets executed, missing a whole jiffy.

At the very least, I think an explicit preempt_check_resched should be
added after local_irq_restore().

Also, preempt_enable here should be replaced with
preempt_enable_no_resched().
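That is, roughly the following ordering (a sketch of the suggestion, not a
tested patch):

	local_irq_save(flags);
	preempt_enable_no_resched();	/* drop the count; no resched check is
					   possible with irqs off anyway */
	/* ... existing slow path ... */
	local_irq_restore(flags);
	preempt_check_resched();	/* run the deferred reschedule check */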

Mathieu

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-07 12:58:41

by Peter Zijlstra

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Wed, 2009-10-07 at 08:46 -0400, Mathieu Desnoyers wrote:
> * local_irq_restore
> -> the interrupt line is low. The scheduler won't be called. There is
> no preempt_check_resched() call.

That would be an issue with all irq disable sections, so I don't think
this is actually true.

I tried to find the actual code, but frigging paravirt crap obfuscated
the code enough that I actually gave up.

2009-10-07 13:31:45

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Peter Zijlstra ([email protected]) wrote:
> On Wed, 2009-10-07 at 08:46 -0400, Mathieu Desnoyers wrote:
> > * local_irq_restore
> > -> the interrupt line is low. The scheduler won't be called. There is
> > no preempt_check_resched() call.
>
> That would be an issue with all irq disable sections, so I don't think
> this is actually true.
>

AFAIK, irq disable sections rely on the fact that if you get a timer
interrupt during this section, the timer interrupt line stays triggered
for the duration of the irqoff section. Therefore, when interrupts are
re-enabled, the interrupt kicks in, so does the scheduler.

This is not the case with the preempt/irqoff dance proposed by
Christoph.

> I tried to find the actual code, but frigging paravirt crap obfuscated
> the code enough that I actually gave up.

You're probably looking for:

arch/x86/include/asm/irqflags.h:

static inline void native_irq_enable(void)
{
asm volatile("sti": : :"memory");
}

and

static inline void native_restore_fl(unsigned long flags)
{
asm volatile("push %0 ; popf"
: /* no output */
:"g" (flags)
:"memory", "cc");
}

static inline void raw_local_irq_restore(unsigned long flags)
{
native_restore_fl(flags);
}

Which are as simple as it gets.

Thanks,

Mathieu

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-07 14:28:31

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Wed, 7 Oct 2009, Peter Zijlstra wrote:

> On Wed, 2009-10-07 at 08:46 -0400, Mathieu Desnoyers wrote:
> > * local_irq_restore
> > -> the interrupt line is low. The scheduler won't be called. There is
> > no preempt_check_resched() call.
>
> That would be an issue with all irq disable sections, so I don't think
> this is actually true.

This was not true in the past... News to me that an irq enable would not
be a scheduling point.

2009-10-07 14:35:23

by Peter Zijlstra

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Wed, 2009-10-07 at 09:31 -0400, Mathieu Desnoyers wrote:
> * Peter Zijlstra ([email protected]) wrote:
> > On Wed, 2009-10-07 at 08:46 -0400, Mathieu Desnoyers wrote:
> > > * local_irq_restore
> > > -> the interrupt line is low. The scheduler won't be called. There is
> > > no preempt_check_resched() call.
> >
> > That would be an issue with all irq disable sections, so I don't think
> > this is actually true.
> >
>
> AFAIK, irq disable sections rely on the fact that if you get a timer
> interrupt during this section, the timer interrupt line stays triggered
> for the duration of the irqoff section. Therefore, when interrupts are
> re-enabled, the interrupt kicks in, so does the scheduler.
>
> This is not the case with the preempt/irqoff dance proposed by
> Christoph.

Ah, you're quite right indeed.

2009-10-07 15:05:08

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:

> Yes, I understood this is what he was doing, but I wonder about the
> impact on the scheduler. If we have:
>
> * Jiffy 1 -- timer interrupt
>
> * preempt disable
> * Jiffy 2 -- timer interrupt
> -> here, the scheduler is disabled, so the timer interrupt is skipped.
> The scheduler depends on preempt_check_resched() at preempt_enable()
> to execute in a bounded amount of time.

preempt disable does not disable interrupts. The timer interrupt will
occur. The scheduler may not reschedule another job on this processor
when the timer interrupt calls the scheduler_tick. It
may not do load balancing.

> Also, preempt_enable here should be replaced with
> preempt_enable_no_resched().

Used to have that in earlier incarnations but I saw a lot of these being
removed lately.


2009-10-07 15:03:37

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:
>
> > Yes, I understood this is what he was doing, but I wonder about the
> > impact on the scheduler. If we have:
> >
> > * Jiffy 1 -- timer interrupt
> >
> > * preempt disable
> > * Jiffy 2 -- timer interrupt
> > -> here, the scheduler is disabled, so the timer interrupt is skipped.
> > The scheduler depends on preempt_check_resched() at preempt_enable()
> > to execute in a bounded amount of time.
>
> preempt disable does not disable interrupts. The timer interrupt will
> occur. The scheduler may not reschedule another job on this processor
> when the timer interrupt calls the scheduler_tick. It
> may not do load balancing.

Yes. All you say here is true. I'm concerned about the _impact_ of this
along with the preempt/irqoff dance you propose. Trimming the following
key points from my execution scenario indeed skips the problem altogether.

Usually, when preemption is disabled, the scheduler refrains from
executing. *Now the important point*: the criterion that bounds the
maximum amount of time before the scheduler will re-check for pending
preemption is when preempt_enable() will re-activate preemption.

But because you run preempt_enable with interrupts off, the scheduler
check is not done. And it's not done when interrupts are re-activated
either.
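Concretely (quoting kernel/sched.c from memory, so treat this as a sketch):
the check done by preempt_enable() ends up in preempt_schedule(), which
starts with

	if (likely(ti->preempt_count || irqs_disabled()))
		return;

so with interrupts off it returns without scheduling, and nothing re-runs
the check once interrupts come back on.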

Please go back to my complete execution scenario, you'll probably see
the light. ;)

Thanks,

Mathieu

>
> > Also, preempt_enable here should be replaced with
> > preempt_enable_no_resched().
>
> Used to have that in earlier incarnations but I saw a lot of these being
> removed lately.
>
>
>

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-07 15:29:32

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:

> Usually, when preemption is disabled, the scheduler refrains from
> executing. *Now the important point*: the criterion that bounds the
> maximum amount of time before the scheduler will re-check for pending
> preemption is when preempt_enable() will re-activate preemption.

Which creates additional overhead in the allocator.

> But because you run preempt_enable with interrupts off, the scheduler
> check is not done. And it's not done when interrupts are re-activated
> either.

Ok so we should be moving the preempt_enable after the irq enable. Then we
will call into the scheduler at the end of the slow path. This may add
significantly more overhead than we had before when we simply disabled
and enabled interrupts...

2009-10-07 15:20:20

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:
>
> > Usually, when preemption is disabled, the scheduler refrains from
> > executing. *Now the important point*: the criterion that bounds the
> > maximum amount of time before the scheduler will re-check for pending
> > preemption is when preempt_enable() will re-activate preemption.
>
> Which creates additional overhead in the allocator.
>

Which we like to keep as low as possible, I agree.

> > But because you run preempt_enable with interrupts off, the scheduler
> > check is not done. And it's not done when interrupts are re-activated
> > either.
>
> Ok so we should be moving the preempt_enable after the irq enable. Then we
> will call into the scheduler at the end of the slow path. This may add
> significantly more overhead than we had before when we simply disabled
> and enabled interrupts...
>

You are already calling the scheduler when ending the _fast_ path. I
don't see the problem with calling it when you end the slow path
execution.

Mathieu

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-07 15:29:24

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:

> You are already calling the scheduler when ending the _fast_ path. I
> don't see the problem with calling it when you end the slow path
> execution.

Well yes that gives rise to the thought of using

preempt_enable_no_resched()

at the end of the fastpath as well. Constant calls into the scheduler
could be a major performance issue. I don't notice it here since I usually
cannot afford the preempt overhead and build kernels without support for
it.

2009-10-07 15:32:39

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Tue, 6 Oct 2009, Mathieu Desnoyers wrote:

> Btw, my name is "Math-ieu" ;) I'm not offended by you fat-fingering my
> name, it's just rather funny. :)

Sorry. But I got this right further down below...

2009-10-07 15:41:50

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:
>
> > You are already calling the scheduler when ending the _fast_ path. I
> > don't see the problem with calling it when you end the slow path
> > execution.
>
> Well yes that gives rise to the thought of using
>
> preempt_enable_no_resched()
>
> at the end of the fastpath as well. Constant calls into the scheduler
> could be a major performance issue

... but the opposite is a major RT behavior killer.

preempt_check_resched is basically:

a test TIF_NEED_RESCHED
if true, call to preempt_schedule
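From memory, this is roughly what the macro expands to in this kernel (a
sketch rather than the exact include/linux/preempt.h text):

#define preempt_check_resched() \
do { \
	if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \
		preempt_schedule(); \
} while (0)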

So, it's not a call to the scheduler at each and every preempt_enable.
It's _only_ if an interrupt came in during the preempt off section and
the scheduler considered that it should re-schedule soon.

I really don't see what's bothering you here. Testing a thread flag is
incredibly cheap. That's what is typically added to your fast path.

So, correct behavior would be:

preempt disable()
fast path attempt
if (fast path already taken) {
local_irq_save();
slow path.
local_irq_restore();
}
preempt_enable()


Mathieu

> . I don't notice it here since I usually
> cannot afford the preempt overhead and build kernels without support for
> it.
>
>

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-07 20:04:53

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:

> preempt_check_resched is basically:
>
> a test TIF_NEED_RESCHED
> if true, call to preempt_schedule

You did not mention the effect of incrementing the preempt counter and
the barrier(). Adds an additional cacheline to a very hot OS path.
Possibly register effects.

> I really don't see what's bothering you here. Testing a thread flag is
> incredibly cheap. That's what is typically added to your fast path.

I am trying to get rid of all unnecessary overhead. These "incredibly
cheap" tricks en masse have caused lots of regressions. And the allocator
hotpaths are overloaded with these "incredibly cheap" checks already.

> So, correct behavior would be:
>
> preempt disable()
> fast path attempt
> if (fast path already taken) {
> local_irq_save();
> slow path.
> local_irq_restore();
> }
> preempt_enable()

Ok. If you have to use preempt then you have to suffer I guess..

2009-10-07 17:13:21

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:
>
> > preempt_check_resched is basically:
> >
> > a test TIF_NEED_RESCHED
> > if true, call to preempt_schedule
>
> You did not mention the effect of incrementing the preempt counter and
> the barrier(). Adds an additional cacheline to a very hot OS path.
> Possibly register effects.
>

What you say applies to preempt_enable(). I was describing
preempt_check_resched() above, which involves neither a compiler barrier
nor an increment.

By the way, the barrier() you are talking about is in
preempt_enable_no_resched(), the very primitive you are considering
using to save these precious cycles.


> > I really don't see what's bothering you here. Testing a thread flag is
> > incredibly cheap. That's what is typically added to your fast path.
>
> I am trying to get rid of all unnecessary overhead. These "incredibly
> cheap" tricks en masse have caused lots of regressions. And the allocator
> hotpaths are overloaded with these "incredibly cheap" checks already.
>
> > So, correct behavior would be:
> >
> > preempt disable()
> > fast path attempt
> > if (fast path already taken) {
> > local_irq_save();
> > slow path.
> > local_irq_restore();
> > }
> > preempt_enable()
>
> Ok. If you have to use preempt then you have to suffer I guess..
>

Yes. A user enabling full preemption should be aware that it has a
performance footprint.

By the way, from what I remember of the slub allocator, you might find
the following more suited for your needs. I remember that the slow path
sometimes needs to re-enable interrupts, so:

preempt disable()
fast path attempt
if (fast path already taken) {
local_irq_save();
preempt_enable_no_resched();
slow path {
if (!flags & GFP_ATOMIC) {
local_irq_enable();
preempt_check_resched();
...
local_irq_disable();
}
}
local_irq_restore();
preempt_check_resched();
return;
}
preempt_enable()

This should work, be efficient, and still ensure scheduler RT correctness.

Mathieu

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-08 07:50:28

by Peter Zijlstra

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Wed, 2009-10-07 at 11:21 -0400, Christoph Lameter wrote:
> On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:
>
> > You are already calling the scheduler when ending the _fast_ path. I
> > don't see the problem with calling it when you end the slow path
> > execution.
>
> Well yes that gives rise to the thought of using
>
> preempt_enable_no_resched()

NACK, delaying the reschedule is not an option

2009-10-08 12:45:35

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Peter Zijlstra ([email protected]) wrote:
> On Wed, 2009-10-07 at 11:21 -0400, Christoph Lameter wrote:
> > On Wed, 7 Oct 2009, Mathieu Desnoyers wrote:
> >
> > > You are already calling the scheduler when ending the _fast_ path. I
> > > don't see the problem with calling it when you end the slow path
> > > execution.
> >
> > Well yes that gives rise to the thought of using
> >
> > preempt_enable_no_resched()
>
> NACK, delaying the reschedule is not an option

Even if it is only done with interrupts off, and a resched check is called
after each irq enable following this critical section? I'd like to
understand the reason behind your rejection for this specific case.

Thanks,

Mathieu

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-08 12:51:41

by Peter Zijlstra

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Thu, 2009-10-08 at 08:44 -0400, Mathieu Desnoyers wrote:
> Even if it is only done with interrupts off, and a resched check is called
> after each irq enable following this critical section? I'd like to
> understand the reason behind your rejection for this specific case.

No, the thing you proposed:

> preempt disable()
> fast path attempt
> if (fast path already taken) {
> local_irq_save();
> preempt_enable_no_resched();
> slow path {
> if (!flags & GFP_ATOMIC) {
> local_irq_enable();
> preempt_check_resched();
> ...
> local_irq_disable();
> }
> }
> local_irq_restore();
> preempt_check_resched();
> return;
> }
> preempt_enable()

Seems ok.

I just don't get why Christoph is getting all upset about the
need_resched() check in preempt_enable(), it's still cheaper than poking
at the interrupt flags.

2009-10-08 16:19:11

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Thu, 8 Oct 2009, Peter Zijlstra wrote:

> > preempt_enable_no_resched()
>
> NACK, delaying the reschedule is not an option

I ended up just doing a preempt_disable() at the beginning and a
preempt_enable() at the end. That should be easily reviewable.

2009-10-08 16:24:26

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Thu, 8 Oct 2009, Peter Zijlstra wrote:

> I just don't get why Christoph is getting all upset about the
> need_resched() check in preempt_enable(), it's still cheaper than poking
> at the interrupt flags.

Preempt causes too many performance issues. I keep on thinking that I
could make it easier on people to use it. Guess I better leave it simple
then.


2009-10-08 17:18:30

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Thu, 8 Oct 2009, Peter Zijlstra wrote:
>
> > > preempt_enable_no_resched()
> >
> > NACK, delaying the reschedule is not an option
>
> I ended up just doing a preempt_disable() at the beginning and a
> preempt_enable() at the end. That should be easily reviewable.
>

Then how do you re-enable preemption in the slow path when you need to
block ?

Mathieu


--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-08 17:23:09

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Peter Zijlstra ([email protected]) wrote:
> On Thu, 2009-10-08 at 08:44 -0400, Mathieu Desnoyers wrote:
> > Even if only done with interrupt off, and check resched is called after
> > each irq enable following this critical section ? I'd like to understand
> > the reason behind your rejection for this specific case.
>
> No, the thing you proposed:
>
> > preempt disable()
> > fast path attempt
> > if (fast path already taken) {
> > local_irq_save();
> > preempt_enable_no_resched();
> > slow path {
> > if (!flags & GFP_ATOMIC) {
> > local_irq_enable();
> > preempt_check_resched();
> > ...
> > local_irq_disable();
> > }
> > }
> > local_irq_restore();
> > preempt_check_resched();
> > return;
> > }
> > preempt_enable()
>
> Seems ok.
>
> I just don't get why Christoph is getting all upset about the
> need_resched() check in preempt_enable(), it's still cheaper than poking
> at the interrupt flags.

I agree with you. The need_resched() check is incredibly cheap. And if
Christoph still complains about the compiler barrier in
preempt_enable_no_resched()/preempt_disable(), then I think he should
consider the fact that the compiler does not perform cross-function
optimizations, and consider putting the preempt disable/enable statements
close to function boundaries. Therefore, the impact in terms of compiler
optimization restrictions should be minimal.

The scheme I proposed above should be OK in terms of scheduler effect
and permits dealing with re-enabling preemption in the slow path
appropriately.

Mathieu


--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-08 17:52:04

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Thu, 8 Oct 2009, Mathieu Desnoyers wrote:

> Then how do you re-enable preemption in the slow path when you need to
> block ?

preempt_enable();

before the page allocator call and

preempt_disable();

afterwards.
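In context that is roughly (and it is how the hunk reads in the V2 patch
further down the thread):

	if (gfpflags & __GFP_WAIT)
		local_irq_enable();

	preempt_enable();
	page = new_slab(s, gfpflags, node);
	preempt_disable();

	if (gfpflags & __GFP_WAIT)
		local_irq_disable();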

2009-10-08 19:23:34

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Thu, 8 Oct 2009, Mathieu Desnoyers wrote:
>
> > Then how do you re-enable preemption in the slow path when you need to
> > block ?
>
> preempt_enable();
>
> before the page allocator call and
>
> preempt_disable();
>
> afterwards.
>

OK, sounds good,

Mathieu


--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-08 19:29:09

by Christoph Lameter

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Thu, 8 Oct 2009, Mathieu Desnoyers wrote:

> OK, sounds good,

Then here is the full patch for review (vs. percpu-next):


From: Christoph Lameter <[email protected]>
Subject: SLUB: Experimental new fastpath w/o interrupt disable

This is a bit of a different tack on things than the last version provided
by Mathieu.

Instead of using a cmpxchg we keep a state variable in the per cpu structure
that is incremented when we enter the hot path. We can then detect that
a thread is in the fastpath and fall back to alternate allocation / free
technique that bypasses fastpath caching.

V1->V2:
- Use safe preempt_enable / disable.
- Enable preempt before calling into the page allocator
- Checkup on hotpath activity changes when returning from page allocator.
- Add barriers.

Todo:
- Verify that this is really safe
- Is this a benefit?

Cc: Mathieu Desnoyers <[email protected]>
Cc: Pekka Enberg <[email protected]>
Signed-off-by: Christoph Lameter <[email protected]>


---
include/linux/slub_def.h | 1
mm/slub.c | 112 +++++++++++++++++++++++++++++++++++++++--------
2 files changed, 96 insertions(+), 17 deletions(-)

Index: linux-2.6/include/linux/slub_def.h
===================================================================
--- linux-2.6.orig/include/linux/slub_def.h 2009-10-08 11:35:59.000000000 -0500
+++ linux-2.6/include/linux/slub_def.h 2009-10-08 11:35:59.000000000 -0500
@@ -38,6 +38,7 @@ struct kmem_cache_cpu {
void **freelist; /* Pointer to first free per cpu object */
struct page *page; /* The slab from which we are allocating */
int node; /* The node of the page (or -1 for debug) */
+ int active; /* Active fastpaths */
#ifdef CONFIG_SLUB_STATS
unsigned stat[NR_SLUB_STAT_ITEMS];
#endif
Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c 2009-10-08 11:35:59.000000000 -0500
+++ linux-2.6/mm/slub.c 2009-10-08 14:03:22.000000000 -0500
@@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
unsigned long addr)
{
void **object;
- struct page *page = __this_cpu_read(s->cpu_slab->page);
+ struct page *page;
+ unsigned long flags;
+ int hotpath;
+
+ local_irq_save(flags);
+ hotpath = __this_cpu_read(s->cpu_slab->active) != 0;
+ __this_cpu_dec(s->cpu_slab->active); /* Drop count from hotpath */
+ page = __this_cpu_read(s->cpu_slab->page);

/* We handle __GFP_ZERO in the caller */
gfpflags &= ~__GFP_ZERO;
@@ -1627,12 +1634,24 @@ load_freelist:
if (unlikely(SLABDEBUG && PageSlubDebug(page)))
goto debug;

- __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
- __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
- page->inuse = page->objects;
- page->freelist = NULL;
+ if (unlikely(hotpath)) {
+ /* Object on page free list available and hotpath busy */
+ page->inuse++;
+ page->freelist = get_freepointer(s, object);
+
+ } else {
+
+ /* Prepare new list of objects for hotpath */
+ __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
+ page->inuse = page->objects;
+ page->freelist = NULL;
+ __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
+
+ }
+
unlock_out:
slab_unlock(page);
+ local_irq_restore(flags);
stat(s, ALLOC_SLOWPATH);
return object;

@@ -1642,21 +1661,38 @@ another_slab:
new_slab:
page = get_partial(s, gfpflags, node);
if (page) {
- __this_cpu_write(s->cpu_slab->page, page);
stat(s, ALLOC_FROM_PARTIAL);
+
+ if (hotpath)
+ goto hot_lock;
+
+ __this_cpu_write(s->cpu_slab->page, page);
goto load_freelist;
}

if (gfpflags & __GFP_WAIT)
local_irq_enable();

+ preempt_enable();
page = new_slab(s, gfpflags, node);
+ preempt_disable();
+
+ /*
+ * We have already decremented our count. Someone else
+ * could be running right now or we were moved to a
+ * processor that is in the hotpath. So check against -1.
+ */
+ hotpath = __this_cpu_read(s->cpu_slab->active) != -1;

if (gfpflags & __GFP_WAIT)
local_irq_disable();

if (page) {
stat(s, ALLOC_SLAB);
+
+ if (hotpath)
+ goto hot_no_lock;
+
if (__this_cpu_read(s->cpu_slab->page))
flush_slab(s, __this_cpu_ptr(s->cpu_slab));
slab_lock(page);
@@ -1664,9 +1700,13 @@ new_slab:
__this_cpu_write(s->cpu_slab->page, page);
goto load_freelist;
}
+
+ local_irq_restore(flags);
+
if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
slab_out_of_memory(s, gfpflags, node);
return NULL;
+
debug:
if (!alloc_debug_processing(s, page, object, addr))
goto another_slab;
@@ -1675,6 +1715,20 @@ debug:
page->freelist = get_freepointer(s, object);
__this_cpu_write(s->cpu_slab->node, -1);
goto unlock_out;
+
+ /*
+ * Hotpath is busy and we need to avoid touching
+ * hotpath variables
+ */
+hot_no_lock:
+ slab_lock(page);
+
+hot_lock:
+ __ClearPageSlubFrozen(page);
+ if (get_freepointer(s, page->freelist))
+ /* Cannot put page into the hotpath. Instead back to partial */
+ add_partial(get_node(s, page_to_nid(page)), page, 0);
+ goto load_freelist;
}

/*
@@ -1691,7 +1745,6 @@ static __always_inline void *slab_alloc(
gfp_t gfpflags, int node, unsigned long addr)
{
void **object;
- unsigned long flags;

gfpflags &= gfp_allowed_mask;

@@ -1701,18 +1754,27 @@ static __always_inline void *slab_alloc(
if (should_failslab(s->objsize, gfpflags))
return NULL;

- local_irq_save(flags);
+ preempt_disable();
+
+ irqsafe_cpu_inc(s->cpu_slab->active);
+ barrier();
object = __this_cpu_read(s->cpu_slab->freelist);
- if (unlikely(!object || !node_match(s, node)))
+ if (unlikely(!object || !node_match(s, node) ||
+ __this_cpu_read(s->cpu_slab->active)))

object = __slab_alloc(s, gfpflags, node, addr);

else {
+
__this_cpu_write(s->cpu_slab->freelist,
get_freepointer(s, object));
+ barrier();
+ irqsafe_cpu_dec(s->cpu_slab->active);
stat(s, ALLOC_FASTPATH);
+
}
- local_irq_restore(flags);
+
+ preempt_enable();

if (unlikely((gfpflags & __GFP_ZERO) && object))
memset(object, 0, s->objsize);
@@ -1777,7 +1839,9 @@ static void __slab_free(struct kmem_cach
{
void *prior;
void **object = (void *)x;
+ unsigned long flags;

+ local_irq_save(flags);
stat(s, FREE_SLOWPATH);
slab_lock(page);

@@ -1809,6 +1873,7 @@ checks_ok:

out_unlock:
slab_unlock(page);
+ local_irq_restore(flags);
return;

slab_empty:
@@ -1820,6 +1885,7 @@ slab_empty:
stat(s, FREE_REMOVE_PARTIAL);
}
slab_unlock(page);
+ local_irq_restore(flags);
stat(s, FREE_SLAB);
discard_slab(s, page);
return;
@@ -1845,24 +1911,31 @@ static __always_inline void slab_free(st
struct page *page, void *x, unsigned long addr)
{
void **object = (void *)x;
- unsigned long flags;

kmemleak_free_recursive(x, s->flags);
- local_irq_save(flags);
+ preempt_disable();
kmemcheck_slab_free(s, object, s->objsize);
debug_check_no_locks_freed(object, s->objsize);
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(object, s->objsize);

+ irqsafe_cpu_inc(s->cpu_slab->active);
+ barrier();
if (likely(page == __this_cpu_read(s->cpu_slab->page) &&
- __this_cpu_read(s->cpu_slab->node) >= 0)) {
- set_freepointer(s, object, __this_cpu_read(s->cpu_slab->freelist));
+ __this_cpu_read(s->cpu_slab->node) >= 0) &&
+ !__this_cpu_read(s->cpu_slab->active)) {
+ set_freepointer(s, object,
+ __this_cpu_read(s->cpu_slab->freelist));
__this_cpu_write(s->cpu_slab->freelist, object);
+ barrier();
+ irqsafe_cpu_dec(s->cpu_slab->active);
+ preempt_enable();
stat(s, FREE_FASTPATH);
- } else
+ } else {
+ irqsafe_cpu_dec(s->cpu_slab->active);
+ preempt_enable();
__slab_free(s, page, x, addr);
-
- local_irq_restore(flags);
+ }
}

void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2064,6 +2137,8 @@ static DEFINE_PER_CPU(struct kmem_cache_

static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
{
+ int cpu;
+
if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
/*
* Boot time creation of the kmalloc array. Use static per cpu data
@@ -2073,6 +2148,9 @@ static inline int alloc_kmem_cache_cpus(
else
s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);

+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(s->cpu_slab, cpu)->active = -1;
+
if (!s->cpu_slab)
return 0;

2009-10-08 20:38:12

by Mathieu Desnoyers

Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Thu, 8 Oct 2009, Mathieu Desnoyers wrote:
>
> > OK, sounds good,
>
> Then here is the full patch for review (vs. percpu-next):
>
>
> From: Christoph Lameter <[email protected]>
> Subject: SLUB: Experimental new fastpath w/o interrupt disable
>
> This is a bit of a different tack on things than the last version provided
> by Mathieu.
>
> Instead of using a cmpxchg we keep a state variable in the per cpu structure
> that is incremented when we enter the hot path. We can then detect that
> a thread is in the fastpath and fall back to alternate allocation / free
> technique that bypasses fastpath caching.
>
> V1->V2:
> - Use safe preempt_enable / disable.
> - Enable preempt before calling into the page allocator
> - Checkup on hotpath activity changes when returning from page allocator.
> - Add barriers.
>
> Todo:
> - Verify that this is really safe
> - Is this a benefit?
>
> Cc: Mathieu Desnoyers <[email protected]>
> Cc: Pekka Enberg <[email protected]>
> Signed-off-by: Christoph Lameter <[email protected]>
>
>
> ---
> include/linux/slub_def.h | 1
> mm/slub.c | 112 +++++++++++++++++++++++++++++++++++++++--------
> 2 files changed, 96 insertions(+), 17 deletions(-)
>
> Index: linux-2.6/include/linux/slub_def.h
> ===================================================================
> --- linux-2.6.orig/include/linux/slub_def.h 2009-10-08 11:35:59.000000000 -0500
> +++ linux-2.6/include/linux/slub_def.h 2009-10-08 11:35:59.000000000 -0500
> @@ -38,6 +38,7 @@ struct kmem_cache_cpu {
> void **freelist; /* Pointer to first free per cpu object */
> struct page *page; /* The slab from which we are allocating */
> int node; /* The node of the page (or -1 for debug) */
> + int active; /* Active fastpaths */
> #ifdef CONFIG_SLUB_STATS
> unsigned stat[NR_SLUB_STAT_ITEMS];
> #endif
> Index: linux-2.6/mm/slub.c
> ===================================================================
> --- linux-2.6.orig/mm/slub.c 2009-10-08 11:35:59.000000000 -0500
> +++ linux-2.6/mm/slub.c 2009-10-08 14:03:22.000000000 -0500
> @@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
> unsigned long addr)
> {
> void **object;
> - struct page *page = __this_cpu_read(s->cpu_slab->page);
> + struct page *page;
> + unsigned long flags;
> + int hotpath;
> +
> + local_irq_save(flags);

(Recommend adding)

preempt_enable_no_resched();


The preempt enable right in the middle of a big function is adding an
unnecessary barrier(), which will restrain gcc from doing its
optimizations. This might hurt performance.

I still recommend the preempt_enable_no_resched() at the beginning of
__slab_alloc(), and simply putting a check_resched() here (which saves
us the odd compiler barrier in the middle of the function).


> + hotpath = __this_cpu_read(s->cpu_slab->active) != 0;
> + __this_cpu_dec(s->cpu_slab->active); /* Drop count from hotpath */
> + page = __this_cpu_read(s->cpu_slab->page);
>
> /* We handle __GFP_ZERO in the caller */
> gfpflags &= ~__GFP_ZERO;
> @@ -1627,12 +1634,24 @@ load_freelist:
> if (unlikely(SLABDEBUG && PageSlubDebug(page)))
> goto debug;
>
> - __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
> - __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
> - page->inuse = page->objects;
> - page->freelist = NULL;
> + if (unlikely(hotpath)) {
> + /* Object on page free list available and hotpath busy */
> + page->inuse++;
> + page->freelist = get_freepointer(s, object);
> +
> + } else {
> +
> + /* Prepare new list of objects for hotpath */
> + __this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
> + page->inuse = page->objects;
> + page->freelist = NULL;
> + __this_cpu_write(s->cpu_slab->node, page_to_nid(page));
> +
> + }
> +
> unlock_out:
> slab_unlock(page);
> + local_irq_restore(flags);
> stat(s, ALLOC_SLOWPATH);
> return object;
>
> @@ -1642,21 +1661,38 @@ another_slab:
> new_slab:
> page = get_partial(s, gfpflags, node);
> if (page) {
> - __this_cpu_write(s->cpu_slab->page, page);
> stat(s, ALLOC_FROM_PARTIAL);
> +
> + if (hotpath)
> + goto hot_lock;
> +
> + __this_cpu_write(s->cpu_slab->page, page);
> goto load_freelist;
> }
>
> if (gfpflags & __GFP_WAIT)
> local_irq_enable();
>
> + preempt_enable();

We could replace the above by:

if (gfpflags & __GFP_WAIT) {
local_irq_enable();
preempt_check_resched();
}


> page = new_slab(s, gfpflags, node);
> + preempt_disable();
> +

(remove the above)


> + /*
> + * We have already decremented our count. Someone else
> + * could be running right now or we were moved to a
> + * processor that is in the hotpath. So check against -1.
> + */
> + hotpath = __this_cpu_read(s->cpu_slab->active) != -1;
>
> if (gfpflags & __GFP_WAIT)
> local_irq_disable();
>
> if (page) {
> stat(s, ALLOC_SLAB);
> +
> + if (hotpath)
> + goto hot_no_lock;
> +
> if (__this_cpu_read(s->cpu_slab->page))
> flush_slab(s, __this_cpu_ptr(s->cpu_slab));
> slab_lock(page);
> @@ -1664,9 +1700,13 @@ new_slab:
> __this_cpu_write(s->cpu_slab->page, page);
> goto load_freelist;
> }
> +
> + local_irq_restore(flags);
> +
> if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
> slab_out_of_memory(s, gfpflags, node);
> return NULL;
> +
> debug:
> if (!alloc_debug_processing(s, page, object, addr))
> goto another_slab;
> @@ -1675,6 +1715,20 @@ debug:
> page->freelist = get_freepointer(s, object);
> __this_cpu_write(s->cpu_slab->node, -1);
> goto unlock_out;
> +
> + /*
> + * Hotpath is busy and we need to avoid touching
> + * hotpath variables
> + */
> +hot_no_lock:
> + slab_lock(page);
> +
> +hot_lock:
> + __ClearPageSlubFrozen(page);
> + if (get_freepointer(s, page->freelist))
> + /* Cannot put page into the hotpath. Instead back to partial */
> + add_partial(get_node(s, page_to_nid(page)), page, 0);
> + goto load_freelist;
> }
>
> /*
> @@ -1691,7 +1745,6 @@ static __always_inline void *slab_alloc(
> gfp_t gfpflags, int node, unsigned long addr)
> {
> void **object;
> - unsigned long flags;
>
> gfpflags &= gfp_allowed_mask;
>
> @@ -1701,18 +1754,27 @@ static __always_inline void *slab_alloc(
> if (should_failslab(s->objsize, gfpflags))
> return NULL;
>
> - local_irq_save(flags);
> + preempt_disable();
> +
> + irqsafe_cpu_inc(s->cpu_slab->active);
> + barrier();
> object = __this_cpu_read(s->cpu_slab->freelist);
> - if (unlikely(!object || !node_match(s, node)))
> + if (unlikely(!object || !node_match(s, node) ||
> + __this_cpu_read(s->cpu_slab->active)))

Missing a barrier() here ?

The idea is to let gcc know that "active" inc/dec and "freelist" reads
must never be reordered. Even when the decrement is done in the slow
path branch.
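For reference, barrier() is only a compiler barrier; the pattern under
discussion is roughly (a sketch, not the patch itself):

	irqsafe_cpu_inc(s->cpu_slab->active);
	barrier();	/* gcc must not hoist the freelist load above the inc */
	object = __this_cpu_read(s->cpu_slab->freelist);
	...
	__this_cpu_write(s->cpu_slab->freelist, get_freepointer(s, object));
	barrier();	/* gcc must not sink the store below the dec */
	irqsafe_cpu_dec(s->cpu_slab->active);

Only interrupts on the local CPU can interleave here, so compiler ordering
is what matters.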

>
> object = __slab_alloc(s, gfpflags, node, addr);
>
> else {
> +
> __this_cpu_write(s->cpu_slab->freelist,
> get_freepointer(s, object));
> + barrier();
> + irqsafe_cpu_dec(s->cpu_slab->active);
> stat(s, ALLOC_FASTPATH);
> +
> }
> - local_irq_restore(flags);
> +
> + preempt_enable();

Could move the preempt_enable() above to the else (fast path) branch.

>
> if (unlikely((gfpflags & __GFP_ZERO) && object))
> memset(object, 0, s->objsize);
> @@ -1777,7 +1839,9 @@ static void __slab_free(struct kmem_cach
> {
> void *prior;
> void **object = (void *)x;
> + unsigned long flags;
>
> + local_irq_save(flags);
> stat(s, FREE_SLOWPATH);
> slab_lock(page);
>
> @@ -1809,6 +1873,7 @@ checks_ok:
>
> out_unlock:
> slab_unlock(page);
> + local_irq_restore(flags);
> return;
>
> slab_empty:
> @@ -1820,6 +1885,7 @@ slab_empty:
> stat(s, FREE_REMOVE_PARTIAL);
> }
> slab_unlock(page);
> + local_irq_restore(flags);
> stat(s, FREE_SLAB);
> discard_slab(s, page);
> return;
> @@ -1845,24 +1911,31 @@ static __always_inline void slab_free(st
> struct page *page, void *x, unsigned long addr)
> {
> void **object = (void *)x;
> - unsigned long flags;
>
> kmemleak_free_recursive(x, s->flags);
> - local_irq_save(flags);
> + preempt_disable();
> kmemcheck_slab_free(s, object, s->objsize);
> debug_check_no_locks_freed(object, s->objsize);
> if (!(s->flags & SLAB_DEBUG_OBJECTS))
> debug_check_no_obj_freed(object, s->objsize);
>
> + irqsafe_cpu_inc(s->cpu_slab->active);
> + barrier();
> if (likely(page == __this_cpu_read(s->cpu_slab->page) &&
> - __this_cpu_read(s->cpu_slab->node) >= 0)) {
> - set_freepointer(s, object, __this_cpu_read(s->cpu_slab->freelist));
> + __this_cpu_read(s->cpu_slab->node) >= 0) &&
> + !__this_cpu_read(s->cpu_slab->active)) {
> + set_freepointer(s, object,
> + __this_cpu_read(s->cpu_slab->freelist));
> __this_cpu_write(s->cpu_slab->freelist, object);
> + barrier();
> + irqsafe_cpu_dec(s->cpu_slab->active);
> + preempt_enable();
> stat(s, FREE_FASTPATH);
> - } else
> + } else {

Perhaps missing a barrier() in the else ?

Thanks,

Mathieu

> + irqsafe_cpu_dec(s->cpu_slab->active);
> + preempt_enable();
> __slab_free(s, page, x, addr);
> -
> - local_irq_restore(flags);
> + }
> }
>
> void kmem_cache_free(struct kmem_cache *s, void *x)
> @@ -2064,6 +2137,8 @@ static DEFINE_PER_CPU(struct kmem_cache_
>
> static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
> {
> + int cpu;
> +
> if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
> /*
> * Boot time creation of the kmalloc array. Use static per cpu data
> @@ -2073,6 +2148,9 @@ static inline int alloc_kmem_cache_cpus(
> else
> s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
>
> + for_each_possible_cpu(cpu)
> + per_cpu_ptr(s->cpu_slab, cpu)->active = -1;
> +
> if (!s->cpu_slab)
> return 0;
>

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-08 21:16:00

by Christoph Lameter

[permalink] [raw]
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Thu, 8 Oct 2009, Mathieu Desnoyers wrote:

> > Index: linux-2.6/mm/slub.c
> > ===================================================================
> > --- linux-2.6.orig/mm/slub.c 2009-10-08 11:35:59.000000000 -0500
> > +++ linux-2.6/mm/slub.c 2009-10-08 14:03:22.000000000 -0500
> > @@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
> > unsigned long addr)
> > {
> > void **object;
> > - struct page *page = __this_cpu_read(s->cpu_slab->page);
> > + struct page *page;
> > + unsigned long flags;
> > + int hotpath;
> > +
> > + local_irq_save(flags);
>
> (Recommend adding)
>
> preempt_enable_no_resched();
>
>
> The preempt enable right in the middle of a big function is adding an
> unnecessary barrier(), which will restrain gcc from doing its
> optimizations. This might hurt performances.

In the middle of the function we have determined that we have to go to the
page allocator to get more memory. There is not much the compiler can do
to speed that up.

> I still recommend the preempt_enable_no_resched() at the beginning of
> __slab_alloc(), and simply putting a check_resched() here (which saves
> us the odd compiler barrier in the middle of function).

Then preemption would be unnecessarily disabled for the page allocator
call?

> > if (gfpflags & __GFP_WAIT)
> > local_irq_enable();
> >
> > + preempt_enable();
>
> We could replace the above by:
>
> if (gfpflags & __GFP_WAIT) {
> local_irq_enable();
> preempt_check_resched();
> }

Which would leave preempt off for the page allocator.

> > + irqsafe_cpu_inc(s->cpu_slab->active);
> > + barrier();
> > object = __this_cpu_read(s->cpu_slab->freelist);
> > - if (unlikely(!object || !node_match(s, node)))
> > + if (unlikely(!object || !node_match(s, node) ||
> > + __this_cpu_read(s->cpu_slab->active)))
>
> Missing a barrier() here ?

The modifications of the s->cpu_slab->freelist in __slab_alloc() are only
done after interrupts have been disabled and after the slab has been locked.

> The idea is to let gcc know that "active" inc/dec and "freelist" reads
> must never be reordered. Even when the decrement is done in the slow
> path branch.

Right. How could that occur with this code?

> > + preempt_enable();
> > stat(s, FREE_FASTPATH);
> > - } else
> > + } else {
>
> Perhaps missing a barrier() in the else ?

Not sure why that would be necessary. __slab_free() does not even touch
s->cpu_slab->freelist, so you have the same reasons as in the alloc path.

2009-10-12 13:56:51

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Thu, 8 Oct 2009, Mathieu Desnoyers wrote:
>
> > > Index: linux-2.6/mm/slub.c
> > > ===================================================================
> > > --- linux-2.6.orig/mm/slub.c 2009-10-08 11:35:59.000000000 -0500
> > > +++ linux-2.6/mm/slub.c 2009-10-08 14:03:22.000000000 -0500
> > > @@ -1606,7 +1606,14 @@ static void *__slab_alloc(struct kmem_ca
> > > unsigned long addr)
> > > {
> > > void **object;
> > > - struct page *page = __this_cpu_read(s->cpu_slab->page);
> > > + struct page *page;
> > > + unsigned long flags;
> > > + int hotpath;
> > > +
> > > + local_irq_save(flags);
> >
> > (Recommend adding)
> >
> > preempt_enable_no_resched();
> >
> >
> > The preempt enable right in the middle of a big function is adding an
> > unnecessary barrier(), which will restrain gcc from doing its
> > optimizations. This might hurt performances.
>
> > In the middle of the function we have determined that we have to go to the
> page allocator to get more memory. There is not much the compiler can do
> to speed that up.

Indeed, the compiler cannot do much about it. However, the programmer
(you) can move the preempt_enable_no_resched() part of the
preempt_enable() to the beginning of the function.

>
> > I still recommend the preempt_enable_no_resched() at the beginning of
> > __slab_alloc(), and simply putting a check_resched() here (which saves
> > us the odd compiler barrier in the middle of function).
>
> Then preemption would be unnecessarily disabled for the page allocator
> call?

No ?
preempt_enable_no_resched() enables preemption.

>
> > > if (gfpflags & __GFP_WAIT)
> > > local_irq_enable();
> > >
> > > + preempt_enable();
> >
> > We could replace the above by:
> >
> > if (gfpflags & __GFP_WAIT) {
> > local_irq_enable();
> > preempt_check_resched();
> > }
>
> Which would leave preempt off for the page allocator.

Not if you do preempt_enable_no_resched() at the beginning of the
function, after disabling interrupts.

>
> > > + irqsafe_cpu_inc(s->cpu_slab->active);
> > > + barrier();
> > > object = __this_cpu_read(s->cpu_slab->freelist);
> > > - if (unlikely(!object || !node_match(s, node)))
> > > + if (unlikely(!object || !node_match(s, node) ||
> > > + __this_cpu_read(s->cpu_slab->active)))
> >
> > Missing a barrier() here ?
>
> The modifications of the s->cpu_slab->freelist in __slab_alloc() are only
> done after interrupts have been disabled and after the slab has been locked.

I was concerned about a potential race between
cpu_slab->active/cpu_slab->freelist if an interrupt came in. I
understand that as soon as you get a hint that you must hit the slow
path, you don't care about the order in which these operations have been
done.

>
> > The idea is to let gcc know that "active" inc/dec and "freelist" reads
> > must never be reordered. Even when the decrement is done in the slow
> > path branch.
>
> Right. How could that occur with this code?
>

__slab_alloc() calls __this_cpu_dec(s->cpu_slab->active); without any
compiler barrier. But I get that once __slab_alloc() is executed, we don't
care whether the "active" decrement is reordered, because we are not
altering fastpath data anymore.
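
For what it's worth, what I would have added there is only this (clearly
not needed given your explanation above):

	/* in __slab_alloc(), before leaving the fastpath-active section */
	barrier();			/* keep fastpath data accesses above the dec */
	__this_cpu_dec(s->cpu_slab->active);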

> > > + preempt_enable();
> > > stat(s, FREE_FASTPATH);
> > > - } else
> > > + } else {
> >
> > Perhaps missing a barrier() in the else ?
>
> Not sure why that would be necessary. __slab_free() does not even touch
> s->cpu_slab->freelist, so you have the same reasons as in the alloc path.

My intent was to order __this_cpu_read(s->cpu_slab->page) and
irqsafe_cpu_dec(s->cpu_slab->active), but I get that once you take the
slow path, you don't care if some of the slow path spills over the slab
"active" critical section.
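
Concretely, the else branch I was thinking of would have looked like this
(a sketch only):

	} else {
		/* keep the page/node reads inside the active section */
		barrier();
		irqsafe_cpu_dec(s->cpu_slab->active);
		preempt_enable();
		__slab_free(s, page, x, addr);
	}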

Thanks,

Mathieu

>
>

--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-12 15:00:07

by Christoph Lameter

[permalink] [raw]
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Mon, 12 Oct 2009, Mathieu Desnoyers wrote:

> > In the middle of the function we have determined that we have to go to the
> > page allocator to get more memory. There is not much the compiler can do
> > to speed that up.
>
> Indeed, the compiler cannot do much about it. However, the programmer
> (you) can move the preempt_enable_no_resched() part of the
> preempt_enable() to the beginning of the function.

Ok but then we have the issue that the later irq enable in the
slowpath will not check for preemption.

> > > I still recommend the preempt_enable_no_resched() at the beginning of
> > > __slab_alloc(), and simply putting a check_resched() here (which saves
> > > us the odd compiler barrier in the middle of function).
> >
> > Then preemption would be unnecessarily disabled for the page allocator
> > call?
>
> No ?
> preempt_enable_no_resched() enables preemption.

If I just enable interrupts there then the preempt check will not be
done and we may miss a scheduling point.

2009-10-12 15:31:11

by Christoph Lameter

[permalink] [raw]
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Mon, 12 Oct 2009, Mathieu Desnoyers wrote:

> > If I just enable interrupts there then the preempt check will not be
> > done and we may miss a scheduling point.
> >
>
> That's why you should do:
>
> local_irq_save()
> preempt_enable_no_resched()
> ...
> local_irq_restore()
> preempt_check_resched()

What is the difference then to

local_irq_save()

...

local_irq_enable();
preempt_enable();

?

2009-10-12 15:27:07

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Mon, 12 Oct 2009, Mathieu Desnoyers wrote:
>
> > > In the middle of the function we have determined that we have to go to the
> > > page allocator to get more memory. There is not much the compiler can do
> > > to speed that up.
> >
> > Indeed, the compiler cannot do much about it. However, the programmer
> > (you) can move the preempt_enable_no_resched() part of the
> > preempt_enable() to the beginning of the function.
>
> Ok but then we have the issue that the later irq enable in the
> slowpath will not check for preemption.
>
> > > > I still recommend the preempt_enable_no_resched() at the beginning of
> > > > __slab_alloc(), and simply putting a check_resched() here (which saves
> > > > us the odd compiler barrier in the middle of function).
> > >
> > > Then preemption would be unnecessarily disabled for the page allocator
> > > call?
> >
> > No ?
> > preempt_enable_no_resched() enables preemption.
>
> If I just enable interrupts there then the preempt check will not be
> done and we may miss a scheduling point.
>

That's why you should do:

local_irq_save()
preempt_enable_no_resched()
...
local_irq_restore()
preempt_check_resched()
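
Applied to __slab_alloc(), that would look roughly like this (a sketch,
leaving the __GFP_WAIT irq enable aside):

	local_irq_save(flags);
	preempt_enable_no_resched();	/* drop the count; the barrier sits here */
	...
	slab_unlock(page);
	local_irq_restore(flags);
	preempt_check_resched();	/* pick up any reschedule point we skipped */
	return object;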

Mathieu



--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-12 15:46:11

by Christoph Lameter

[permalink] [raw]
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

On Mon, 12 Oct 2009, Mathieu Desnoyers wrote:

> local_irq_enable();
> preempt_enable(); <- barrier()
>
> In the first scenario, the compiler barrier is at the beginning of the
> slow path function, which should impose fewer restrictions on the compiler
> optimizations.

Again: The next statement is a call to a function that calls the page
allocator. It's one call vs. two for me. Plus the flow of things is much
cleaner and less obfuscated.


2009-10-12 15:39:27

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Mon, 12 Oct 2009, Mathieu Desnoyers wrote:
>
> > > If I just enable interrupts there then the preempt check will not be
> > > done and we may miss a scheduling point.
> > >
> >
> > That's why you should do:
> >
> > local_irq_save()
> > preempt_enable_no_resched()
> > ...
> > local_irq_restore()
> > preempt_check_resched()
>
> What is the difference then to
>
> local_irq_save()
>
> ...
>
> local_irq_enable();
> preempt_enable();
>
> ?

local_irq_save()
preempt_enable_no_resched() <- barrier()
...
local_irq_enable()
preempt_check_resched()

vs

local_irq_save()
...
local_irq_enable();
preempt_enable(); <- barrier()

In the first scenario, the compiler barrier is at the beginning of the
slow path function, which should impose fewer restrictions on the compiler
optimizations.

Mathieu


--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68

2009-10-12 16:05:42

by Mathieu Desnoyers

[permalink] [raw]
Subject: Re: [this_cpu_xx V5 19/19] SLUB: Experimental new fastpath w/o interrupt disable

* Christoph Lameter ([email protected]) wrote:
> On Mon, 12 Oct 2009, Mathieu Desnoyers wrote:
>
> > local_irq_enable();
> > preempt_enable(); <- barrier()
> >
> > In the first scenario, the compiler barrier is at the beginning of the
> > slow path function, which should impose fewer restrictions on the compiler
> > optimizations.
>
> Again: The next statement is a call to a function that calls the page
> allocator. It's one call vs. two for me. Plus the flow of things is much
> cleaner and less obfuscated.
>

That's a convincing argument, I agree.

Thanks,

Mathieu


--
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68