2009-01-23 08:43:30

by Pekka Enberg

Subject: [PATCH] SLUB: revert direct page allocator pass through

From: Pekka Enberg <[email protected]>

This patch reverts page allocator pass-through logic from the SLUB allocator.

Commit aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 ("SLUB: direct pass through of
page size or higher kmalloc requests") added page allocator pass-through to the
SLUB allocator for large-sized allocations. This, however, results in a
performance regression compared to SLAB in the netperf UDP-U-4k test.

The regression comes from the kfree(skb->head) call in skb_release_data() that
is subject to page allocator pass-through as the size passed to __alloc_skb()
is larger than 4 KB in this test. With this patch, the performance regression
is almost closed:

<insert numbers here>

Reported-by: "Zhang, Yanmin" <[email protected]>
Tested-by: "Zhang, Yanmin" <[email protected]>
Signed-off-by: Pekka Enberg <[email protected]>
---
Yanmin, do you still have the relevant numbers I could cut and paste to
the patch description?
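
For reference, the pass-through behaviour being reverted boils down to
the following (a simplified sketch; the real logic is in the kmalloc()
and __kmalloc() hunks removed below):

	/*
	 * Sketch only: with pass-through, any kmalloc() larger than
	 * PAGE_SIZE skips the kmalloc caches entirely and is served
	 * directly by the page allocator, as kmalloc_large() did.
	 */
	static __always_inline void *kmalloc_with_passthrough(size_t size,
							      gfp_t flags)
	{
		if (size > PAGE_SIZE)	/* e.g. skb->head in this test */
			return (void *)__get_free_pages(flags | __GFP_COMP,
							get_order(size));
		return __kmalloc(size, flags);
	}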

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b..3bd3662 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -124,7 +124,7 @@ struct kmem_cache {
* We keep the general caches in an array of slab caches that are used for
* 2^x bytes of allocations.
*/
-extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1];
+extern struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];

/*
* Sorry that the following has to be that ugly but some versions of GCC
@@ -135,6 +135,9 @@ static __always_inline int kmalloc_index(size_t size)
if (!size)
return 0;

+ if (size > KMALLOC_MAX_SIZE)
+ return -1;
+
if (size <= KMALLOC_MIN_SIZE)
return KMALLOC_SHIFT_LOW;

@@ -154,10 +157,6 @@ static __always_inline int kmalloc_index(size_t size)
if (size <= 1024) return 10;
if (size <= 2 * 1024) return 11;
if (size <= 4 * 1024) return 12;
-/*
- * The following is only needed to support architectures with a larger page
- * size than 4k.
- */
if (size <= 8 * 1024) return 13;
if (size <= 16 * 1024) return 14;
if (size <= 32 * 1024) return 15;
@@ -167,6 +166,10 @@ static __always_inline int kmalloc_index(size_t size)
if (size <= 512 * 1024) return 19;
if (size <= 1024 * 1024) return 20;
if (size <= 2 * 1024 * 1024) return 21;
+ if (size <= 4 * 1024 * 1024) return 22;
+ if (size <= 8 * 1024 * 1024) return 23;
+ if (size <= 16 * 1024 * 1024) return 24;
+ if (size <= 32 * 1024 * 1024) return 25;
return -1;

/*
@@ -191,6 +194,19 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
if (index == 0)
return NULL;

+ /*
+ * This function only gets expanded if __builtin_constant_p(size), so
+ * testing it here shouldn't be needed. But some versions of gcc need
+ * help.
+ */
+ if (__builtin_constant_p(size) && index < 0) {
+ /*
+ * Generate a link failure. Would be great if we could
+ * do something to stop the compile here.
+ */
+ extern void __kmalloc_size_too_large(void);
+ __kmalloc_size_too_large();
+ }
return &kmalloc_caches[index];
}

@@ -204,17 +220,9 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);

-static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
-{
- return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
-}
-
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
if (__builtin_constant_p(size)) {
- if (size > PAGE_SIZE)
- return kmalloc_large(size, flags);
-
if (!(flags & SLUB_DMA)) {
struct kmem_cache *s = kmalloc_slab(size);

diff --git a/mm/slub.c b/mm/slub.c
index 6392ae5..8fad23f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2475,7 +2475,7 @@ EXPORT_SYMBOL(kmem_cache_destroy);
* Kmalloc subsystem
*******************************************************************/

-struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __cacheline_aligned;
EXPORT_SYMBOL(kmalloc_caches);

static int __init setup_slub_min_order(char *str)
@@ -2537,7 +2537,7 @@ panic:
}

#ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
+static struct kmem_cache *kmalloc_caches_dma[KMALLOC_SHIFT_HIGH + 1];

static void sysfs_add_func(struct work_struct *w)
{
@@ -2643,8 +2643,12 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
return ZERO_SIZE_PTR;

index = size_index[(size - 1) / 8];
- } else
+ } else {
+ if (size > KMALLOC_MAX_SIZE)
+ return NULL;
+
index = fls(size - 1);
+ }

#ifdef CONFIG_ZONE_DMA
if (unlikely((flags & SLUB_DMA)))
@@ -2658,9 +2662,6 @@ void *__kmalloc(size_t size, gfp_t flags)
{
struct kmem_cache *s;

- if (unlikely(size > PAGE_SIZE))
- return kmalloc_large(size, flags);
-
s = get_slab(size, flags);

if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -2670,25 +2671,11 @@ void *__kmalloc(size_t size, gfp_t flags)
}
EXPORT_SYMBOL(__kmalloc);

-static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
-{
- struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
- get_order(size));
-
- if (page)
- return page_address(page);
- else
- return NULL;
-}
-
#ifdef CONFIG_NUMA
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
struct kmem_cache *s;

- if (unlikely(size > PAGE_SIZE))
- return kmalloc_large_node(size, flags, node);
-
s = get_slab(size, flags);

if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -2746,11 +2733,8 @@ void kfree(const void *x)
return;

page = virt_to_head_page(x);
- if (unlikely(!PageSlab(page))) {
- BUG_ON(!PageCompound(page));
- put_page(page);
+ if (unlikely(WARN_ON(!PageSlab(page)))) /* XXX */
return;
- }
slab_free(page->slab, page, object, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
@@ -2985,7 +2969,7 @@ void __init kmem_cache_init(void)
caches++;
}

- for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
+ for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
create_kmalloc_cache(&kmalloc_caches[i],
"kmalloc", 1 << i, GFP_KERNEL);
caches++;
@@ -3022,7 +3006,7 @@ void __init kmem_cache_init(void)
slab_state = UP;

/* Provide the correct kmalloc names now that the caches are up */
- for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
+ for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
kmalloc_caches[i]. name =
kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);

@@ -3222,9 +3206,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;

- if (unlikely(size > PAGE_SIZE))
- return kmalloc_large(size, gfpflags);
-
s = get_slab(size, gfpflags);

if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -3238,9 +3219,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
{
struct kmem_cache *s;

- if (unlikely(size > PAGE_SIZE))
- return kmalloc_large_node(size, gfpflags, node);
-
s = get_slab(size, gfpflags);

if (unlikely(ZERO_OR_NULL_PTR(s)))


2009-01-23 08:53:27

by Nick Piggin

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Friday 23 January 2009 19:43:20 Pekka J Enberg wrote:
> From: Pekka Enberg <[email protected]>
>
> This patch reverts page allocator pass-through logic from the SLUB
> allocator.
>
> Commit aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 ("SLUB: direct pass through
> of page size or higher kmalloc requests")

Hmm, it lists quite a number of advantages that I guess are being
reverted too? What was the test case(s) that prompted this commit
in the first place? Better ensure it doesn't slow down...

2009-01-23 09:04:42

by Pekka Enberg

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Friday 23 January 2009 19:43:20 Pekka J Enberg wrote:
> > From: Pekka Enberg <[email protected]>
> >
> > This patch reverts page allocator pass-through logic from the SLUB
> > allocator.
> >
> > Commit aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 ("SLUB: direct pass through
> > of page size or higher kmalloc requests")

On Fri, 2009-01-23 at 19:52 +1100, Nick Piggin wrote:
> Hmm, it lists quite a number of advantages that I guess are being
> reverted too? What was the test case(s) that prompted this commit
> in the first place? Better ensure it doesn't slow down...

Oh, I see I forgot to put an RFC in the patch subject line. I'm not
going to merge this just yet.

Christoph, suggestions on the test case?

2009-01-23 09:22:57

by Yanmin Zhang

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Fri, 2009-01-23 at 10:43 +0200, Pekka J Enberg wrote:
> From: Pekka Enberg <[email protected]>
>
> This patch reverts page allocator pass-through logic from the SLUB allocator.
>
> Commit aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 ("SLUB: direct pass through of
> page size or higher kmalloc requests") added page allocator pass-through to the
> SLUB allocator for large-sized allocations. This, however, results in a
> performance regression compared to SLAB in the netperf UDP-U-4k test.
>
> The regression comes from the kfree(skb->head) call in skb_release_data() that
> is subject to page allocator pass-through as the size passed to __alloc_skb()
> is larger than 4 KB in this test. With this patch, the performance regression
> is almost closed:
>
> <insert numbers here>
>
> Reported-by: "Zhang, Yanmin" <[email protected]>
> Tested-by: "Zhang, Yanmin" <[email protected]>
> Signed-off-by: Pekka Enberg <[email protected]>
> ---
> Yanmin, do you still have the relevant numbers I could cut and paste to
> the patch description?

I used the 2.6.29-rc2 kernel to run netperf UDP-U-4k CPU_NUM client/server
pair loopback testing on x86-64 machines. Compared with SLUB, SLAB's result
is about 2.3 times SLUB's. After applying the reverting patch, the result
difference between SLUB and SLAB becomes about 1%, which we might consider
fluctuation.

yanmin

2009-01-23 15:06:49

by Christoph Lameter

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Fri, 23 Jan 2009, Nick Piggin wrote:

> Hmm, it lists quite a number of advantages that I guess are being
> reverted too? What was the test case(s) that prompted this commit
> in the first place? Better ensure it doesn't slow down...

The advantage was mainly memory savings and the ability to redefine kmallocs
to go directly to the page allocator. That totally avoids slab allocator overhead.

I thought higher order allocations were not supposed to be used in
performance critical paths? Didn't you want to do everything with order-0
allocs?

It seems that we currently need the slab allocators to compensate for the
performance problems in the page allocator for these higher order allocs.
I'd rather have the page allocator fixed but things are as they are.


2009-01-23 15:13:32

by Nick Piggin

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Saturday 24 January 2009 02:03:22 Christoph Lameter wrote:
> On Fri, 23 Jan 2009, Nick Piggin wrote:
> > Hmm, it lists quite a number of advantages that I guess are being
> > reverted too? What was the test case(s) that prompted this commit
> > in the first place? Better ensure it doesn't slow down...
>
> The advantage was mainly memory savings and the ability to redefine kmallocs
> to go directly to the page allocator. That totally avoids slab allocator
> overhead.

Well it sounds in the changelog like a massive performance advantage.
Unfortunately it doesn't really say what the workload was.


> I thought higher order allocations were not supposed to be used in
> performance critical paths?

? You use them all the time in SLUB alone.


> Didn't you want to do everything with order-0
> allocs?

People don't always do what I think is best, unfortunately ;)


> It seems that we currently need the slab allocators to compensate for the
> performance problems in the page allocator for these higher order allocs.
> I'd rather have the page allocator fixed but things are as they are.

No objections from me with speeding up the page allocator of course.

2009-01-23 15:16:37

by Pekka Enberg

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Fri, Jan 23, 2009 at 5:03 PM, Christoph Lameter
<[email protected]> wrote:
> It seems that we currently need the slab allocators to compensate for the
> performance problems in the page allocator for these higher order allocs.
> I'd rather have the page allocator fixed but things are as they are.

So, is that an ACK or a NAK for the patch?-)

2009-01-23 15:37:42

by Nick Piggin

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Saturday 24 January 2009 02:27:29 Christoph Lameter wrote:
> On Sat, 24 Jan 2009, Nick Piggin wrote:
> > > I thought higher order allocations were not supposed to be used in
> > > performance critical paths?
> >
> > ? You use them all the time in SLUB alone.
>
> But SLUB compensates for the slow higher order allocs by using them as
> buffers for smaller objects. The higher order allocs are in the slow
> paths.

I don't quite know what you're asking of me. If you see higher order
allocations in performance critical code, then you answer your own
question?

However, I don't know if netperf udp 4K over loopback is totally
realistic. Maybe real network drivers have different allocation patterns.
If I were you I wouldn't be too hasty to make big changes based on that
alone, if it could introduce a regression somewhere more important.

2009-01-23 15:41:40

by Nick Piggin

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Saturday 24 January 2009 02:25:06 Christoph Lameter wrote:
> On Fri, 23 Jan 2009, Pekka Enberg wrote:
> > On Fri, Jan 23, 2009 at 5:03 PM, Christoph Lameter
> >
> > <[email protected]> wrote:
> > > It seems that we currently need the slab allocators to compensate for
> > > the performance problems in the page allocator for these higher order
> > > allocs. I'd rather have the page allocator fixed but things are as they
> > > are.
> >
> > So, is that an ACK or a NAK for the patch?-)
>
> Doesn't that break Nick's order-0 dominates-the-world scheme?

SLUB is your scheme. And yes it definitely goes against my liking of
order-0 allocations regardless of this patch.


> One of the reasons for doing this was to get the page allocator
> performance issues exposed and addressed.

The page allocator is never going to be as fast as a slab allocator, for
reasons I explained a long time ago. Not to say it can't be improved,
just stating facts.

2009-01-23 15:42:31

by Pekka Enberg

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

> > On Fri, Jan 23, 2009 at 5:03 PM, Christoph Lameter
> > <[email protected]> wrote:
> > > It seems that we currently need the slab allocators to compensate for the
> > > performance problems in the page allocator for these higher order allocs.
> > > I'd rather have the page allocator fixed but things are as they are.

On Fri, 23 Jan 2009, Pekka Enberg wrote:
> > So, is that an ACK or a NAK for the patch?-)

On Fri, 2009-01-23 at 10:25 -0500, Christoph Lameter wrote:
> Doesn't that break Nick's order-0 dominates-the-world scheme? One of
> the reasons for doing this was to get the page allocator performance
> issues exposed and addressed.
>
> Acked-by: Christoph Lameter <[email protected]>

Thanks. I'm putting this in linux-next now. Let's see what breaks.

Pekka

2009-01-23 15:45:14

by Pekka Enberg

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Sat, 2009-01-24 at 02:37 +1100, Nick Piggin wrote:
> However, I don't know if netperf udp 4K over loopback is totally
> realistic. Maybe real network drivers have different allocation patterns.
> If I were you I wouldn't be too hasty to make big changes based on that
> alone, if it could introduce a regression somewhere more important.

Yup, I'm putting it in linux-next to see if anything pops up. It's of
course possible that it will introduce a regression somewhere, but seeing
the hit SLUB is taking from the page allocator, I'm bound to think that
the performance increase claims of commit
aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 ("SLUB: direct pass through of
page size or higher kmalloc requests") don't hold anymore.

Pekka

2009-01-23 15:54:36

by Nick Piggin

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Saturday 24 January 2009 02:44:28 Pekka Enberg wrote:
> On Sat, 2009-01-24 at 02:37 +1100, Nick Piggin wrote:
> > However, I don't know if netperf udp 4K over loopback is totally
> > realistic. Maybe real network drivers have different allocation patterns.
> > If I were you I wouldn't be too hasty to make big changes based on that
> > alone, if it could introduce a regression somewhere more important.
>
> Yup, I'm putting it in linux-next to see if anything pops up. It's of
> course possible that it will introduce a regression somewhere, but seeing
> the hit SLUB is taking from the page allocator, I'm bound to think that
> the performance increase claims of commit
> aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 ("SLUB: direct pass through of
> page size or higher kmalloc requests") don't hold anymore.

Well what workload were they relating to? That could easily be retested.

2009-01-23 15:57:22

by Christoph Lameter

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Sat, 24 Jan 2009, Nick Piggin wrote:

> > I thought higher order allocations were not supposed to be used in
> > performance critical paths?
>
> ? You use them all the time in SLUB alone.

But SLUB compensates for the slow higher order allocs by using them as
buffers for smaller objects. The higher order allocs are in the slow
paths.

> > It seems that we currently need the slab allocators to compensate for the
> > performance problems in the page allocator for these higher order allocs.
> > I'd rather have the page allocator fixed but things are as they are.
>
> No objections from me with speeding up the page allocator of course.

At least something we agree on. Gotta go. Next meeting and I still have
not gotten through my email. Sigh.

2009-01-23 15:57:35

by Christoph Lameter

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Fri, 23 Jan 2009, Pekka Enberg wrote:

> On Fri, Jan 23, 2009 at 5:03 PM, Christoph Lameter
> <[email protected]> wrote:
> > It seems that we currently need the slab allocators to compensate for the
> > performance problems in the page allocator for these higher order allocs.
> > I'd rather have the page allocator fixed but things are as they are.
>
> So, is that an ACK or a NAK for the patch?-)

Doesn't that break Nick's order-0 dominates-the-world scheme? One of the
reasons for doing this was to get the page allocator performance issues
exposed and addressed.

Acked-by: Christoph Lameter <[email protected]>

2009-01-23 16:00:47

by Christoph Lameter

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Sat, 24 Jan 2009, Nick Piggin wrote:

> However, I don't know if netperf udp 4K over loopback is totally
> realistic. Maybe real network drivers have different allocation patterns.
> If I were you I wouldn't be too hasty to make big changes based on that
> alone, if it could introduce a regression somewhere more important.

AFAICT it just wastes memory. My idea of a slab allocator is that it is
used for small (less than page-sized) allocations. The page allocator
should be able to handle page-sized allocations in an effective manner.

2009-01-23 16:02:52

by Christoph Lameter

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Sat, 24 Jan 2009, Nick Piggin wrote:

> The page allocator is never going to be as fast as a slab allocator, for
> reasons I explained a long time ago. Not to say it can't be improved,
> just stating facts.

Why not? Remember the discussion we had a while ago. You can bring the
pages into a state where minimal manipulations are required for alloc/free
and avoid all the checks in the hot paths. The SLUB method could be used:
take a big contiguous chunk and then issue page-sized portions of it.
That could be quite fast.

Or if you prefer order-0, do a single linked list like SLQB does.
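
Something along these lines (a hypothetical sketch with made-up names,
not real kernel code; this is the SLUB-style variant):

	struct page_pool {
		void *freelist;		/* singly linked free pages */
	};

	/* Slow path: carve a big contiguous chunk into page-sized
	 * portions and thread a freelist through them. */
	static void page_pool_refill(struct page_pool *pool, gfp_t flags,
				     int order)
	{
		char *chunk = (char *)__get_free_pages(flags, order);
		int i;

		if (!chunk)
			return;
		for (i = 0; i < (1 << order); i++) {
			void *p = chunk + (i << PAGE_SHIFT);

			*(void **)p = pool->freelist;
			pool->freelist = p;
		}
	}

	/* Fast path: minimal manipulation, no checks, just a pointer pop. */
	static void *page_pool_alloc(struct page_pool *pool)
	{
		void *p = pool->freelist;

		if (p)
			pool->freelist = *(void **)p;
		return p;
	}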

2009-01-23 16:03:40

by Pekka Enberg

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Sat, 24 Jan 2009, Nick Piggin wrote:
> > However, I don't know if netperf udp 4K over loopback is totally
> > realistic. Maybe real network drivers have different allocation patterns.
> > If I were you I wouldn't be too hasty to make big changes based on that
> > alone, if it could introduce a regression somewhere more important.

On Fri, 2009-01-23 at 10:57 -0500, Christoph Lameter wrote:
> AFAICT it just wastes memory. My idea of a slab allocator is that it is
> used for small (less than page sized) allocations. The page allocator
> should be able to handle page sized allocations in an effective manner.

FWIW, based on my results, it's actually the badly fitting big objects
that cause much of the wasted memory. So I'd prefer to get rid of the
page allocator completely for kmalloc() and just use a best-fit allocator
for everything. Something like my binalloc. ;-)

Pekka

2009-01-23 16:07:28

by Pekka Enberg

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Sat, 2009-01-24 at 02:37 +1100, Nick Piggin wrote:
> > > However, I don't know if netperf udp 4K over loopback is totally
> > > realistic. Maybe real network drivers have different allocation patterns.
> > > If I were you I wouldn't be too hasty to make big changes based on that
> > > alone, if it could introduce a regression somewhere more important.

On Saturday 24 January 2009 02:44:28 Pekka Enberg wrote:
> > Yup, I'm putting it in linux-next to see if anything pops up. It's of
> > course possible that it will introduce a regression somewhere, but seeing
> > the hit SLUB is taking from the page allocator, I'm bound to think that
> > the performance increase claims of commit
> > aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 ("SLUB: direct pass through of
> > page size or higher kmalloc requests") don't hold anymore.

On Sat, 2009-01-24 at 02:54 +1100, Nick Piggin wrote:
> Well what workload were they relating to? That could easily be retested.

No idea. I'm not sure I even reviewed that patch as it went in.

2009-01-23 16:17:57

by Nick Piggin

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Saturday 24 January 2009 02:59:17 Christoph Lameter wrote:
> On Sat, 24 Jan 2009, Nick Piggin wrote:
> > The page allocator is never going to be as fast as a slab allocator, for
> > reasons I explained a long time ago. Not to say it can't be improved,
> > just stating facts.
>
> Why not? Remember the discussion we had a while ago. You can bring the
> pages into a state where minimal manipulations are required for alloc/free
> and avoid all the checks in the hot paths. The SLUB method could be used:
> take a big contiguous chunk and then issue page-sized portions of it.
> That could be quite fast.
>
> Or if you prefer order-0, do a single linked list like SLQB does.

The fundamental issues I guess are that slab pages are kernel mapped, and
within a given slab, the zone and movability are irrelevant.

Other ones which could be changed but could introduce regressions are
watermarks, buddy merging, and struct page error checking and setup.

I brought all this up when it was discussed. Did you find any ways to
improve anything?

(FWIW, I did make that patch to enable refcounting to be avoided, which
saves a couple of atomic operations. I don't think it brought performance
up too much, but I still intend to dust it off at some point.)

2009-01-23 20:15:58

by Matt Mackall

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Fri, 2009-01-23 at 10:03 -0500, Christoph Lameter wrote:
> On Fri, 23 Jan 2009, Nick Piggin wrote:
>
> > Hmm, it lists quite a number of advantages that I guess are being
> > reverted too? What was the test case(s) that prompted this commit
> > in the first place? Better ensure it doesn't slow down...
>
> The advantage was mainly memory savings and the ability to redefine kmallocs
> to go directly to the page allocator. That totally avoids slab allocator overhead.
>
> I thought higher order allocations were not supposed to be used in
> performance critical paths? Didn't you want to do everything with order-0
> allocs?
>
> It seems that we currently need the slab allocators to compensate for the
> performance problems in the page allocator for these higher order allocs.
> I'd rather have the page allocator fixed but things are as they are.

I still think we should experiment with changing the hierarchy:

Rename all the core get_free_page* functions to buddy_*

Make SL*B call into buddy_* with a default order of N (>=0)

Replace the old get_free_page* functions with simple wrappers that
call into SL*B for order <= N or buddy_* for order > N (see the sketch
below)

This tackles several problems at once:

- fragmentation of SL*B due to small pages
- poor performance of get_free_pages at moderate orders
- poor cache-locality for get_free_pages
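
A rough sketch of the wrapper (hypothetical names: buddy_get_free_pages()
stands in for the renamed core function and N for the crossover order):

	#define N 3	/* assumed crossover order */

	/* The old entry point becomes a thin dispatcher. */
	unsigned long get_free_pages_wrapper(gfp_t flags, unsigned int order)
	{
		if (order <= N)
			/* Small orders are served by SL*B, which itself
			 * refills from buddy_* in order-N chunks. */
			return (unsigned long)kmalloc(PAGE_SIZE << order,
						      flags);

		/* Larger orders go straight to the renamed buddy code. */
		return buddy_get_free_pages(flags, order);
	}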

--
http://selenic.com : development and support for Mercurial and Linux

2009-01-24 03:12:13

by Yanmin Zhang

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Sat, 2009-01-24 at 02:37 +1100, Nick Piggin wrote:
> On Saturday 24 January 2009 02:27:29 Christoph Lameter wrote:
> > On Sat, 24 Jan 2009, Nick Piggin wrote:
> > > > I thought higher order allocations were not supposed to be used in
> > > > performance critical paths?
> > >
> > > ? You use them all the time in SLUB alone.
> >
> > But SLUB compensates for the slow higher order allocs by using them as
> > buffers for smaller objects. The higher order allocs are in the slow
> > paths.
>
> I don't quite know what you're asking of me. If you see higher order
> allocations in performance critical code, then you answer your own
> question?
>
> However, I don't know if netperf udp 4K over loopback is totally
> realistic. Maybe real network drivers have different allocation patterns.

Client/server models and distributed systems are popular. To be flexible,
these applications can mostly be configured to run on one machine or on a
group of machines. When running on one machine, they communicate over the
network loopback. I once worked on a big distributed telecom application
that used UDP to communicate between the front-end and the back-end. If
the front-end and back-end are merged onto one machine to reduce cost,
based on some customers' requirements, that would be such a case.

> If I were you I wouldn't be too hasty to make big changes based on that
> alone, if it could introduce a regression somewhere more important.

2009-01-26 17:21:36

by Christoph Lameter

Subject: Re: [PATCH] SLUB: revert direct page allocator pass through

On Sat, 24 Jan 2009, Nick Piggin wrote:

> > Or if you prefer order-0, do a single linked list like SLQB does.
>
> The fundamental issues I guess are that slab pages are kernel mapped, and
> within a given slab, the zone and movability are irrelevant.

Right. There are multiple subsystems that all have the same requirements.

> Other ones which could be changed but could introduce regressions are
> watermarks, buddy merging, and struct page error checking and setup.

Isn't it possible to defer that (queuing them (sigh))? A bitmap could be
used to avoid queuing and may even allow fully concurrent allocations
without locks. Use a counter to check watermarks once in a while.

Struct page error checking could be enabled only for debugging purposes.


> I brought all this up when it was discussed. Did you find any ways to
> improve anything?

I played around with some bitmap concepts, basically a bit vector per
MAX_ORDER page (plus giving a preferred order to each MAX_ORDER chunk in
order to avoid fragmentation), but I had to abandon that at the time due
to time constraints.

The bitmap / counter approach could get away with testing/incrementing a
percpu counter and flipping a bit. One of the reasons that I am quite
pushy on the per-cpu atomic ops vs. interrupts is that these would allow
a page allocator fastpath w/o disabling interrupts.
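
Roughly the kind of fastpath I mean (a hypothetical sketch, not a patch;
one bit per page in a MAX_ORDER chunk, and an allocation claims a bit
with a single atomic op):

	#define CHUNK_PAGES (1 << MAX_ORDER)

	struct chunk {
		DECLARE_BITMAP(free_map, CHUNK_PAGES); /* 1 = page is free */
		struct page *first_page;	/* first page of the chunk */
	};

	static struct page *chunk_alloc_page(struct chunk *c)
	{
		unsigned long bit;

		/* Lock-free: find a free page and claim it with one
		 * atomic op; retry if another CPU raced us to the bit. */
		do {
			bit = find_first_bit(c->free_map, CHUNK_PAGES);
			if (bit >= CHUNK_PAGES)
				return NULL;	/* chunk exhausted */
		} while (!test_and_clear_bit(bit, c->free_map));

		/* Watermark checks would be amortized here: bump a percpu
		 * counter and only inspect watermarks once in a while. */
		return c->first_page + bit;
	}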

> (I did make that patch to enable refcounting to be avoided FWIW, which
> avoids a couple of atomic operations, but I don't think it brought
> performance up too much, but I still intend to dust it off at some
> point).

Well, if the page stays at a refcount of one then we do not need to check
the refcount at all, but can just push it in and out of some queue / bitmap
or something.