2006-02-04 13:33:51

by Pekka Enberg

Subject: [RFT/PATCH] slab: consolidate allocation paths

Hi,

I don't have access to a NUMA machine and would appreciate it if someone
could give this patch a spin and let me know that I didn't break anything.

Pekka

Subject: slab: consolidate allocation paths
From: Pekka Enberg <[email protected]>

This patch consolidates the UMA and NUMA memory allocation paths in the
slab allocator. This is accomplished by making the UMA path look like
we are on NUMA but always allocating from the current node.

Signed-off-by: Pekka Enberg <[email protected]>
---

mm/slab.c | 104 +++++++++++++++++++++++++++++++++-----------------------------
1 file changed, 56 insertions(+), 48 deletions(-)

Index: 2.6-git/mm/slab.c
===================================================================
--- 2.6-git.orig/mm/slab.c
+++ 2.6-git/mm/slab.c
@@ -828,8 +828,6 @@ static struct array_cache *alloc_arrayca
}

#ifdef CONFIG_NUMA
-static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
-
static struct array_cache **alloc_alien_cache(int node, int limit)
{
struct array_cache **ac_ptr;
@@ -2665,20 +2663,12 @@ static void *cache_alloc_debugcheck_afte
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif

-static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static inline void *cache_alloc_cpucache(struct kmem_cache *cachep,
+ gfp_t flags)
{
void *objp;
struct array_cache *ac;

-#ifdef CONFIG_NUMA
- if (unlikely(current->mempolicy && !in_interrupt())) {
- int nid = slab_node(current->mempolicy);
-
- if (nid != numa_node_id())
- return __cache_alloc_node(cachep, flags, nid);
- }
-#endif
-
check_irq_off();
ac = cpu_cache_get(cachep);
if (likely(ac->avail)) {
@@ -2692,23 +2682,6 @@ static inline void *____cache_alloc(stru
return objp;
}

-static __always_inline void *
-__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
-{
- unsigned long save_flags;
- void *objp;
-
- cache_alloc_debugcheck_before(cachep, flags);
-
- local_irq_save(save_flags);
- objp = ____cache_alloc(cachep, flags);
- local_irq_restore(save_flags);
- objp = cache_alloc_debugcheck_after(cachep, flags, objp,
- caller);
- prefetchw(objp);
- return objp;
-}
-
#ifdef CONFIG_NUMA
/*
* A interface to enable slab creation on nodeid
@@ -2770,8 +2743,57 @@ static void *__cache_alloc_node(struct k
done:
return obj;
}
+
+static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid)
+{
+
+ if (nodeid != -1 && nodeid != numa_node_id() &&
+ cachep->nodelists[nodeid])
+ return __cache_alloc_node(cachep, flags, nodeid);
+
+ if (unlikely(current->mempolicy && !in_interrupt())) {
+ nodeid = slab_node(current->mempolicy);
+
+ if (nodeid != numa_node_id() && cachep->nodelists[nodeid])
+ return __cache_alloc_node(cachep, flags, nodeid);
+ }
+
+ return cache_alloc_cpucache(cachep, flags);
+}
+
+#else
+
+/*
+ * On UMA, we always allocate directly from the per-CPU cache.
+ */
+
+static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid)
+{
+ return cache_alloc_cpucache(cachep, flags);
+}
+
#endif

+static __always_inline void *cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid,
+ void *caller)
+{
+ unsigned long save_flags;
+ void *objp;
+
+ cache_alloc_debugcheck_before(cachep, flags);
+ local_irq_save(save_flags);
+
+ objp = __cache_alloc(cachep, flags, nodeid);
+
+ local_irq_restore(save_flags);
+ objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+ prefetchw(objp);
+ return objp;
+}
+
/*
* Caller needs to acquire correct kmem_list's list_lock
*/
@@ -2933,7 +2955,7 @@ static inline void __cache_free(struct k
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- return __cache_alloc(cachep, flags, __builtin_return_address(0));
+ return cache_alloc(cachep, flags, -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc);

@@ -2994,23 +3016,7 @@ int fastcall kmem_ptr_validate(struct km
*/
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
- unsigned long save_flags;
- void *ptr;
-
- cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
-
- if (nodeid == -1 || nodeid == numa_node_id() ||
- !cachep->nodelists[nodeid])
- ptr = ____cache_alloc(cachep, flags);
- else
- ptr = __cache_alloc_node(cachep, flags, nodeid);
- local_irq_restore(save_flags);
-
- ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
- __builtin_return_address(0));
-
- return ptr;
+ return cache_alloc(cachep, flags, nodeid, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

@@ -3021,7 +3027,7 @@ void *kmalloc_node(size_t size, gfp_t fl
cachep = kmem_find_general_cachep(size, flags);
if (unlikely(cachep == NULL))
return NULL;
- return kmem_cache_alloc_node(cachep, flags, node);
+ return cache_alloc(cachep, flags, node, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmalloc_node);
#endif
@@ -3060,7 +3066,7 @@ static __always_inline void *__do_kmallo
cachep = __find_general_cachep(size, flags);
if (unlikely(cachep == NULL))
return NULL;
- return __cache_alloc(cachep, flags, caller);
+ return cache_alloc(cachep, flags, -1, caller);
}

#ifndef CONFIG_DEBUG_SLAB



2006-02-04 15:11:17

by Christoph Lameter

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

On Sat, 4 Feb 2006, Pekka Enberg wrote:

> I don't have access to a NUMA machine and would appreciate it if someone
> could give this patch a spin and let me know that I didn't break anything.

No time to do a full review (off to traffic school... sigh). I did not
see anything just glancing over it, but the patch will conflict with
Paul Jackson's patchset to implement memory spreading.

2006-02-04 16:26:12

by Pekka Enberg

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

On Sat, 4 Feb 2006, Pekka Enberg wrote:
> > I don't have access to a NUMA machine and would appreciate it if someone
> > could give this patch a spin and let me know that I didn't break anything.

On Sat, 2006-02-04 at 07:11 -0800, Christoph Lameter wrote:
> No time to do a full review (off to traffic school... sigh). I did not
> see anything just glancing over it, but the patch will conflict with
> Paul Jackson's patchset to implement memory spreading.

Here's the same patch rediffed on top of the cpuset changes.

Pekka

Subject: slab: consolidate allocation paths
From: Pekka Enberg <[email protected]>

This patch consolidates the UMA and NUMA memory allocation paths in the
slab allocator. This is accomplished by making the UMA path look like
we are on NUMA but always allocating from the current node.

Cc: Manfred Spraul <[email protected]>
Cc: Christoph Lameter <[email protected]>
Signed-off-by: Pekka Enberg <[email protected]>
---

mm/slab.c | 132 +++++++++++++++++++++++++++++++-------------------------------
1 file changed, 67 insertions(+), 65 deletions(-)

Index: 2.6-cpuset/mm/slab.c
===================================================================
--- 2.6-cpuset.orig/mm/slab.c
+++ 2.6-cpuset/mm/slab.c
@@ -829,8 +829,6 @@ static struct array_cache *alloc_arrayca
}

#ifdef CONFIG_NUMA
-static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
-static void *alternate_node_alloc(struct kmem_cache *, gfp_t);

static struct array_cache **alloc_alien_cache(int node, int limit)
{
@@ -2667,17 +2665,12 @@ static void *cache_alloc_debugcheck_afte
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif

-static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static inline void *cache_alloc_cpucache(struct kmem_cache *cachep,
+ gfp_t flags)
{
void *objp;
struct array_cache *ac;

-#ifdef CONFIG_NUMA
- if (unlikely(current->flags & (PF_MEM_SPREAD|PF_MEMPOLICY)))
- if ((objp = alternate_node_alloc(cachep, flags)) != NULL)
- return objp;
-#endif
-
check_irq_off();
ac = cpu_cache_get(cachep);
if (likely(ac->avail)) {
@@ -2691,44 +2684,8 @@ static inline void *____cache_alloc(stru
return objp;
}

-static __always_inline void *
-__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
-{
- unsigned long save_flags;
- void *objp;
-
- cache_alloc_debugcheck_before(cachep, flags);
-
- local_irq_save(save_flags);
- objp = ____cache_alloc(cachep, flags);
- local_irq_restore(save_flags);
- objp = cache_alloc_debugcheck_after(cachep, flags, objp,
- caller);
- prefetchw(objp);
- return objp;
-}
-
#ifdef CONFIG_NUMA
/*
- * Try allocating on another node if PF_MEM_SPREAD or PF_MEMPOLICY.
- */
-static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
-{
- int nid_alloc, nid_here;
-
- if (in_interrupt())
- return NULL;
- nid_alloc = nid_here = numa_node_id();
- if (cpuset_mem_spread_check() && (cachep->flags & SLAB_MEM_SPREAD))
- nid_alloc = cpuset_mem_spread_node();
- else if (current->mempolicy)
- nid_alloc = slab_node(current->mempolicy);
- if (nid_alloc != nid_here)
- return __cache_alloc_node(cachep, flags, nid_alloc);
- return NULL;
-}
-
-/*
* A interface to enable slab creation on nodeid
*/
static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
@@ -2788,8 +2745,69 @@ static void *__cache_alloc_node(struct k
done:
return obj;
}
+
+/*
+ * Try allocating on another node if PF_MEM_SPREAD or PF_MEMPOLICY.
+ */
+static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+ int nid_alloc, nid_here;
+
+ if (in_interrupt())
+ return NULL;
+ nid_alloc = nid_here = numa_node_id();
+ if (cpuset_mem_spread_check() && (cachep->flags & SLAB_MEM_SPREAD))
+ nid_alloc = cpuset_mem_spread_node();
+ else if (current->mempolicy)
+ nid_alloc = slab_node(current->mempolicy);
+ if (nid_alloc != nid_here)
+ return __cache_alloc_node(cachep, flags, nid_alloc);
+ return NULL;
+}
+
+static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid)
+{
+ if (unlikely(current->flags & (PF_MEM_SPREAD|PF_MEMPOLICY))) {
+ void *obj = alternate_node_alloc(cachep, flags);
+ if (obj)
+ return obj;
+ }
+ return cache_alloc_cpucache(cachep, flags);
+}
+
+#else
+
+/*
+ * On UMA, we always allocate directly from the per-CPU cache.
+ */
+
+static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid)
+{
+ return cache_alloc_cpucache(cachep, flags);
+}
+
#endif

+static __always_inline void *cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid,
+ void *caller)
+{
+ unsigned long save_flags;
+ void *objp;
+
+ cache_alloc_debugcheck_before(cachep, flags);
+ local_irq_save(save_flags);
+
+ objp = __cache_alloc(cachep, flags, nodeid);
+
+ local_irq_restore(save_flags);
+ objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+ prefetchw(objp);
+ return objp;
+}
+
/*
* Caller needs to acquire correct kmem_list's list_lock
*/
@@ -2951,7 +2969,7 @@ static inline void __cache_free(struct k
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- return __cache_alloc(cachep, flags, __builtin_return_address(0));
+ return cache_alloc(cachep, flags, -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc);

@@ -3012,23 +3030,7 @@ int fastcall kmem_ptr_validate(struct km
*/
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
- unsigned long save_flags;
- void *ptr;
-
- cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
-
- if (nodeid == -1 || nodeid == numa_node_id() ||
- !cachep->nodelists[nodeid])
- ptr = ____cache_alloc(cachep, flags);
- else
- ptr = __cache_alloc_node(cachep, flags, nodeid);
- local_irq_restore(save_flags);
-
- ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
- __builtin_return_address(0));
-
- return ptr;
+ return cache_alloc(cachep, flags, nodeid, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

@@ -3039,7 +3041,7 @@ void *kmalloc_node(size_t size, gfp_t fl
cachep = kmem_find_general_cachep(size, flags);
if (unlikely(cachep == NULL))
return NULL;
- return kmem_cache_alloc_node(cachep, flags, node);
+ return cache_alloc(cachep, flags, node, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmalloc_node);
#endif
@@ -3078,7 +3080,7 @@ static __always_inline void *__do_kmallo
cachep = __find_general_cachep(size, flags);
if (unlikely(cachep == NULL))
return NULL;
- return __cache_alloc(cachep, flags, caller);
+ return cache_alloc(cachep, flags, -1, caller);
}

#ifndef CONFIG_DEBUG_SLAB


2006-02-04 16:33:04

by Pekka Enberg

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

On Sat, 2006-02-04 at 07:11 -0800, Christoph Lameter wrote:
> > No time to do a full review (off to traffic school... sigh). I did not
> > see anything just glancing over it, but the patch will conflict with
> > Paul Jackson's patchset to implement memory spreading.

On Sat, 2006-02-04 at 18:26 +0200, Pekka Enberg wrote:
> Here's the same patch rediffed on top of the cpuset changes.

Sorry, strike that. I forgot some bits from the NUMA version of
__cache_alloc. Here's a proper patch.

Subject: slab: consolidate allocation paths
From: Pekka Enberg <[email protected]>

This patch consolidates the UMA and NUMA memory allocation paths in the
slab allocator. This is accomplished by making the UMA path look like
we are on NUMA but always allocating from the current node.

Cc: Manfred Spraul <[email protected]>
Cc: Christoph Lameter <[email protected]>
Signed-off-by: Pekka Enberg <[email protected]>
---

mm/slab.c | 136 ++++++++++++++++++++++++++++++++------------------------------
1 file changed, 71 insertions(+), 65 deletions(-)

Index: 2.6-cpuset/mm/slab.c
===================================================================
--- 2.6-cpuset.orig/mm/slab.c
+++ 2.6-cpuset/mm/slab.c
@@ -829,8 +829,6 @@ static struct array_cache *alloc_arrayca
}

#ifdef CONFIG_NUMA
-static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
-static void *alternate_node_alloc(struct kmem_cache *, gfp_t);

static struct array_cache **alloc_alien_cache(int node, int limit)
{
@@ -2667,17 +2665,12 @@ static void *cache_alloc_debugcheck_afte
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif

-static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static inline void *cache_alloc_cpucache(struct kmem_cache *cachep,
+ gfp_t flags)
{
void *objp;
struct array_cache *ac;

-#ifdef CONFIG_NUMA
- if (unlikely(current->flags & (PF_MEM_SPREAD|PF_MEMPOLICY)))
- if ((objp = alternate_node_alloc(cachep, flags)) != NULL)
- return objp;
-#endif
-
check_irq_off();
ac = cpu_cache_get(cachep);
if (likely(ac->avail)) {
@@ -2691,44 +2684,8 @@ static inline void *____cache_alloc(stru
return objp;
}

-static __always_inline void *
-__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
-{
- unsigned long save_flags;
- void *objp;
-
- cache_alloc_debugcheck_before(cachep, flags);
-
- local_irq_save(save_flags);
- objp = ____cache_alloc(cachep, flags);
- local_irq_restore(save_flags);
- objp = cache_alloc_debugcheck_after(cachep, flags, objp,
- caller);
- prefetchw(objp);
- return objp;
-}
-
#ifdef CONFIG_NUMA
/*
- * Try allocating on another node if PF_MEM_SPREAD or PF_MEMPOLICY.
- */
-static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
-{
- int nid_alloc, nid_here;
-
- if (in_interrupt())
- return NULL;
- nid_alloc = nid_here = numa_node_id();
- if (cpuset_mem_spread_check() && (cachep->flags & SLAB_MEM_SPREAD))
- nid_alloc = cpuset_mem_spread_node();
- else if (current->mempolicy)
- nid_alloc = slab_node(current->mempolicy);
- if (nid_alloc != nid_here)
- return __cache_alloc_node(cachep, flags, nid_alloc);
- return NULL;
-}
-
-/*
* A interface to enable slab creation on nodeid
*/
static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
@@ -2788,8 +2745,73 @@ static void *__cache_alloc_node(struct k
done:
return obj;
}
+
+/*
+ * Try allocating on another node if PF_MEM_SPREAD or PF_MEMPOLICY.
+ */
+static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+ int nid_alloc, nid_here;
+
+ if (in_interrupt())
+ return NULL;
+ nid_alloc = nid_here = numa_node_id();
+ if (cpuset_mem_spread_check() && (cachep->flags & SLAB_MEM_SPREAD))
+ nid_alloc = cpuset_mem_spread_node();
+ else if (current->mempolicy)
+ nid_alloc = slab_node(current->mempolicy);
+ if (nid_alloc != nid_here)
+ return __cache_alloc_node(cachep, flags, nid_alloc);
+ return NULL;
+}
+
+static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid)
+{
+ if (nodeid != -1 && nodeid != numa_node_id() &&
+ cachep->nodelists[nodeid])
+ return __cache_alloc_node(cachep, flags, nodeid);
+
+ if (unlikely(current->flags & (PF_MEM_SPREAD|PF_MEMPOLICY))) {
+ void *obj = alternate_node_alloc(cachep, flags);
+ if (obj)
+ return obj;
+ }
+ return cache_alloc_cpucache(cachep, flags);
+}
+
+#else
+
+/*
+ * On UMA, we always allocate directly from the per-CPU cache.
+ */
+
+static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid)
+{
+ return cache_alloc_cpucache(cachep, flags);
+}
+
#endif

+static __always_inline void *cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, int nodeid,
+ void *caller)
+{
+ unsigned long save_flags;
+ void *objp;
+
+ cache_alloc_debugcheck_before(cachep, flags);
+ local_irq_save(save_flags);
+
+ objp = __cache_alloc(cachep, flags, nodeid);
+
+ local_irq_restore(save_flags);
+ objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+ prefetchw(objp);
+ return objp;
+}
+
/*
* Caller needs to acquire correct kmem_list's list_lock
*/
@@ -2951,7 +2973,7 @@ static inline void __cache_free(struct k
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- return __cache_alloc(cachep, flags, __builtin_return_address(0));
+ return cache_alloc(cachep, flags, -1, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc);

@@ -3012,23 +3034,7 @@ int fastcall kmem_ptr_validate(struct km
*/
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
- unsigned long save_flags;
- void *ptr;
-
- cache_alloc_debugcheck_before(cachep, flags);
- local_irq_save(save_flags);
-
- if (nodeid == -1 || nodeid == numa_node_id() ||
- !cachep->nodelists[nodeid])
- ptr = ____cache_alloc(cachep, flags);
- else
- ptr = __cache_alloc_node(cachep, flags, nodeid);
- local_irq_restore(save_flags);
-
- ptr = cache_alloc_debugcheck_after(cachep, flags, ptr,
- __builtin_return_address(0));
-
- return ptr;
+ return cache_alloc(cachep, flags, nodeid, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmem_cache_alloc_node);

@@ -3039,7 +3045,7 @@ void *kmalloc_node(size_t size, gfp_t fl
cachep = kmem_find_general_cachep(size, flags);
if (unlikely(cachep == NULL))
return NULL;
- return kmem_cache_alloc_node(cachep, flags, node);
+ return cache_alloc(cachep, flags, node, __builtin_return_address(0));
}
EXPORT_SYMBOL(kmalloc_node);
#endif
@@ -3078,7 +3084,7 @@ static __always_inline void *__do_kmallo
cachep = __find_general_cachep(size, flags);
if (unlikely(cachep == NULL))
return NULL;
- return __cache_alloc(cachep, flags, caller);
+ return cache_alloc(cachep, flags, -1, caller);
}

#ifndef CONFIG_DEBUG_SLAB


2006-02-05 02:00:49

by Paul Jackson

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

This consolidation patch looks ok to me on first read, though others
are certainly more expert in this code than I am. Cleanup, ifdef
reduction, and consolidation of mm/slab.c are worthwhile goals.
That code is rough for folks like me to follow.

Two issues I can see:

1) This patch increased the text size of mm/slab.o by 776
bytes (ia64 sn2_defconfig gcc 3.3.3), which should be
justified. My naive expectation would have been that
such a source code consolidation patch would be text
size neutral, or close to it.

2) You might want to hold off this patch for a few days,
until the dust settles from my memory spread patch.

--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <[email protected]> 1.925.600.0401

2006-02-05 03:45:05

by Christoph Lameter

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

On Sat, 4 Feb 2006, Paul Jackson wrote:

> 1) This patch increased the text size of mm/slab.o by 776
> bytes (ia64 sn2_defconfig gcc 3.3.3), which should be
> justified. My naive expectation would have been that
> such a source code consolidation patch would be text
> size neutral, or close to it.

Hmmm... Maybe it's worth a retry with gcc 3.4 and 4.X? Note that the
size increase may be much less on i386. The .o file includes descriptive
material too...

2006-02-05 04:36:34

by Paul Jackson

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

Christoph wrote:
> Hmmm... Maybe it's worth a retry with gcc 3.4 and 4.X? Note that the
> size increase may be much less on i386. The .o file includes descriptive
> material too...

Yes, the other gccs will no doubt have a different amount of increase.

Yes, i386 text sizes seem to run half the size of ia64.

No, I said "text" size, not file size. Meaning with the size command.
That same 776 byte size difference in text size showed up in the final
vmlinux, which I just verified.

This is not a 'big problem.' It's just a curiosity, for which an
explanation might provide interesting insight into what this patch is
doing.

--
I won't rest till it's the best ...
Programmer, Linux Scalability
Paul Jackson <[email protected]> 1.925.600.0401

2006-02-05 08:41:15

by Pekka Enberg

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

Hi,

On Sat, 2006-02-04 at 18:00 -0800, Paul Jackson wrote:
> Two issues I can see:
>
> 1) This patch increased the text size of mm/slab.o by 776
> bytes (ia64 sn2_defconfig gcc 3.3.3), which should be
> justified. My naive expectation would have been that
> such a source code consolidation patch would be text
> size neutral, or close to it.

Ah, sorry about that, I forgot to verify the NUMA case. The problem is
that kmalloc_node() is now calling cache_alloc(), which is forced
inline. I am wondering, would it be ok to make __cache_alloc()
non-inline for NUMA? The relevant numbers are:

text data bss dec hex filename
15882 2512 24 18418 47f2 mm/slab.o (original)
16029 2512 24 18565 4885 mm/slab.o (inline)
15798 2512 24 18334 479e mm/slab.o (non-inline)
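
For illustration, the non-inline variant would just be the NUMA
__cache_alloc() from the patch with the forced inlining dropped
(untested sketch, same node-selection logic as in the patch):

static void *__cache_alloc(struct kmem_cache *cachep, gfp_t flags,
			   int nodeid)
{
	/* An explicit node request takes precedence. */
	if (nodeid != -1 && nodeid != numa_node_id() &&
	    cachep->nodelists[nodeid])
		return __cache_alloc_node(cachep, flags, nodeid);

	/* Honor cpuset memory spreading and mempolicies. */
	if (unlikely(current->flags & (PF_MEM_SPREAD|PF_MEMPOLICY))) {
		void *obj = alternate_node_alloc(cachep, flags);
		if (obj)
			return obj;
	}

	/* Otherwise allocate from the per-CPU cache on the local node. */
	return cache_alloc_cpucache(cachep, flags);
}

The call sites that inline cache_alloc() (kmem_cache_alloc,
kmem_cache_alloc_node, kmalloc_node, __do_kmalloc) would then share one
out-of-line copy of the node-selection logic, which is where the
non-inline numbers above come from.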

> 2) You might want to hold off this patch for a few days,
> until the dust settles from my memory spread patch.

Sure.

Pekka

2006-02-05 09:18:32

by Pekka Enberg

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

On Sun, 2006-02-05 at 10:41 +0200, Pekka Enberg wrote:
> Ah, sorry about that, I forgot to verify the NUMA case. The problem is
> that kmalloc_node() is now calling cache_alloc(), which is forced
> inline. I am wondering, would it be ok to make __cache_alloc()
> non-inline for NUMA? The relevant numbers are:

[snip]

Btw, we could also change kmalloc_node() to use kmem_cache_alloc_node()
again, but then we hit a minor correctness issue:
__builtin_return_address(0) won't identify the real caller of
kmalloc_node(). Hmm.
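
To illustrate with the old pre-patch code:

void *kmalloc_node(size_t size, gfp_t flags, int node)
{
	struct kmem_cache *cachep;

	cachep = kmem_find_general_cachep(size, flags);
	if (unlikely(cachep == NULL))
		return NULL;
	/*
	 * kmem_cache_alloc_node() takes __builtin_return_address(0)
	 * itself, so the caller it records is kmalloc_node(), not
	 * whoever called kmalloc_node().
	 */
	return kmem_cache_alloc_node(cachep, flags, node);
}

so slab debugging would attribute every kmalloc_node() allocation to
kmalloc_node() itself instead of the real caller.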

Pekka

2006-02-05 12:29:15

by Pekka Enberg

Subject: Re: [RFT/PATCH] slab: consolidate allocation paths

On Sat, 2006-02-04 at 18:00 -0800, Paul Jackson wrote:
> 1) This patch increased the text size of mm/slab.o by 776
> bytes (ia64 sn2_defconfig gcc 3.3.3), which should be
> justified. My naive expectation would have been that
> such a source code consolidation patch would be text
> size neutral, or close to it.

I have a version of the patch now that reduces text size on NUMA. You
can find it here (it won't apply on top of cpuset though):

http://www.cs.helsinki.fi/u/penberg/linux/penberg-2.6/penberg-01-slab/

I'll wait until the cpuset patches have settled down and then repost.

Pekka