Support for using a single mempool as a critical pool for all slab allocations.
This patch adds *_mempool variants to the existing slab allocator API functions:
kmalloc_mempool(), kmalloc_node_mempool(), kmem_cache_alloc_mempool(), and
kmem_cache_alloc_node_mempool(). These functions behave the same as their
non-mempool cousins, but take an additional mempool_t argument.
This patch does not actually USE the mempool_t argument; it simply adds the
new entry points. The next patch in the series will add code to use the
mempools. This patch should have no externally visible effect on existing
users of the slab allocator.
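For example, a caller might use the new interface like so (illustrative
sketch only; my_pool is assumed to be created elsewhere, e.g. with
mempool_create()):

#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mempool.h>

/* Hypothetical caller.  Passing NULL preserves the old behavior;
 * passing a pool marks the allocation as critical. */
static int example_alloc(mempool_t *my_pool)
{
	void *obj;

	/* Same semantics as kmalloc(), plus a fallback pool argument. */
	obj = kmalloc_mempool(64, GFP_KERNEL, my_pool);
	if (!obj)
		return -ENOMEM;
	kfree(obj);
	return 0;
}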
Signed-off-by: Matthew Dobson <[email protected]>
include/linux/slab.h | 38 +++++++++++++++++++++++++++++---------
mm/slab.c | 39 +++++++++++++++++++++++++++++++--------
2 files changed, 60 insertions(+), 17 deletions(-)
Index: linux-2.6.16-rc1+critical_mempools/include/linux/slab.h
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/include/linux/slab.h
+++ linux-2.6.16-rc1+critical_mempools/include/linux/slab.h
@@ -15,6 +15,7 @@ typedef struct kmem_cache kmem_cache_t;
#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/types.h>
+#include <linux/mempool.h>
#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
@@ -63,6 +64,7 @@ extern kmem_cache_t *kmem_cache_create(c
void (*)(void *, kmem_cache_t *, unsigned long));
extern int kmem_cache_destroy(kmem_cache_t *);
extern int kmem_cache_shrink(kmem_cache_t *);
+extern void *kmem_cache_alloc_mempool(kmem_cache_t *, gfp_t, mempool_t *);
extern void *kmem_cache_alloc(kmem_cache_t *, gfp_t);
extern void kmem_cache_free(kmem_cache_t *, void *);
extern unsigned int kmem_cache_size(kmem_cache_t *);
@@ -76,9 +78,9 @@ struct cache_sizes {
kmem_cache_t *cs_dmacachep;
};
extern struct cache_sizes malloc_sizes[];
-extern void *__kmalloc(size_t, gfp_t);
+extern void *__kmalloc(size_t, gfp_t, mempool_t *);
-static inline void *kmalloc(size_t size, gfp_t flags)
+static inline void *kmalloc_mempool(size_t size, gfp_t flags, mempool_t *pool)
{
if (__builtin_constant_p(size)) {
int i = 0;
@@ -94,11 +96,16 @@ static inline void *kmalloc(size_t size,
__you_cannot_kmalloc_that_much();
}
found:
- return kmem_cache_alloc((flags & GFP_DMA) ?
+ return kmem_cache_alloc_mempool((flags & GFP_DMA) ?
malloc_sizes[i].cs_dmacachep :
- malloc_sizes[i].cs_cachep, flags);
+ malloc_sizes[i].cs_cachep, flags, pool);
}
- return __kmalloc(size, flags);
+ return __kmalloc(size, flags, pool);
+}
+
+static inline void *kmalloc(size_t size, gfp_t flags)
+{
+ return kmalloc_mempool(size, flags, NULL);
}
extern void *kzalloc_node(size_t, gfp_t, int);
@@ -121,14 +128,27 @@ extern void kfree(const void *);
extern unsigned int ksize(const void *);
#ifdef CONFIG_NUMA
-extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
-extern void *kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *kmem_cache_alloc_node_mempool(kmem_cache_t *, gfp_t, int, mempool_t *);
+extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t, int);
+extern void *kmalloc_node_mempool(size_t, gfp_t, int, mempool_t *);
+extern void *kmalloc_node(size_t, gfp_t, int);
#else
-static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
+static inline void *kmem_cache_alloc_node_mempool(kmem_cache_t *cachep, gfp_t flags,
+ int node_id, mempool_t *pool)
+{
+ return kmem_cache_alloc_mempool(cachep, flags, pool);
+}
+static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags,
+ int node_id)
{
return kmem_cache_alloc(cachep, flags);
}
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static inline void *kmalloc_node_mempool(size_t size, gfp_t flags, int node_id,
+ mempool_t *pool)
+{
+ return kmalloc_mempool(size, flags, pool);
+}
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node_id)
{
return kmalloc(size, flags);
}
Index: linux-2.6.16-rc1+critical_mempools/mm/slab.c
===================================================================
--- linux-2.6.16-rc1+critical_mempools.orig/mm/slab.c
+++ linux-2.6.16-rc1+critical_mempools/mm/slab.c
@@ -2837,17 +2837,25 @@ static inline void __cache_free(kmem_cac
}
/**
- * kmem_cache_alloc - Allocate an object
+ * kmem_cache_alloc_mempool - Allocate an object
* @cachep: The cache to allocate from.
* @flags: See kmalloc().
+ * @pool: mempool to allocate pages from if we need to refill a slab
*
* Allocate an object from this cache. The flags are only relevant
* if the cache has no available objects.
*/
-void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags)
+void *kmem_cache_alloc_mempool(kmem_cache_t *cachep, gfp_t flags,
+ mempool_t *pool)
{
return __cache_alloc(cachep, flags);
}
+EXPORT_SYMBOL(kmem_cache_alloc_mempool);
+
+void *kmem_cache_alloc(kmem_cache_t *cachep, gfp_t flags)
+{
+ return kmem_cache_alloc_mempool(cachep, flags, NULL);
+}
EXPORT_SYMBOL(kmem_cache_alloc);
/**
@@ -2894,18 +2902,20 @@ int fastcall kmem_ptr_validate(kmem_cach
#ifdef CONFIG_NUMA
/**
- * kmem_cache_alloc_node - Allocate an object on the specified node
+ * kmem_cache_alloc_node_mempool - Allocate an object on the specified node
* @cachep: The cache to allocate from.
* @flags: See kmalloc().
* @nodeid: node number of the target node.
+ * @pool: mempool to allocate pages from if we need to refill a slab
*
- * Identical to kmem_cache_alloc, except that this function is slow
+ * Identical to kmem_cache_alloc_mempool, except that this function is slow
* and can sleep. And it will allocate memory on the given node, which
* can improve the performance for cpu bound structures.
* New and improved: it will now make sure that the object gets
* put on the correct node list so that there is no false sharing.
*/
-void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
+void *kmem_cache_alloc_node_mempool(kmem_cache_t *cachep, gfp_t flags,
+ int nodeid, mempool_t *pool)
{
unsigned long save_flags;
void *ptr;
@@ -2934,16 +2944,29 @@ void *kmem_cache_alloc_node(kmem_cache_t
return ptr;
}
+EXPORT_SYMBOL(kmem_cache_alloc_node_mempool);
+
+void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
+{
+ return kmem_cache_alloc_node_mempool(cachep, flags, nodeid, NULL);
+}
EXPORT_SYMBOL(kmem_cache_alloc_node);
-void *kmalloc_node(size_t size, gfp_t flags, int node)
+void *kmalloc_node_mempool(size_t size, gfp_t flags, int nodeid,
+ mempool_t *pool)
{
kmem_cache_t *cachep;
cachep = kmem_find_general_cachep(size, flags);
if (unlikely(cachep == NULL))
return NULL;
- return kmem_cache_alloc_node(cachep, flags, node);
+ return kmem_cache_alloc_node_mempool(cachep, flags, nodeid, pool);
+}
+EXPORT_SYMBOL(kmalloc_node_mempool);
+
+void *kmalloc_node(size_t size, gfp_t flags, int nodeid)
+{
+ return kmalloc_node_mempool(size, flags, nodeid, NULL);
}
EXPORT_SYMBOL(kmalloc_node);
#endif
@@ -2969,7 +2992,7 @@ EXPORT_SYMBOL(kmalloc_node);
* platforms. For example, on i386, it means that the memory must come
* from the first 16MB.
*/
-void *__kmalloc(size_t size, gfp_t flags)
+void *__kmalloc(size_t size, gfp_t flags, mempool_t *pool)
{
kmem_cache_t *cachep;
--
Hi Matthew,
On 1/25/06, Matthew Dobson <[email protected]> wrote:
> +extern void *__kmalloc(size_t, gfp_t, mempool_t *);
If you really need to do this, please note that you're adding an extra
parameter push for the nominal case where a mempool is not required. The
compiler is unable to optimize it away. It's better to create a
new entry point for the mempool case in mm/slab.c rather than
overloading __kmalloc() et al. See the following patch, which does that
sort of thing:
http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.16-rc1/2.6.16-rc1-mm3/broken-out/slab-fix-kzalloc-and-kstrdup-caller-report-for-config_debug_slab.patch
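Something along these lines (only a sketch; __do_kmalloc and
__kmalloc_mempool are made-up names, and the body is just the existing
logic from mm/slab.c):

/* Sketch: keep the common two-argument entry point as-is and add a
 * separate three-argument one, so the nominal path pays nothing. */
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
					  mempool_t *pool)
{
	kmem_cache_t *cachep;

	cachep = kmem_find_general_cachep(size, flags);
	if (unlikely(cachep == NULL))
		return NULL;
	/* pool would be threaded down toward cache_grow() here */
	return __cache_alloc(cachep, flags);
}

void *__kmalloc(size_t size, gfp_t flags)
{
	return __do_kmalloc(size, flags, NULL);
}
EXPORT_SYMBOL(__kmalloc);

void *__kmalloc_mempool(size_t size, gfp_t flags, mempool_t *pool)
{
	return __do_kmalloc(size, flags, pool);
}
EXPORT_SYMBOL(__kmalloc_mempool);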
Now as for the rest of the patch, are you sure you want to reserve
whole pages for each critical allocation that cannot be satisfied by
the slab allocator? Wouldn't it be better to use something like the
slob allocator to allocate from the mempool pages? That way you
wouldn't have to make the slab allocator mempool aware at all, simply
make your kmalloc_mempool first try the slab allocator and if it
returns NULL, go for the critical pool. All of this preferably in a
separate file, so you don't make mm/slab.c any more complex than it
already is.
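Roughly like this (untested sketch; a real version would need a
matching free path and a sub-page allocator for the pool's pages):

static inline void *kmalloc_mempool(size_t size, gfp_t flags,
				    mempool_t *pool)
{
	void *obj = kmalloc(size, flags);

	if (obj || !pool)
		return obj;
	/* Slab failed: fall back to the critical pool.  Here each
	 * allocation burns a whole pool element; a slob-style
	 * allocator could instead carve objects out of the pool's
	 * pages. */
	return mempool_alloc(pool, flags);
}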
Pekka
Pekka Enberg wrote:
> Hi Matthew,
>
> On 1/25/06, Matthew Dobson <[email protected]> wrote:
>
>>+extern void *__kmalloc(size_t, gfp_t, mempool_t *);
>
>
> If you really need to do this, please note that you're adding an extra
> parameter push for the nominal case where a mempool is not required. The
> compiler is unable to optimize it away. It's better to create a
> new entry point for the mempool case in mm/slab.c rather than
> overloading __kmalloc() et al. See the following patch, which does that
> sort of thing:
>
> http://www.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.16-rc1/2.6.16-rc1-mm3/broken-out/slab-fix-kzalloc-and-kstrdup-caller-report-for-config_debug_slab.patch
At some level non-mempool users are going to have to use the same functions
as mempool users. As you can see in patch 9/9, all we really need is to
get the mempool_t pointer down to cache_grow() where the *actual* cache
growth happens. Unfortunately, between the external callers (kmalloc(),
kmem_cache_alloc(), etc.) and cache_grow() there are several functions. I
will try to follow an approach similar to the patch you linked to above for
this patch (modifying the externally callable functions), but I don't think
the follow-on patch can change much. The overhead of passing along a NULL
pointer should not be too onerous.
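To sketch what I mean (heavily simplified; the exact signature here is
my own illustration, the real code is in patch 9/9):

/* Simplified sketch: cache_grow() consults the pool only when the
 * normal page allocation fails. */
static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid,
		      mempool_t *pool)
{
	void *objp;

	objp = kmem_getpages(cachep, flags, nodeid);
	if (!objp && pool)
		objp = mempool_alloc(pool, flags);
	if (!objp)
		return 0;
	/* ... slab management setup, unchanged ... */
	return 1;
}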
> Now as for the rest of the patch, are you sure you want to reserve
> whole pages for each critical allocation that cannot be satisfied by
> the slab allocator? Wouldn't it be better to use something like the
> slob allocator to allocate from the mempool pages? That way you
> wouldn't have to make the slab allocator mempool aware at all, simply
> make your kmalloc_mempool first try the slab allocator and if it
> returns NULL, go for the critical pool. All this in preferably
> separate file so you don't make mm/slab.c any more complex than it is
> now.
I decided that using a whole-page allocator would be the easiest way to
cover the most common uses of slab/kmalloc, but your idea is very
interesting. My immediate concern would be trying to determine, at kfree()
time, what was allocated by the slab allocator and what was allocated by
the critical pool. I will give this approach more thought, as the idea of
completely separating the critical pool and slab allocator is attractive.
Thanks!
-Matt
On Thu, 26 Jan 2006, Matthew Dobson wrote:
> The overhead of passing along a NULL pointer should not be too onerous.
It is: the extra parameter passing will be propagated all over the kernel,
everywhere kmalloc() is called. Increasing kernel text for no good reason
is not acceptable.
Pekka
On Thu, 26 Jan 2006, Matthew Dobson wrote:
> I decided that using a whole-page allocator would be the easiest way to
> cover the most common uses of slab/kmalloc, but your idea is very
> interesting. My immediate concern would be trying to determine, at kfree()
> time, what was allocated by the slab allocator and what was allocated by
> the critical pool. I will give this approach more thought, as the idea of
> completely separating the critical pool and slab allocator is attractive.
I think you can use PageSlab for that.
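Something like this (sketch; kfree_mempool is a made-up name):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/mempool.h>

/* Slab-allocated objects live on pages with PG_slab set, so
 * PageSlab() can distinguish them from critical-pool objects. */
static inline void kfree_mempool(void *obj, mempool_t *pool)
{
	if (PageSlab(virt_to_page(obj)))
		kfree(obj);
	else
		mempool_free(obj, pool);
}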
Pekka