Hello,

I noticed that kmalloc and kmem_find_general_cachep are doing a linear
search in the cache_sizes array. Isn't it better to speed that up with a
binary search or a b-tree, as in the following patch?

I am no gcc expert, so it might not help... does anybody know?
I am also not sure whether removing the elses produces faster code.

Perhaps we can improve the situation even more with a fast-path
optimization based on the usage profile of that function? (See the
sketch after the patch.)

btw: kmem_find_general_cachep most likely needs to be inlined now... I am
not sure whether my kmalloc rewrite is too bloated in that case, or
whether cut+paste would be better here.
--- mm/slab.c.org Wed Nov 21 02:02:27 2001
+++ mm/slab.c Wed Nov 21 02:35:38 2001
@@ -1533,14 +1533,13 @@
  */
 void * kmalloc (size_t size, int flags)
 {
-        cache_sizes_t *csizep = cache_sizes;
+        kmem_cache_t *cp;
+
+        cp = kmem_find_general_cachep(size, flags);
+
+        if (cp)
+                return __kmem_cache_alloc(cp, flags);
-        for (; csizep->cs_size; csizep++) {
-                if (size > csizep->cs_size)
-                        continue;
-                return __kmem_cache_alloc(flags & GFP_DMA ?
-                        csizep->cs_dmacachep : csizep->cs_cachep, flags);
-        }
         return NULL;
 }
@@ -1604,20 +1603,67 @@
         local_irq_restore(flags);
 }
-kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
-{
-        cache_sizes_t *csizep = cache_sizes;
+/* This function could be moved to the header file, and
+ * made inline so consumers can quickly determine what
+ * cache pointer they require.
+ */
+
+#if PAGE_SIZE == 4096
+#define CACHE_ENTRY(p, f) ((f) & GFP_DMA ? cache_sizes[p].cs_dmacachep : cache_sizes[p].cs_cachep)
+#else
+/* there is no 32-byte cache when PAGE_SIZE != 4096, so cache_sizes[]
+ * starts at 64 bytes and every index below is shifted down by one */
+#define CACHE_ENTRY(p, f) ((f) & GFP_DMA ? cache_sizes[(p)-1].cs_dmacachep : cache_sizes[(p)-1].cs_cachep)
+#endif
-        /* This function could be moved to the header file, and
-         * made inline so consumers can quickly determine what
-         * cache pointer they require.
-         */
-        for ( ; csizep->cs_size; csizep++) {
-                if (size > csizep->cs_size)
-                        continue;
-                break;
+kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflag)
+{
+        if (size <= 512) {
+                if (size <= 128) {
+                        if (size <= 64) {
+#if PAGE_SIZE == 4096
+                                if (size <= 32) {
+                                        return CACHE_ENTRY(0, gfpflag); /* 32 */
+                                }
+#endif
+                                return CACHE_ENTRY(1, gfpflag); /* 64 */
+                        } else
+                                return CACHE_ENTRY(2, gfpflag); /* 128 */
+                } else {
+                        if (size <= 256)
+                                return CACHE_ENTRY(3, gfpflag); /* 256 */
+                        else
+                                return CACHE_ENTRY(4, gfpflag); /* 512 */
+                }
+        }
-        return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
+        if (size <= 8192) {
+                if (size <= 2048) {
+                        if (size <= 1024)
+                                return CACHE_ENTRY(5, gfpflag); /* 1024 */
+                        else
+                                return CACHE_ENTRY(6, gfpflag); /* 2048 */
+                } else {
+                        if (size <= 4096)
+                                return CACHE_ENTRY(7, gfpflag); /* 4096 */
+                        else
+                                return CACHE_ENTRY(8, gfpflag); /* 8192 */
+                }
+        }
+        if (size <= 131072) {
+                if (size <= 32768) {
+                        if (size <= 16384)
+                                return CACHE_ENTRY(9, gfpflag); /* 16384 */
+                        else
+                                return CACHE_ENTRY(10, gfpflag); /* 32768 */
+                } else {
+                        if (size <= 65536)
+                                return CACHE_ENTRY(11, gfpflag); /* 65536 */
+                        else
+                                return CACHE_ENTRY(12, gfpflag); /* 131072 */
+                }
+        }
+
+        return NULL;
 }
#ifdef CONFIG_SMP
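Regarding the fast-path question above: if a usage profile showed that
most requests land in one bucket, that bucket could be tested before
descending into the tree. A completely untested sketch; the 65..128
byte range is only an assumed hot bucket, not a measured one:

        /* hypothetical fast path at the top of kmem_find_general_cachep();
         * assumes profiling showed 65..128 bytes to be the common case */
        if (size > 64 && size <= 128)
                return CACHE_ENTRY(2, gfpflag); /* 128-byte cache */
        /* otherwise fall through to the search tree as above */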
--
(OO) -- [email protected] --
( .. ) ecki@{inka.de,linux.de,debian.org} http://home.pages.de/~eckes/
o--o *plush* 2048/93600EFD eckes@irc +497257930613 BE5-RIPE
(O____O) When cryptography is outlawed, bayl bhgynjf jvyy unir cevinpl!
Bernd Eckenfels wrote:
>
> Hello,
>
> I noticed that kmalloc and kmem_find_general_cachep are doing a linear
> search in the cache_sizes array. Isn't it better to speed that up with a
> binary search or a b-tree, as in the following patch?
>
Possibly. I've never seen it on a profile though.
Just for kicks, try feeding this into gcc and take a look
at the assembly output:
int thing(int size)
{
        switch (size) {
        case 0 ... 2:
                return 1;
        case 3 ... 4:
                return 2;
        case 5 ... 8:
                return 3;
        case 9 ... 16:
                return 4;
        case 17 ... 31:
                return 5;
        case 32 ... 64:
                return 6;
        case 65 ... 128:
                return 7;
        case 129 ... 256:
                return 8;
        case 257 ... 512:
                return 9;
        case 513 ... 1024:
                return 10;
        case 1025 ... 2048:
                return 11;
        case 2049 ... 4096:
                return 12;
        case 4097 ... 8192:
                return 13;
        case 8193 ... 16384:
                return 14;
        case 16385 ... 32768:
                return 15;
        case 32769 ... 65536:
                return 16;
        }
        return -1;
}
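To actually look at the output, something like

        gcc -O2 -S thing.c

and then reading thing.s should do; with ranges this sparse, gcc
typically emits a tree of compares and conditional branches rather
than a jump table.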
>>>>> "Bernd" == Bernd Eckenfels <[email protected]> writes:
Bernd> Hello,
Bernd> I noticed that kmalloc and kmem_find_general_cachep are doing a linear
Bernd> search in the cache_sizes array. Isn't it better to speed that up with a
Bernd> binary search or a b-tree, as in the following patch?
Here is a patch using a gcc extension; gcc generates a binary search for the switch.
Regards,
-velco
--- slab.c.orig Wed Sep 19 00:16:26 2001
+++ slab.c Wed Nov 21 11:11:09 2001
@@ -1533,15 +1533,9 @@ void * kmem_cache_alloc (kmem_cache_t *c
  */
 void * kmalloc (size_t size, int flags)
 {
-        cache_sizes_t *csizep = cache_sizes;
-
-        for (; csizep->cs_size; csizep++) {
-                if (size > csizep->cs_size)
-                        continue;
-                return __kmem_cache_alloc(flags & GFP_DMA ?
-                        csizep->cs_dmacachep : csizep->cs_cachep, flags);
-        }
-        return NULL;
+        kmem_cache_t *cp = kmem_find_general_cachep (size, flags);
+
+        return cp == NULL ? NULL : __kmem_cache_alloc(cp, flags);
 }
/**
@@ -1589,18 +1583,66 @@ void kfree (const void *objp)
 kmem_cache_t * kmem_find_general_cachep (size_t size, int gfpflags)
 {
-        cache_sizes_t *csizep = cache_sizes;
+        int idx;
-        /* This function could be moved to the header file, and
-         * made inline so consumers can quickly determine what
-         * cache pointer they require.
-         */
-        for ( ; csizep->cs_size; csizep++) {
-                if (size > csizep->cs_size)
-                        continue;
+        switch (size) {
+#if PAGE_SIZE == 4096
+        case 0 ... 32:
+                idx = 0;
+                break;
+        case 33 ... 64:
+                idx = 1;
+                break;
+#else
+        case 0 ... 64:
+                idx = 1;
+                break;
+#endif
+        case 65 ... 128:
+                idx = 2;
+                break;
+        case 129 ... 256:
+                idx = 3;
+                break;
+        case 257 ... 512:
+                idx = 4;
+                break;
+        case 513 ... 1024:
+                idx = 5;
+                break;
+        case 1025 ... 2048:
+                idx = 6;
+                break;
+        case 2049 ... 4096:
+                idx = 7;
+                break;
+        case 4097 ... 8192:
+                idx = 8;
+                break;
+        case 8193 ... 16384:
+                idx = 9;
+                break;
+        case 16385 ... 32768:
+                idx = 10;
+                break;
+        case 32769 ... 65536:
+                idx = 11;
+                break;
+        case 65537 ... 131072:
+                idx = 12;
+                break;
+        default:
+                idx = -1;
                 break;
         }
-        return (gfpflags & GFP_DMA) ? csizep->cs_dmacachep : csizep->cs_cachep;
+
+        if (idx == -1)
+                return NULL;
+
+#if PAGE_SIZE != 4096
+        idx = idx - 1;
+#endif
+        return (gfpflags & GFP_DMA) ? cache_sizes[idx].cs_dmacachep : cache_sizes[idx].cs_cachep;
 }
#ifdef CONFIG_SMP
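For reference, `case low ... high:' is the GNU C case-range extension
and matches the inclusive range. A minimal userspace demo (not part of
the patch; bucket() is a made-up name):

#include <stdio.h>

/* returns which of three size buckets a value falls into, -1 if none */
static int bucket(unsigned int size)
{
        switch (size) {
        case 0 ... 32:
                return 0;
        case 33 ... 64:
                return 1;
        case 65 ... 128:
                return 2;
        default:
                return -1;
        }
}

int main(void)
{
        /* prints: 0 1 2 -1 */
        printf("%d %d %d %d\n", bucket(32), bucket(33), bucket(128), bucket(129));
        return 0;
}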
Momchil Velikov wrote:
>
> >>>>> "Bernd" == Bernd Eckenfels <[email protected]> writes:
>
> Bernd> Hello,
> Bernd> I noticed that kmalloc and kmem_find_general_cachep are doing a linear
> Bernd> search in the cache_sizes array. Isn't it better to speed that up with a
> Bernd> binary search or a b-tree, as in the following patch?
>
> Here is a patch using a gcc extension. gcc generates binary search for the case.
the big "case" statement makes you wonder if ffz(~size) would do the
same ;)
>>>>> "Arjan" == Arjan van de Ven <[email protected]> writes:
Arjan> Momchil Velikov wrote:
>>
>> >>>>> "Bernd" == Bernd Eckenfels <[email protected]> writes:
>>
Bernd> Hello,
Bernd> I noticed that kmalloc and kmem_find_general_cachep are doing a linear
Bernd> search in the cache_sizes array. Isn't it better to speed that up with a
Bernd> binary search or a b-tree, as in the following patch?
>>
>> Here is a patch using a gcc extension. gcc generates binary search for the case.
Arjan> the big "case" statement makes you wonder if ffz(~size) would do the
Arjan> same ;)
*Nod*, especially on architectures where it is a single instruction.
OTOH, this is possible only if the sizes are powers of two. And they
could be spaced more closely ...
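Something along these lines might do it (completely untested;
size_to_index() is a made-up helper, and the shift loop is a portable
stand-in for a single ffz(~x)/fls() instruction):

/* Map a size to a general-cache index by bit arithmetic instead of a
 * search. Only valid while the general caches are powers of two,
 * starting at 2^5 = 32 bytes. */
static inline int size_to_index(unsigned long size)
{
        int idx = 0;

        if (size <= 32)
                return 0;       /* smallest cache */
        size--;                 /* so exact powers of two do not round up */
        while (size >>= 1)      /* idx + 1 becomes ceil(log2()) of the original size */
                idx++;
        return idx + 1 - 5;     /* subtract log2(32) to get a 0-based index */
}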
Regards,
-velco