2005-11-09 14:20:20

by Wu Fengguang

[permalink] [raw]
Subject: [PATCH 04/16] radix-tree: look-aside cache

This introduces a set of lookup functions to radix tree for the read-ahead
logic. Other access patterns with high locality may also benefit from them.

Most of them are best inlined, so some macros/structs in .c are moved into .h.

- radix_tree_lookup_node(root, index, level)
Perform partial lookup, return the @level'th parent of the slot at
@index.

- radix_tree_cache_lookup(root, cache, index)
Perform lookup with the aid of a look-aside cache.
- radix_tree_cache_xxx()
Init/Query the cache.

- radix_tree_lookup_head(root, index, max_scan)
- radix_tree_lookup_tail(root, index, max_scan)
[head, tail) is a segment with continuous pages. The functions
search for the head and tail index of the segment at @index.

Signed-off-by: Wu Fengguang <[email protected]>
---

include/linux/radix-tree.h | 150 ++++++++++++++++++++++++++++++++++++++++++++-
lib/radix-tree.c | 142 ++++++++++++++++++++++++++++++++----------
2 files changed, 255 insertions(+), 37 deletions(-)

--- linux-2.6.14-mm1.orig/include/linux/radix-tree.h
+++ linux-2.6.14-mm1/include/linux/radix-tree.h
@@ -22,12 +22,39 @@
#include <linux/preempt.h>
#include <linux/types.h>

+#ifdef __KERNEL__
+#define RADIX_TREE_MAP_SHIFT 6
+#else
+#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
+#endif
+#define RADIX_TREE_TAGS 2
+
+#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
+
+#define RADIX_TREE_TAG_LONGS \
+ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+struct radix_tree_node {
+ unsigned int count;
+ void *slots[RADIX_TREE_MAP_SIZE];
+ unsigned long tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
+};
+
struct radix_tree_root {
unsigned int height;
gfp_t gfp_mask;
struct radix_tree_node *rnode;
};

+/*
+ * Support access patterns with strong locality.
+ */
+struct radix_tree_cache {
+ unsigned long first_index;
+ struct radix_tree_node *tree_node;
+};
+
#define RADIX_TREE_INIT(mask) { \
.height = 0, \
.gfp_mask = (mask), \
@@ -45,9 +72,13 @@ do { \
} while (0)

int radix_tree_insert(struct radix_tree_root *, unsigned long, void *);
-void *radix_tree_lookup(struct radix_tree_root *, unsigned long);
-void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
+void *radix_tree_lookup_node(struct radix_tree_root *, unsigned long,
+ unsigned int);
void *radix_tree_delete(struct radix_tree_root *, unsigned long);
+unsigned long radix_tree_lookup_head(struct radix_tree_root *root,
+ unsigned long index, unsigned int max_scan);
+unsigned long radix_tree_lookup_tail(struct radix_tree_root *root,
+ unsigned long index, unsigned int max_scan);
unsigned int
radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
unsigned long first_index, unsigned int max_items);
@@ -69,4 +100,119 @@ static inline void radix_tree_preload_en
preempt_enable();
}

+/**
+ * radix_tree_lookup - perform lookup operation on a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Lookup the item at the position @index in the radix tree @root.
+ */
+static inline void *radix_tree_lookup(struct radix_tree_root *root,
+ unsigned long index)
+{
+ return radix_tree_lookup_node(root, index, 0);
+}
+
+/**
+ * radix_tree_lookup_slot - lookup a slot in a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Lookup the slot corresponding to the position @index in the radix tree
+ * @root. This is useful for update-if-exists operations.
+ */
+static inline void **radix_tree_lookup_slot(struct radix_tree_root *root,
+ unsigned long index)
+{
+ struct radix_tree_node *node;
+
+ node = radix_tree_lookup_node(root, index, 1);
+ return node->slots + (index & RADIX_TREE_MAP_MASK);
+}
+
+/**
+ * radix_tree_cache_lookup_node - cached lookup node
+ * @root: radix tree root
+ * @cache: look-aside cache
+ * @index: index key
+ *
+ * Lookup the item at the position @index in the radix tree @root,
+ * and return the node @level levels from the bottom in the search path.
+ * @cache stores the last accessed upper level tree node by this
+ * function, and is always checked first before searching in the tree.
+ * It can improve speed for access patterns with strong locality.
+ * NOTE:
+ * - The cache becomes invalid on leaving the lock;
+ * - Do not intermix calls with different @level.
+ */
+static inline void *radix_tree_cache_lookup_node(struct radix_tree_root *root,
+ struct radix_tree_cache *cache,
+ unsigned long index, unsigned int level)
+{
+ struct radix_tree_node *node;
+ unsigned long i;
+ unsigned long mask;
+
+ if (level && level >= root->height)
+ return root->rnode;
+
+ i = ((index >> (level * RADIX_TREE_MAP_SHIFT)) & RADIX_TREE_MAP_MASK);
+ mask = ~((RADIX_TREE_MAP_SIZE << (level * RADIX_TREE_MAP_SHIFT)) - 1);
+
+ if ((index & mask) == cache->first_index)
+ return cache->tree_node->slots[i];
+
+ node = radix_tree_lookup_node(root, index, level + 1);
+ if (!node)
+ return 0;
+
+ cache->tree_node = node;
+ cache->first_index = (index & mask);
+ return node->slots[i];
+}
+
+/**
+ * radix_tree_cache_lookup - cached lookup page
+ * @root: radix tree root
+ * @cache: look-aside cache
+ * @index: index key
+ *
+ * Lookup the item at the position @index in the radix tree @root.
+ */
+static inline void *radix_tree_cache_lookup(struct radix_tree_root *root,
+ struct radix_tree_cache *cache,
+ unsigned long index)
+{
+ return radix_tree_cache_lookup_node(root, cache, index, 0);
+}
+
+static inline void radix_tree_cache_init(struct radix_tree_cache *cache)
+{
+ cache->first_index = 0x77;
+ cache->tree_node = NULL;
+}
+
+static inline int radix_tree_cache_size(struct radix_tree_cache *cache)
+{
+ return RADIX_TREE_MAP_SIZE;
+}
+
+static inline int radix_tree_cache_count(struct radix_tree_cache *cache)
+{
+ if (cache->first_index != 0x77)
+ return cache->tree_node->count;
+ else
+ return 0;
+}
+
+static inline int radix_tree_cache_full(struct radix_tree_cache *cache)
+{
+ return radix_tree_cache_count(cache) == radix_tree_cache_size(cache);
+}
+
+static inline int radix_tree_cache_first_index(struct radix_tree_cache *cache)
+{
+ return cache->first_index;
+}
+
#endif /* _LINUX_RADIX_TREE_H */
--- linux-2.6.14-mm1.orig/lib/radix-tree.c
+++ linux-2.6.14-mm1/lib/radix-tree.c
@@ -32,25 +32,6 @@
#include <linux/bitops.h>


-#ifdef __KERNEL__
-#define RADIX_TREE_MAP_SHIFT 6
-#else
-#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
-#endif
-#define RADIX_TREE_TAGS 2
-
-#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
-#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
-
-#define RADIX_TREE_TAG_LONGS \
- ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
-
-struct radix_tree_node {
- unsigned int count;
- void *slots[RADIX_TREE_MAP_SIZE];
- unsigned long tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
-};
-
struct radix_tree_path {
struct radix_tree_node *node;
int offset;
@@ -287,8 +268,21 @@ int radix_tree_insert(struct radix_tree_
}
EXPORT_SYMBOL(radix_tree_insert);

-static inline void **__lookup_slot(struct radix_tree_root *root,
- unsigned long index)
+/**
+ * radix_tree_lookup_node - low level lookup routine
+ * @root: radix tree root
+ * @index: index key
+ * @level: stop at that many levels from bottom
+ *
+ * Lookup the item at the position @index in the radix tree @root.
+ * The return value is:
+ * @level == 0: page at @index;
+ * @level == 1: the corresponding bottom level tree node;
+ * @level < height: (height - @level)th level tree node;
+ * @level >= height: root node.
+ */
+void *radix_tree_lookup_node(struct radix_tree_root *root,
+ unsigned long index, unsigned int level)
{
unsigned int height, shift;
struct radix_tree_node *slot;
@@ -300,7 +294,7 @@ static inline void **__lookup_slot(struc
shift = (height-1) * RADIX_TREE_MAP_SHIFT;
slot = root->rnode;

- while (height > 0) {
+ while (height > level) {
if (slot == NULL)
return NULL;

@@ -311,36 +305,114 @@ static inline void **__lookup_slot(struc

return slot;
}
+EXPORT_SYMBOL(radix_tree_lookup_node);

/**
- * radix_tree_lookup_slot - lookup a slot in a radix tree
+ * radix_tree_lookup_head - lookup the head index
* @root: radix tree root
* @index: index key
+ * @max_scan: max items to scan
*
- * Lookup the slot corresponding to the position @index in the radix tree
- * @root. This is useful for update-if-exists operations.
+ * Lookup head index of the segment which contains @index. A segment is
+ * a set of continuous pages in a file.
+ * CASE RETURN VALUE
+ * no page at @index (not head) = @index + 1
+ * found in the range @index - @max_scan < (head index) <= @index
+ * not found in range (unfinished head) <= @index - @max_scan
*/
-void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+unsigned long radix_tree_lookup_head(struct radix_tree_root *root,
+ unsigned long index, unsigned int max_scan)
{
- return __lookup_slot(root, index);
+ struct radix_tree_cache cache;
+ struct radix_tree_node *node;
+ int i;
+ unsigned long origin;
+
+ origin = index;
+ if (unlikely(max_scan > index))
+ max_scan = index;
+ radix_tree_cache_init(&cache);
+
+next_node:
+ if (origin - index > max_scan)
+ goto out;
+
+ node = radix_tree_cache_lookup_node(root, &cache, index, 1);
+ if (!node)
+ goto out;
+
+ if (node->count == RADIX_TREE_MAP_SIZE) {
+ if (index < RADIX_TREE_MAP_SIZE) {
+ index = -1;
+ goto out;
+ }
+ index = (index - RADIX_TREE_MAP_SIZE) | RADIX_TREE_MAP_MASK;
+ goto next_node;
+ }
+
+ for (i = index & RADIX_TREE_MAP_MASK; i >= 0; i--, index--) {
+ if (!node->slots[i])
+ goto out;
+ }
+
+ goto next_node;
+
+out:
+ return index + 1;
}
-EXPORT_SYMBOL(radix_tree_lookup_slot);
+EXPORT_SYMBOL(radix_tree_lookup_head);

/**
- * radix_tree_lookup - perform lookup operation on a radix tree
+ * radix_tree_lookup_tail - lookup the tail index
* @root: radix tree root
* @index: index key
+ * @max_scan: max items to scan
*
- * Lookup the item at the position @index in the radix tree @root.
+ * Lookup tail(pass the end) index of the segment which contains @index.
+ * A segment is a set of continuous pages in a file.
+ * CASE RETURN VALUE
+ * found in the range @index <= (tail index) < @index + @max_scan
+ * not found in range @index + @max_scan <= (non tail)
*/
-void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+unsigned long radix_tree_lookup_tail(struct radix_tree_root *root,
+ unsigned long index, unsigned int max_scan)
{
- void **slot;
+ struct radix_tree_cache cache;
+ struct radix_tree_node *node;
+ int i;
+ unsigned long origin;
+
+ origin = index;
+ if (unlikely(index + max_scan < index))
+ max_scan = LONG_MAX - index;
+ radix_tree_cache_init(&cache);
+
+next_node:
+ if (index - origin >= max_scan)
+ goto out;

- slot = __lookup_slot(root, index);
- return slot != NULL ? *slot : NULL;
+ node = radix_tree_cache_lookup_node(root, &cache, index, 1);
+ if (!node)
+ goto out;
+
+ if (node->count == RADIX_TREE_MAP_SIZE) {
+ index = (index | RADIX_TREE_MAP_MASK) + 1;
+ if (unlikely(!index))
+ goto out;
+ goto next_node;
+ }
+
+ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++, index++) {
+ if (!node->slots[i])
+ goto out;
+ }
+
+ goto next_node;
+
+out:
+ return index;
}
-EXPORT_SYMBOL(radix_tree_lookup);
+EXPORT_SYMBOL(radix_tree_lookup_tail);

/**
* radix_tree_tag_set - set a tag on a radix tree node

--


2005-11-09 23:28:57

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH 04/16] radix-tree: look-aside cache

Wu Fengguang wrote:
> This introduces a set of lookup functions to radix tree for the read-ahead
> logic. Other access patterns with high locality may also benefit from them.
>

Hi Wu,

Does this cache add much performance compared with simple repeated
lookups? If the access patterns are highly local, the top of the
radix tree should be in cache.

I worry that it is a fair bit of extra complexity for something
slow like the IO path - however I haven't looked at how you use the
cache.

> Most of them are best inlined, so some macros/structs in .c are moved into .h.
>

I would not inline them. You'd find that the extra icache misses
that costs outweighs the improvements for larger functions.

> +
> +struct radix_tree_node {
> + unsigned int count;
> + void *slots[RADIX_TREE_MAP_SIZE];
> + unsigned long tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
> +};
> +

Would be much nicer if this weren't declared in the header file, so
people don't start trying to use the nodes where they shouldn't.
This ought to be possible after uninlining a couple of things.

> struct radix_tree_root {
> unsigned int height;
> gfp_t gfp_mask;
> struct radix_tree_node *rnode;
> };
>
> +/*
> + * Support access patterns with strong locality.
> + */

Do you think you could provide a simple 'use case' for an overview
of how you use the cache and what calls to make?

> +struct radix_tree_cache {
> + unsigned long first_index;
> + struct radix_tree_node *tree_node;
> +};
> +

> +static inline void radix_tree_cache_init(struct radix_tree_cache *cache)
> +{
> + cache->first_index = 0x77;
> + cache->tree_node = NULL;
> +}
> +
> +static inline int radix_tree_cache_size(struct radix_tree_cache *cache)
> +{
> + return RADIX_TREE_MAP_SIZE;
> +}
> +
> +static inline int radix_tree_cache_count(struct radix_tree_cache *cache)
> +{
> + if (cache->first_index != 0x77)
> + return cache->tree_node->count;
> + else
> + return 0;
> +}
> +

What's 0x77 for? And what happens if your cache gets big enough that
the first index is 0x77?

Thanks,
Nick

--
SUSE Labs, Novell Inc.

Send instant messages to your online friends http://au.messenger.yahoo.com

2005-11-10 05:24:24

by Wu Fengguang

[permalink] [raw]
Subject: Re: [PATCH 04/16] radix-tree: look-aside cache

Hi Nick,

On Thu, Nov 10, 2005 at 10:31:09AM +1100, Nick Piggin wrote:
> Does this cache add much performance compared with simple repeated
> lookups? If the access patterns are highly local, the top of the
> radix tree should be in cache.
It just guarantees constant lookup time for small/large files.

My context based read-ahead code has been quite tricky just to avoid many radix
tree lookups. I made it much simple and robust in the recent versions by just
scanning through the cache. With the help of look-aside cache, the performance
remains comparable with the tricky one. Sorry, the oprofile log was overwrote.
But if you do need some numbers about the cache, I'll make one.

>
> I worry that it is a fair bit of extra complexity for something
> slow like the IO path - however I haven't looked at how you use the
> cache.
Most are one-liners, except radix_tree_cache_lookup_node(). Which is about 10
lines. Currently it is always called with a constant @level, where inline can
help. Only several speed critical functions call it, so I guess icache misses
will not be a big problem. But I do feel it ugly to expose internal data
structures in .h :(
>
> >Most of them are best inlined, so some macros/structs in .c are moved into
> >.h.
> >
>
> I would not inline them. You'd find that the extra icache misses
> that costs outweighs the improvements for larger functions.
>
> >+
> >+struct radix_tree_node {
> >+ unsigned int count;
> >+ void *slots[RADIX_TREE_MAP_SIZE];
> >+ unsigned long tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
> >+};
> >+
>
> Would be much nicer if this weren't declared in the header file, so
> people don't start trying to use the nodes where they shouldn't.
> This ought to be possible after uninlining a couple of things.
Ok. I'll try it.
>
> > struct radix_tree_root {
> > unsigned int height;
> > gfp_t gfp_mask;
> > struct radix_tree_node *rnode;
> > };
> >
> >+/*
> >+ * Support access patterns with strong locality.
> >+ */
>
> Do you think you could provide a simple 'use case' for an overview
> of how you use the cache and what calls to make?
Ok, here it is:

void func() {
+ struct radix_tree_cache cache;
+
+ radix_tree_cache_init(&cache);
read_lock_irq(&mapping->tree_lock);
for(;;) {
- page = radix_tree_lookup(&mapping->page_tree, index);
+ page = radix_tree_cache_lookup(&mapping->page_tree, &cache, index);
}
read_unlock_irq(&mapping->tree_lock);
}

>
> >+struct radix_tree_cache {
> >+ unsigned long first_index;
> >+ struct radix_tree_node *tree_node;
> >+};
> >+
>
> >+static inline void radix_tree_cache_init(struct radix_tree_cache *cache)
> >+{
> >+ cache->first_index = 0x77;
> >+ cache->tree_node = NULL;
> >+}
> >+
> >+static inline int radix_tree_cache_size(struct radix_tree_cache *cache)
> >+{
> >+ return RADIX_TREE_MAP_SIZE;
> >+}
> >+
> >+static inline int radix_tree_cache_count(struct radix_tree_cache *cache)
> >+{
> >+ if (cache->first_index != 0x77)
> >+ return cache->tree_node->count;
> >+ else
> >+ return 0;
> >+}
> >+
>
> What's 0x77 for? And what happens if your cache gets big enough that
> the first index is 0x77?
Sorry for the ugly code. It is better written as:
if (cache->first_index & RADIX_TREE_MAP_MASK)
return 0;
else
return cache->tree_node->count;

The 0x77 is an invalid value that will be detected in radix_tree_cache_lookup_node():

mask = ~((RADIX_TREE_MAP_SIZE << (level * RADIX_TREE_MAP_SHIFT)) - 1);

---> if ((index & mask) == cache->first_index)
return cache->tree_node->slots[i];

node = radix_tree_lookup_node(root, index, level + 1);

It can be initialized to 1, 0xFF, or any i that (i & RADIX_TREE_MAP_MASK != 0).
I'd better just init it as RADIX_TREE_MAP_MASK.

Regards,
Wu

2005-11-10 06:50:21

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH 04/16] radix-tree: look-aside cache

Wu Fengguang wrote:

>Hi Nick,
>
>On Thu, Nov 10, 2005 at 10:31:09AM +1100, Nick Piggin wrote:
>
>>Does this cache add much performance compared with simple repeated
>>lookups? If the access patterns are highly local, the top of the
>>radix tree should be in cache.
>>
>It just guarantees constant lookup time for small/large files.
>
>My context based read-ahead code has been quite tricky just to avoid many radix
>tree lookups. I made it much simple and robust in the recent versions by just
>scanning through the cache. With the help of look-aside cache, the performance
>remains comparable with the tricky one. Sorry, the oprofile log was overwrote.
>But if you do need some numbers about the cache, I'll make one.
>
>

But it isn't *really* constant time lookups? I mean you'll
always have the O(logn) lookup. Amortised I guess that
becomes insignificant?

Briefly: is there a reason why you couldn't use gang lookups
instead? (Sorry I haven't been able to read and understand your
actual readahead code).

>>Do you think you could provide a simple 'use case' for an overview
>>of how you use the cache and what calls to make?
>>
>Ok, here it is:
>
> void func() {
>+ struct radix_tree_cache cache;
>+
>+ radix_tree_cache_init(&cache);
> read_lock_irq(&mapping->tree_lock);
> for(;;) {
>- page = radix_tree_lookup(&mapping->page_tree, index);
>+ page = radix_tree_cache_lookup(&mapping->page_tree, &cache, index);
> }
> read_unlock_irq(&mapping->tree_lock);
> }
>
>

OK, I guess that seems reasonable.
You have introduced some other APIs as well...

Profile numbers would be great for the cached / non-cached cases.


Send instant messages to your online friends http://au.messenger.yahoo.com

2005-11-10 08:30:19

by Wu Fengguang

[permalink] [raw]
Subject: Re: [PATCH 04/16] radix-tree: look-aside cache

On Thu, Nov 10, 2005 at 05:50:09PM +1100, Nick Piggin wrote:
> But it isn't *really* constant time lookups? I mean you'll
> always have the O(logn) lookup. Amortised I guess that
> becomes insignificant?

For sequential scan, I get 64*O(1) + 1*O(logn), that's pretty much gain.

> Briefly: is there a reason why you couldn't use gang lookups
> instead? (Sorry I haven't been able to read and understand your
> actual readahead code).

Because they have different semantics, one cannot replace the another.

> Profile numbers would be great for the cached / non-cached cases.

Ok, I'll do it, but at some time later. Currently I have several tasks that
need immediate handling, and that will take about a week. See you then :)

Regards,
Wu

2005-11-18 11:23:26

by Wu Fengguang

[permalink] [raw]
Subject: Re: [PATCH 04/16] radix-tree: look-aside cache

Hi Nick,

On Thu, Nov 10, 2005 at 05:50:09PM +1100, Nick Piggin wrote:
> Profile numbers would be great for the cached / non-cached cases.

Sorry for the delay!

I run two rounds of oprofile on the context based method, which is the user of
radix_tree_lookup_{head,tail}, which take advantage of the look-aside cache.
This is the diffprofile grep output(disable vs. enable cache):

radixtree-lookaside-cache.diffprofile1: -8 -53.3% radix_tree_lookup_head
radixtree-lookaside-cache.diffprofile1: -30 -7.8% radix_tree_lookup_node
radixtree-lookaside-cache.diffprofile1: -34 -18.9% radix_tree_insert

radixtree-lookaside-cache.diffprofile2: 16 10.9% radix_tree_insert
radixtree-lookaside-cache.diffprofile2: 12 18.8% radix_tree_preload
radixtree-lookaside-cache.diffprofile2: 6 42.9% radix_tree_lookup_tail
radixtree-lookaside-cache.diffprofile2: -7 -63.6% radix_tree_lookup_head
radixtree-lookaside-cache.diffprofile2: -23 -10.5% radix_tree_delete
radixtree-lookaside-cache.diffprofile2: -29 -6.9% radix_tree_lookup_node

Regards,
Wu

2005-11-18 12:12:19

by Wu Fengguang

[permalink] [raw]
Subject: Re: [PATCH 04/16] radix-tree: look-aside cache

On Fri, Nov 18, 2005 at 07:25:01PM +0800, Wu Fengguang wrote:
> I run two rounds of oprofile on the context based method, which is the user of
> radix_tree_lookup_{head,tail}, which take advantage of the look-aside cache.
> This is the diffprofile grep output(disable vs. enable cache):
>
> radixtree-lookaside-cache.diffprofile1: -8 -53.3% radix_tree_lookup_head
> radixtree-lookaside-cache.diffprofile1: -30 -7.8% radix_tree_lookup_node
> radixtree-lookaside-cache.diffprofile1: -34 -18.9% radix_tree_insert
>
> radixtree-lookaside-cache.diffprofile2: 16 10.9% radix_tree_insert
> radixtree-lookaside-cache.diffprofile2: 12 18.8% radix_tree_preload
> radixtree-lookaside-cache.diffprofile2: 6 42.9% radix_tree_lookup_tail
> radixtree-lookaside-cache.diffprofile2: -7 -63.6% radix_tree_lookup_head
> radixtree-lookaside-cache.diffprofile2: -23 -10.5% radix_tree_delete
> radixtree-lookaside-cache.diffprofile2: -29 -6.9% radix_tree_lookup_node

The above profile data are gathered on comparing two big files:

# du /temp/kernel/bigfile*
626M /temp/kernel/bigfile
626M /temp/kernel/bigfile2

I run another real life test:

# grep -r 'asdfghjkl;' /backup/test/linux-2.6.14-rc4-git4-orig/

radixtree-lookaside-cache.diffprofile3: 16 43.2% radix_tree_tag_clear
radixtree-lookaside-cache.diffprofile3: 8 24.2% radix_tree_tag_set
radixtree-lookaside-cache.diffprofile3: -9 -5.8% radix_tree_lookup_node

radixtree-lookaside-cache.diffprofile4: 7 4.5% radix_tree_lookup_node
radixtree-lookaside-cache.diffprofile4: -8 -11.8% radix_tree_tag_clear

As expected, there's no noticable difference for small files.

Regards,
Wu