2003-01-10 09:43:49

by Daniel Ritz

[permalink] [raw]
Subject: [PATCH 2.5] speedup kallsyms_lookup

[please cc...you know why]

a patch to speed up the kallsyms_lookup() function while still doing
compression.
- make 4 sections: addresses, lens, stem, strings
- only strncpy() when needed
- no strlen() at all (only in the script)
- save space we lose for len table by not making strings zero terminated

not yet included is hugh's patch to sort the symbols by name, i didn't
look at it yet. but at least it should not be off-by-one...i din't test
it that much, but it looks quiet ok..

not yet sent to linus. comments first. against 2.5.55.


beep
-daniel



--- 2555-clean/scripts/kallsyms.c 2003-01-09 05:04:27.000000000 +0100
+++ 2555/scripts/kallsyms.c 2003-01-10 10:16:41.000000000 +0100
@@ -26,7 +26,8 @@
static void
usage(void)
{
- fprintf(stderr, "Usage: kallsyms < in.map > out.S\n");
+ fprintf(stderr, "Usage: kallsyms out.S < in.map\n");
+ fprintf(stderr, " Generates out.S.[123], cat them together.\n");
exit(1);
}

@@ -89,26 +90,42 @@
}

static void
-write_src(void)
+write_src(char *name)
{
unsigned long long last_addr;
int i, valid = 0;
char *prev;

- printf("#include <asm/types.h>\n");
- printf("#if BITS_PER_LONG == 64\n");
- printf("#define PTR .quad\n");
- printf("#define ALGN .align 8\n");
- printf("#else\n");
- printf("#define PTR .long\n");
- printf("#define ALGN .align 4\n");
- printf("#endif\n");
+ char *tname;
+ int len;
+ FILE *f1, *f2, *f3;

- printf(".data\n");
+ len = strlen(name);
+ tname = malloc(len + 3);
+ strcpy(tname, name);

- printf(".globl kallsyms_addresses\n");
- printf("\tALGN\n");
- printf("kallsyms_addresses:\n");
+ strcpy(tname + len, ".1");
+ f1 = fopen(tname, "w");
+ strcpy(tname + len, ".2");
+ f2 = fopen(tname, "w");
+ strcpy(tname + len, ".3");
+ f3 = fopen(tname, "w");
+ free(tname);
+
+ fprintf(f1, "#include <asm/types.h>\n");
+ fprintf(f1, "#if BITS_PER_LONG == 64\n");
+ fprintf(f1, "#define PTR .quad\n");
+ fprintf(f1, "#define ALGN .align 8\n");
+ fprintf(f1, "#else\n");
+ fprintf(f1, "#define PTR .long\n");
+ fprintf(f1, "#define ALGN .align 4\n");
+ fprintf(f1, "#endif\n");
+
+ fprintf(f1, ".data\n");
+
+ fprintf(f1, ".globl kallsyms_addresses\n");
+ fprintf(f1, "\tALGN\n");
+ fprintf(f1, "kallsyms_addresses:\n");
for (i = 0, last_addr = 0; i < cnt; i++) {
if (!symbol_valid(&table[i]))
continue;
@@ -116,21 +133,29 @@
if (table[i].addr == last_addr)
continue;

- printf("\tPTR\t%#llx\n", table[i].addr);
+ fprintf(f1, "\tPTR\t%#llx\n", table[i].addr);
valid++;
last_addr = table[i].addr;
}
- printf("\n");
+ fprintf(f1, "\n");

- printf(".globl kallsyms_num_syms\n");
- printf("\tALGN\n");
- printf("kallsyms_num_syms:\n");
- printf("\tPTR\t%d\n", valid);
- printf("\n");
+ fprintf(f1, ".globl kallsyms_num_syms\n");
+ fprintf(f1, "\tALGN\n");
+ fprintf(f1, "kallsyms_num_syms:\n");
+ fprintf(f1, "\tPTR\t%d\n", valid);
+ fprintf(f1, "\n");

- printf(".globl kallsyms_names\n");
- printf("\tALGN\n");
- printf("kallsyms_names:\n");
+ fprintf(f1, ".globl kallsyms_lens\n");
+ fprintf(f1, "\tALGN\n");
+ fprintf(f1, "kallsyms_lens:\n");
+
+ fprintf(f2, ".globl kallsyms_stem\n");
+ fprintf(f2, "\tALGN\n");
+ fprintf(f2, "kallsyms_stem:\n");
+
+ fprintf(f3, ".globl kallsyms_names\n");
+ fprintf(f3, "\tALGN\n");
+ fprintf(f3, "kallsyms_names:\n");
prev = "";
for (i = 0, last_addr = 0; i < cnt; i++) {
int k;
@@ -144,21 +169,29 @@
for (k = 0; table[i].sym[k] && table[i].sym[k] == prev[k]; ++k)
;

- printf("\t.byte 0x%02x ; .asciz\t\"%s\"\n", k, table[i].sym + k);
+ fprintf(f1, "\t.byte 0x%02x\n", strlen(table[i].sym) - k);
+ fprintf(f2, "\t.byte 0x%02x\n", k);
+ fprintf(f3, "\t.ascii\t\"%s\"\n", table[i].sym + k);
last_addr = table[i].addr;
prev = table[i].sym;
}
- printf("\n");
+ fprintf(f1, "\n");
+ fprintf(f2, "\n");
+ fprintf(f3, "\n");
+
+ fclose(f1);
+ fclose(f2);
+ fclose(f3);
}

int
main(int argc, char **argv)
{
- if (argc != 1)
+ if (argc != 2)
usage();

read_map(stdin);
- write_src();
+ write_src(argv[1]);

return 0;
}
--- 2555-clean/Makefile 2003-01-09 05:03:54.000000000 +0100
+++ 2555/Makefile 2003-01-10 00:14:21.000000000 +0100
@@ -355,7 +355,7 @@
kallsyms.o := .tmp_kallsyms2.o

quiet_cmd_kallsyms = KSYM $@
-cmd_kallsyms = $(NM) -n $< | scripts/kallsyms > $@
+cmd_kallsyms = $(NM) -n $< | scripts/kallsyms $@; cat [email protected] [email protected] [email protected] > $@

.tmp_kallsyms1.o .tmp_kallsyms2.o: %.o: %.S scripts FORCE
$(call if_changed_dep,as_o_S)
--- 2555-clean/kernel/kallsyms.c 2003-01-09 05:04:26.000000000 +0100
+++ 2555/kernel/kallsyms.c 2003-01-10 01:32:00.000000000 +0100
@@ -14,6 +14,8 @@
/* These will be re-linked against their real values during the second link stage */
extern unsigned long kallsyms_addresses[1] __attribute__((weak, alias("kallsyms_dummy")));
extern unsigned long kallsyms_num_syms __attribute__((weak, alias("kallsyms_dummy")));
+extern char kallsyms_lens[1] __attribute__((weak, alias("kallsyms_dummy")));
+extern char kallsyms_stem[1] __attribute__((weak, alias("kallsyms_dummy")));
extern char kallsyms_names[1] __attribute__((weak, alias("kallsyms_dummy")));

/* Defined by the linker script. */
@@ -26,6 +28,7 @@
char **modname, char *namebuf)
{
unsigned long i, best = 0;
+ unsigned long sym_offs = 0, stem = 0, stem_tmp, len;

/* This kernel should never had been booted. */
if ((void *)kallsyms_addresses == &kallsyms_dummy)
@@ -45,13 +48,31 @@
best = i;
}

- /* Grab name */
- for (i = 0; i < best; i++) {
- unsigned prefix = *name++;
- strncpy(namebuf + prefix, name, 127 - prefix);
- name += strlen(name) + 1;
+ /* find name offset */
+ for (i = 0; i < best; i++)
+ sym_offs += (unsigned long) kallsyms_lens[i];
+
+ /* go for the end of the name */
+ name = kallsyms_names + sym_offs;
+ stem = (unsigned long) kallsyms_stem[best];
+ len = (unsigned long) kallsyms_lens[best];
+ strncpy(namebuf + stem, name, len);
+ namebuf[stem + len] = '\0';
+
+ /* go back, find the beginning */
+ if (stem) {
+ i = best;
+ do {
+ i--;
+ name -= kallsyms_lens[i];
+ stem_tmp = (unsigned long) kallsyms_stem[i];
+
+ if (stem_tmp < stem)
+ strncpy(namebuf + stem_tmp, name, stem - stem_tmp);
+ } while (stem_tmp);
}

+
/* Base symbol size on next symbol. */
if (best + 1 < kallsyms_num_syms)
symbol_end = kallsyms_addresses[best + 1];






2003-01-10 15:19:19

by Hugh Dickins

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On 10 Jan 2003, Daniel Ritz wrote:
> [please cc...you know why]
>
> a patch to speed up the kallsyms_lookup() function while still doing
> compression.
> - make 4 sections: addresses, lens, stem, strings
> - only strncpy() when needed
> - no strlen() at all (only in the script)
> - save space we lose for len table by not making strings zero terminated

First impression is that it has good ideas, but seems inelegant
(always easy to make that judgment on others' code! ignore me)
and misses the main point.

In earlier mail, Andi highlighted the performance criticality of top
reading /proc/<pid>/wchan. I think we have to decide which way to
jump on that: either withdraw that functionality as too expensive,
and minimize the table size and code stupidity (all those strncpy's of
nearly 127! include/asm-i386/string.h strncpy seems in the wrong there);
or speed kallsyms_lookup as much as possible (binary chop or whatever
algorithm to locate symbol from address). The current linear search
through 6000(?) addresses is not nice, but of course the strncpy is
making it much worse.

I didn't reply to that part of Andi's mail, not because I thought it
irrelevant, quite the reverse; but because I didn't have an opinion
which way to go, and hoped someone else would chime in. I don't
see how to proceed without deciding that. CC'd rml since I believe
he fathered /proc/<pid>/wchan. Now, I'm inclined to say that anyone
worried about memory occupancy just shouldn't switch CONFIG_KALLSYMS
on, so it's speed we should aim for here.

If maximizing speed, then obviously the values should be sorted by
value (as now, unlike in my patch), and maybe we forget all about
stem compression? If minimizing memory, then a combination of your
patch and mine?

I hope I can leave this discussion to others: I just wanted to get
my symbols printing out right, and noticed the current stem compression
unnecessarily weak there; but I'm no expert on suitable algorithms.

Hugh

2003-01-10 15:54:57

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On Fri, Jan 10, 2003 at 03:29:19PM +0000, Hugh Dickins wrote:
> I hope I can leave this discussion to others: I just wanted to get
> my symbols printing out right, and noticed the current stem compression
> unnecessarily weak there; but I'm no expert on suitable algorithms.

I can help some here but probably no more than you (in fact, you've
spotted far more [> 0] issues with the current code than I).


Basically, if you want fast string lookup of compressed stuff I can sit
down with whiteboard etc. and fiddle around, but it sounds like from f's
comments that this isn't really wanted.

So the end-result of the discussion is, "What should really happen here?"
and "What, if anything, do you want me to do?"


Bill

2003-01-10 16:03:30

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup


> So the end-result of the discussion is, "What should really happen here?"
> and "What, if anything, do you want me to do?"

IMHO best would be to get rid of /proc/*/wchan and keep the kallsyms
lookup slow, simple and stupid.

-Andi

2003-01-10 16:06:39

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

At some point in the past, I wrote:
>> So the end-result of the discussion is, "What should really happen here?"
>> and "What, if anything, do you want me to do?"

On Fri, Jan 10, 2003 at 05:12:12PM +0100, Andi Kleen wrote:
> IMHO best would be to get rid of /proc/*/wchan and keep the kallsyms
> lookup slow, simple and stupid.

Slow, simple, and stupid == "wli, get the Hell out". I'm gone.


Thanks,
Bill

2003-01-10 16:24:03

by Hugh Dickins

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On Fri, 10 Jan 2003, William Lee Irwin III wrote:
> At some point in the past, I wrote:
> >> So the end-result of the discussion is, "What should really happen here?"
> >> and "What, if anything, do you want me to do?"
>
> On Fri, Jan 10, 2003 at 05:12:12PM +0100, Andi Kleen wrote:
> > IMHO best would be to get rid of /proc/*/wchan and keep the kallsyms
> > lookup slow, simple and stupid.
>
> Slow, simple, and stupid == "wli, get the Hell out". I'm gone.

Indeed! I think that was Andi volunteering :-}
But we should let rml defend his wchan.

Hugh

2003-01-10 16:27:52

by Zwane Mwaikambo

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On Fri, 10 Jan 2003, William Lee Irwin III wrote:

> At some point in the past, I wrote:
> >> So the end-result of the discussion is, "What should really happen here?"
> >> and "What, if anything, do you want me to do?"
>
> On Fri, Jan 10, 2003 at 05:12:12PM +0100, Andi Kleen wrote:
> > IMHO best would be to get rid of /proc/*/wchan and keep the kallsyms
> > lookup slow, simple and stupid.
>
> Slow, simple, and stupid == "wli, get the Hell out". I'm gone.

I don't really see a need for blazing fast lookup here, as long as it gets
done within a reasonable timeframe for common cases. Unless of course your
optimised version also saves space code wise.

Zwane
--
function.linuxpower.ca

2003-01-10 17:05:10

by Valdis Klētnieks

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On Fri, 10 Jan 2003 17:12:12 +0100, Andi Kleen <[email protected]> said:
>
> > So the end-result of the discussion is, "What should really happen here?"
> > and "What, if anything, do you want me to do?"
>
> IMHO best would be to get rid of /proc/*/wchan and keep the kallsyms
> lookup slow, simple and stupid.

And replace the current /proc/*/wchan functionality with what?
(Note that saying "read the System.map file from userspace" doesn't *quite*
work, as I may be running a kernel that doesn't have System.map installed
someplace easily findable. At the moment my /boot partition has 6 assorted
vmlinuz-\(*\) files, only 4 of which have System.map-\1 files matching).

--
Valdis Kletnieks
Computer Systems Senior Engineer
Virginia Tech


Attachments:
(No filename) (226.00 B)

2003-01-10 17:06:44

by Robert Love

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On Fri, 2003-01-10 at 11:34, Hugh Dickins wrote:

> Indeed! I think that was Andi volunteering :-}
> But we should let rml defend his wchan.

Well, of course I want to keep it - but I am biased :)

I think its a simple export that gives us a neat feature. Additionally,
from the procps perspective, it saves us from having to parse System.map
for each process. In fact, it means we do not need a System.map at all
for any procps functionality.

I guess Linus at least mildly liked it too, since he merged it.

But if its such a performance crippling item perhaps it does need to be
removed (or somehow restricted).

I do agree that, if possible, wchan should be kept simple... so, is
everyone else for the removal of /proc/pid/wchan ? :-(

Robert Love

2003-01-10 17:11:07

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On Fri, Jan 10, 2003 at 12:13:48PM -0500, [email protected] wrote:
> On Fri, 10 Jan 2003 17:12:12 +0100, Andi Kleen <[email protected]> said:
> >
> > > So the end-result of the discussion is, "What should really happen here?"
> > > and "What, if anything, do you want me to do?"
> >
> > IMHO best would be to get rid of /proc/*/wchan and keep the kallsyms
> > lookup slow, simple and stupid.
>
> And replace the current /proc/*/wchan functionality with what?

Ctrl-Rollen (or whatever the key is called on your keyboard) on the console,
like in all previous linux releases.

Note /proc/*/wchan is not in 2.4.

Also you still have WCHAN in ps, just not a full backtrace.

-Andi

2003-01-10 18:58:31

by Randy.Dunlap

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On Fri, 10 Jan 2003, Andi Kleen wrote:

| On Fri, Jan 10, 2003 at 12:13:48PM -0500, [email protected] wrote:
| > On Fri, 10 Jan 2003 17:12:12 +0100, Andi Kleen <[email protected]> said:
| > >
| > > > So the end-result of the discussion is, "What should really happen here?"
| > > > and "What, if anything, do you want me to do?"
| > >
| > > IMHO best would be to get rid of /proc/*/wchan and keep the kallsyms
| > > lookup slow, simple and stupid.
| >
| > And replace the current /proc/*/wchan functionality with what?
|
| Ctrl-Rollen (or whatever the key is called on your keyboard) on the console,
| like in all previous linux releases.

Ctrl-ScrollLock on mine if I'm following you correctly.
Same as Sysrq-P if sysrq is enabled, except that the former
is always available.

Are there other similar functions that are available without
Sysrq enabled? If so, where can I find info about them?
(other than drivers/char/keyboard.c :)

| Note /proc/*/wchan is not in 2.4.
|
| Also you still have WCHAN in ps, just not a full backtrace.

--
~Randy

2003-01-10 19:34:39

by Daniel Ritz

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

hmm...i don't see why i want wchan. tell me.
but you're right, for some reason linus merged it. so i think (if it's
kept) we should restrict it a bit and make the kallsyms_lookup a bit
faster. not a super fast complex algorithm, but at least not that
braindead as it currently is...

btw. does my ugly early-in-the-moring-hack work right?

On Fri, 2003-01-10 at 18:15, Robert Love wrote:
> On Fri, 2003-01-10 at 11:34, Hugh Dickins wrote:
>
> > Indeed! I think that was Andi volunteering :-}
> > But we should let rml defend his wchan.
>
> Well, of course I want to keep it - but I am biased :)
>
> I think its a simple export that gives us a neat feature. Additionally,
> from the procps perspective, it saves us from having to parse System.map
> for each process. In fact, it means we do not need a System.map at all
> for any procps functionality.
>
> I guess Linus at least mildly liked it too, since he merged it.
>
> But if its such a performance crippling item perhaps it does need to be
> removed (or somehow restricted).
>
> I do agree that, if possible, wchan should be kept simple... so, is
> everyone else for the removal of /proc/pid/wchan ? :-(
>
> Robert Love
>


2003-01-11 04:06:08

by Albert D. Cahalan

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup


>> a patch to speed up the kallsyms_lookup() function while
>> still doing compression.

First a few different ideas for the internals:

There won't be that many names actually getting used.
Hash with space for a few hundred, and gzip the rest.
You'll almost never take the hit of decompression.

Have the waitable things put addresses into a special
ELF section. During the build, process this data to
reduce the number of names that go into the table.

Addresses and string pointers have lots of redundancy.

WCHAN doesn't change if a process doesn't run. Cache the
number in the task_struct if it helps greatly.

> In earlier mail, Andi highlighted the performance criticality of top
> reading /proc/<pid>/wchan. I think we have to decide which way to
> jump on that: either withdraw that functionality as too expensive,
...
> or speed kallsyms_lookup as much as possible (binary chop or whatever
> algorithm to locate symbol from address). The current linear search
> through 6000(?) addresses is not nice, but of course the strncpy is
> making it much worse.

To fix the interface:

1. Add a full /proc/ksyms, containing all waitable locations.
2. Implement st_mtime for /proc/ksyms. (Really important!!!)
3. Add addresses to wchan files ("c013a0c0 do_select")

The above would eliminate a race condition that prevents
the /proc/*/wchan files from being cached and would allow
semi-traditional WCHAN processing (no System.map) as an option.


--- more details ---

Consider this:
ps -eo pid,nwchan,wchan

The /proc/*/stat file is needed for that, even on a 2.5.xx kernel.
Without it, nwchan (the number) would not be available.

User code would like to cache a nwchan-to-wchan mapping, so that
the wchan files wouldn't be needed so often. This can not be done
due to race conditions though. If the numbers were provided in the
/proc/*/wchan files, then a number-to-name mapping could be cached.

The /proc/*/wchan method has a high per-process cost, and a low
start-up cost. Thus it would be nice to have old-style WCHAN
available for long-running apps like top, gtop, ktop, and ncps.

For traditional WCHAN determination, /proc/ksyms needs to
contain the following:

a. the function names NOT found in System.map (from modules)
b. enough of the other function names to sanity-check

Alternately, it could contain all function names and an
indication that this is the case. This would eliminate the
error-prone search for a System.map file that seems to be
a decent match.

Two things for long-term consideration:

The problem of multiple WCHAN entries needs to be considered
for new-style threads and AIO.

I'm told that a simple "ls -lR /proc" will crash a NUMA box
with more than about 4000 tasks. Locks get held for a long
time, and then the NMI watchdog causes a reboot. In spite of
this, reading /proc/ksyms and /proc/kcore would work fine.
You know what I'm thinking. :-( For the really big systems,
this is the only path to take. So I'll be needing addresses
of various data structures as well. The LKCD patches would
be really helpful.



2003-01-11 04:19:34

by Andi Kleen

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

> Ctrl-ScrollLock on mine if I'm following you correctly.
> Same as Sysrq-P if sysrq is enabled, except that the former
> is always available.

Try Ctrl-,Alt/AltGr-,Shift- with the same key.
>
> Are there other similar functions that are available without
> Sysrq enabled? If so, where can I find info about them?

Quite a lot.

In your keymap. e.g. on a SuSE system.

getkeycodes will output a map of the key codes.

% less /usr/share/kbd/keymaps/i386/include/linux-keys-bare.inc

alt keycode 59 = Console_1
alt keycode 60 = Console_2
...
control alt keycode 59 = Console_1
control alt keycode 60 = Console_2
...
alt keycode 71 = Ascii_7
alt keycode 72 = Ascii_8
...
alt keycode 103 = KeyboardSignal
alt keycode 105 = Decr_Console
alt keycode 106 = Incr_Console
...
shift keycode 104 = Scroll_Backward
shift keycode 109 = Scroll_Forward
control alt keycode 111 = Boot
...
keycode 84 = Last_Console # Alt+SysRq/PrintScrn
#keycode 99 = Control_backslash # SysRq/PrintScrn
keycode 99 = VoidSymbol
control keycode 99 = Control_backslash

plain keycode 70 = Scroll_Lock
shift keycode 70 = Show_Memory
control keycode 70 = Show_State
alt keycode 70 = Show_Registers
keycode 101 = Break # Ctrl+Break/Pause
control keycode 101 = Control_c
shift keycode 101 = Control_backslash
keycode 119 = Pause # Break/Pause




-Andi

2003-01-11 05:25:15

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [PATCH 2.5] speedup kallsyms_lookup

On Fri, Jan 10, 2003 at 11:14:47PM -0500, Albert D. Cahalan wrote:
> I'm told that a simple "ls -lR /proc" will crash a NUMA box
> with more than about 4000 tasks. Locks get held for a long
> time, and then the NMI watchdog causes a reboot. In spite of
> this, reading /proc/ksyms and /proc/kcore would work fine.
> You know what I'm thinking. :-( For the really big systems,
> this is the only path to take. So I'll be needing addresses
> of various data structures as well. The LKCD patches would
> be really helpful.

It's filed in the bugzilla, too, but I think it's a separate issue.
Basically proc_pid_readdir() has quadratic behavior. A rank-ordered
tree representation of the tasklist would be useful in order to restart
the walks in O(lg(n)) time. I haven't bothered starting since it looks
like too much code to merge into 2.5. Notable is that it actually does
drop the lock between rounds of linear-time iteration; there is a NUMA
badness lurking here with cachelines getting "trapped" on a node and
the offending /proc/ scanner reacquiring the lock unfairly. The longer-
lived iterations should actually lose the cacheline, so there is still
a small element of mystery.

There was some rumor radix trees could be used for this but I've yet to
see an O(lg(n)) or better seek operation (go to the nth present item in
the tree ordering) that's obvious how to do from the structure. From all
appearances the thing has to get scanned starting from 0 for a best-case
n/64 == O(n), worst-case n, restart of the list walk, where bottom-level
counts allow us to avoid looking at individual items, but it's only a
linear speedup, so the whole shebang is still quadratic, and the thing
will explode again just by multiplying the minimum number of tasks by
the factor of the average speedup (if it's actually a speedup and not a
pessimization). Maintaining rank in radix trees is actually somewhat
expensive, so it's probably not a great idea.

Also, scanning the bitmap doesn't help: only full processes are listed
in /proc/, not threads, and the check actually makes the bitmap scan
more expensive than the list walk, as you have to check the hashtables,
which by the time it matters will be loaded. I've actually tried this,
as it's relatively simple, and was disappointed. =( It's also are stuck
with the O(n) seek operation here, but the seek itself is slightly
better as it touches (far) fewer cachelines. The patch I posted on the
subject ages ago actually did the seeking incorrectly.


Bill