LinuxLists.cc - [PATCH] I3 sched tweaks...

2002-01-16 05:49:15

Subject: [PATCH] I3 sched tweaks...

This one actually compiles(!!):

1) Comment about RT event id removed (no longer relevent).
2) Order by address, delete rq_cpu().
3) lock_task_rq returns the rq, rather than assigning it, for clarity.
4) scheduler_tick needs no args (p is always equal to current).
5) Unused max_rq_len() function deleted.

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.3-pre1-mingoI3/include/linux/sched.h working-2.5.3-pre1-mingoI3-fix/include/linux/sched.h
--- linux-2.5.3-pre1-mingoI3/include/linux/sched.h Wed Jan 16 15:58:58 2002
+++ working-2.5.3-pre1-mingoI3-fix/include/linux/sched.h Wed Jan 16 16:30:54 2002
@@ -146,7 +146,7 @@
extern void update_process_times(int user);
extern void update_one_process(task_t *p, unsigned long user,
unsigned long system, int cpu);
-extern void scheduler_tick(task_t *p);
+extern void scheduler_tick(void);

#define MAX_SCHEDULE_TIMEOUT LONG_MAX
extern signed long FASTCALL(schedule_timeout(signed long timeout));
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.3-pre1-mingoI3/kernel/fork.c working-2.5.3-pre1-mingoI3-fix/kernel/fork.c
--- linux-2.5.3-pre1-mingoI3/kernel/fork.c Wed Jan 16 15:17:14 2002
+++ working-2.5.3-pre1-mingoI3-fix/kernel/fork.c Wed Jan 16 16:45:21 2002
@@ -704,7 +704,7 @@
* runqueue lock is not a problem.
*/
current->time_slice = 1;
- scheduler_tick(current);
+ scheduler_tick();
}
p->sleep_timestamp = jiffies;
__restore_flags(flags);
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.3-pre1-mingoI3/kernel/sched.c working-2.5.3-pre1-mingoI3-fix/kernel/sched.c
--- linux-2.5.3-pre1-mingoI3/kernel/sched.c Wed Jan 16 15:17:14 2002
+++ working-2.5.3-pre1-mingoI3-fix/kernel/sched.c Wed Jan 16 16:10:57 2002
@@ -37,11 +37,7 @@
*
* Locking rule: those places that want to lock multiple runqueues
* (such as the load balancing or the process migration code), lock
- * acquire operations must be ordered by the runqueue's cpu id.
- *
- * The RT event id is used to avoid calling into the the RT scheduler
- * if there is a RT task active in an SMP system but there is no
- * RT scheduling activity otherwise.
+ * acquire operations must be ordered by ascending &runqueue.
*/
struct runqueue {
spinlock_t lock;
@@ -57,22 +53,24 @@
#define this_rq() cpu_rq(smp_processor_id())
#define task_rq(p) cpu_rq((p)->cpu)
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
-#define rq_cpu(rq) ((rq) - runqueues)
#define rt_task(p) ((p)->policy != SCHED_OTHER)

-
-#define lock_task_rq(rq,p,flags) \
-do { \
+#define lock_task_rq(p,flags) \
+({ \
+ __label__ repeat_lock_task; \
+ struct runqueue *__rq; \
+ \
repeat_lock_task: \
- rq = task_rq(p); \
- spin_lock_irqsave(&rq->lock, flags); \
- if (unlikely(rq_cpu(rq) != (p)->cpu)) { \
- spin_unlock_irqrestore(&rq->lock, flags); \
+ __rq = task_rq(p); \
+ spin_lock_irqsave(&__rq->lock, (flags)); \
+ if (unlikely(__rq != task_rq(p))) { \
+ spin_unlock_irqrestore(&__rq->lock, flags); \
goto repeat_lock_task; \
} \
-} while (0)
+ __rq; \
+})

-#define unlock_task_rq(rq,p,flags) \
+#define unlock_task_rq(rq,p,flags) \
spin_unlock_irqrestore(&rq->lock, flags)

/*
@@ -185,7 +183,7 @@
cpu_relax();
barrier();
}
- lock_task_rq(rq, p, flags);
+ rq = lock_task_rq(p, flags);
if (unlikely(rq->curr == p)) {
unlock_task_rq(rq, p, flags);
goto repeat;
@@ -223,7 +221,7 @@
int success = 0;
runqueue_t *rq;

- lock_task_rq(rq, p, flags);
+ rq = lock_task_rq(p, flags);
p->state = TASK_RUNNING;
if (!p->array) {
activate_task(p, rq);
@@ -312,18 +310,6 @@
return sum;
}

-static inline unsigned long max_rq_len(void)
-{
- unsigned long i, curr, max = 0;
-
- for (i = 0; i < smp_num_cpus; i++) {
- curr = cpu_rq(i)->nr_running;
- if (curr > max)
- max = curr;
- }
- return max;
-}
-
/*
* Current runqueue is empty, or rebalance tick: if there is an
* inbalance (current runqueue is too short) then pull from
@@ -406,7 +392,7 @@
* Ok, lets do some actual balancing:
*/

- if (rq_cpu(busiest) < this_cpu) {
+ if (busiest < this_rq) {
spin_unlock(&this_rq->lock);
spin_lock(&busiest->lock);
spin_lock(&this_rq->lock);
@@ -504,10 +490,11 @@
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
*/
-void scheduler_tick(task_t *p)
+void scheduler_tick(void)
{
unsigned long now = jiffies;
runqueue_t *rq = this_rq();
+ task_t *p = current;

if (p == rq->idle || !rq->idle)
return idle_tick();
@@ -839,7 +826,7 @@
* We have to be careful, if called from sys_setpriority(),
* the task might be in the middle of scheduling on another CPU.
*/
- lock_task_rq(rq, p, flags);
+ rq = lock_task_rq(p, flags);
if (rt_task(p)) {
p->__nice = nice;
goto out_unlock;
@@ -853,7 +840,7 @@
if (array) {
enqueue_task(p, array);
/*
- * If the task is runnable and lowered its priority,
+ * If the task is running and lowered its priority,
* or increased its priority then reschedule its CPU:
*/
if ((nice < p->__nice) ||
@@ -938,7 +925,7 @@
* To be able to change p->policy safely, the apropriate
* runqueue lock must be held.
*/
- lock_task_rq(rq,p,flags);
+ rq = lock_task_rq(p, flags);

if (policy < 0)
policy = p->policy;
@@ -1231,7 +1218,7 @@
if (rq1 == rq2)
spin_lock(&rq1->lock);
else {
- if (rq_cpu(rq1) < rq_cpu(rq2)) {
+ if (rq1 < rq2) {
spin_lock(&rq1->lock);
spin_lock(&rq2->lock);
} else {
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.3-pre1-mingoI3/kernel/timer.c working-2.5.3-pre1-mingoI3-fix/kernel/timer.c
--- linux-2.5.3-pre1-mingoI3/kernel/timer.c Wed Jan 16 15:17:14 2002
+++ working-2.5.3-pre1-mingoI3-fix/kernel/timer.c Wed Jan 16 15:55:13 2002
@@ -594,7 +594,7 @@
if (local_bh_count(cpu) || local_irq_count(cpu) > 1)
kstat.per_cpu_system[cpu] += system;
}
- scheduler_tick(p);
+ scheduler_tick();
}

/*

--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-01-16 09:20:20

by Ingo Molnar

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

> 3) lock_task_rq returns the rq, rather than assigning it, for clarity.

i've made it an inline function instead of a macro.

Ingo

2002-01-16 09:17:28

by Ingo Molnar

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

On Wed, 16 Jan 2002, Rusty Russell wrote:

> 1) Comment about RT event id removed (no longer relevent).
> 2) Order by address, delete rq_cpu().
> 3) lock_task_rq returns the rq, rather than assigning it, for clarity.

ok.

> 4) scheduler_tick needs no args (p is always equal to current).

i have not taken this part. We have 'current' calculated in
update_process_times(), so why not pass it along to the scheduler_tick()
function?

> 5) Unused max_rq_len() function deleted.

ok.

Ingo

2002-01-16 10:35:00

by Rusty Russell

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

In message <Pine.LNX.4.33.0201160412180.24929-100000@devserv.devel.redhat.com>
you write:
> > 4) scheduler_tick needs no args (p is always equal to current).
>
> i have not taken this part. We have 'current' calculated in
> update_process_times(), so why not pass it along to the scheduler_tick()
> function?

Because it's redundant. It's *always* p == current (and the code
assumes this!), but I had to grep the callers to find out.

Moreover, the function doesn't make *sense* if p != current...

> > 3) lock_task_rq returns the rq, rather than assigning it, for clarity.
>
> i've made it an inline function instead of a macro.

I thought of that, but assumed you had a good reason for making it a
macro in the first place...

Thanks!
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-01-16 20:49:39

by Ingo Molnar

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

On Wed, 16 Jan 2002, Rusty Russell wrote:

> > > 4) scheduler_tick needs no args (p is always equal to current).
> >
> > i have not taken this part. We have 'current' calculated in
> > update_process_times(), so why not pass it along to the scheduler_tick()
> > function?
>
> Because it's redundant. It's *always* p == current (and the code
> assumes this!), but I had to grep the callers to find out.

we pass pointers across functions regularly, even if the pointer could be
calculated within the function. We do this in the timer code too. It's
slightly cheaper to pass an already existing (calculated) 'current'
pointer over to another function, instead of calculating it once more in
that function. This will be especially true once we make 'current' a tiny
bit more expensive (Alan's kernel stack coloring rewrite will do that i
think, it will be one more instruction to get 'current'.)

> Moreover, the function doesn't make *sense* if p != current...

yes - would it be perhaps cleaner then to name the variable 'this_task' or
something like that?

> > > 3) lock_task_rq returns the rq, rather than assigning it, for clarity.
> >
> > i've made it an inline function instead of a macro.
>
> I thought of that, but assumed you had a good reason for making it a
> macro in the first place...

no good reason, the macro started out being simple, but then grew in size
significantly, as the sophistication and correctness of the O(1) scheduler
improved ;-)

Ingo

2002-01-16 21:07:49

by Robert Love

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

On Wed, 2002-01-16 at 17:46, Ingo Molnar wrote:

> we pass pointers across functions regularly, even if the pointer could be
> calculated within the function. We do this in the timer code too. It's
> slightly cheaper to pass an already existing (calculated) 'current'
> pointer over to another function, instead of calculating it once more in
> that function. This will be especially true once we make 'current' a tiny
> bit more expensive (Alan's kernel stack coloring rewrite will do that i
> think, it will be one more instruction to get 'current'.)

Maybe we should benchmark it? It is very easy to calculate current.

Certainly I see the benefit if we start coloring the pointer (it adds 2
instructions I believe) but let's make sure it is worth passing another
32-bit argument. It could very well be, schedule_tick is called
enough...

> > Moreover, the function doesn't make *sense* if p != current...
>
> yes - would it be perhaps cleaner then to name the variable 'this_task' or
> something like that?

Yes, good idea.

Robert Love

2002-01-16 21:21:49

by Justin Carlson

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

On Wed, 2002-01-16 at 16:10, Robert Love wrote:
> On Wed, 2002-01-16 at 17:46, Ingo Molnar wrote:
>
> > we pass pointers across functions regularly, even if the pointer could be
> > calculated within the function. We do this in the timer code too. It's
> > slightly cheaper to pass an already existing (calculated) 'current'
> > pointer over to another function, instead of calculating it once more in
> > that function. This will be especially true once we make 'current' a tiny
> > bit more expensive (Alan's kernel stack coloring rewrite will do that i
> > think, it will be one more instruction to get 'current'.)
>
> Maybe we should benchmark it? It is very easy to calculate current.
>
> Certainly I see the benefit if we start coloring the pointer (it adds 2
> instructions I believe) but let's make sure it is worth passing another
> 32-bit argument. It could very well be, schedule_tick is called
> enough...

Don't forget that, in non-x86 land, current tends to be just kept in a
register. No computations required. Certainly passing it around on,
e.g. mips is a clear loss.

-Justin

Attachments:

(No filename) (232.00 B)

2002-01-16 21:26:01

by Robert Love

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

On Wed, 2002-01-16 at 16:19, Justin Carlson wrote:

> Don't forget that, in non-x86 land, current tends to be just kept in a
> register. No computations required. Certainly passing it around on,
> e.g. mips is a clear loss.

current is stored in a register (esp) in x86, too. This is why I
cautioned that looking up current was cheap -- I think every sane arch
stores current in some fast access way. That's why it is a macro -- it
is assembly code to quickly snag the address.

So is passing current still worth it?

Robert Love

2002-01-16 21:35:03

by Ingo Molnar

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

On 16 Jan 2002, Robert Love wrote:

> current is stored in a register (esp) in x86, too. [...]

it's stored in a register, but then it also needs to do some more work to
get at the true pointer. The typical way to load 'current' on x86 into
%eax:

b8 00 e0 ff ff mov $0xffffe000,%eax
21 e0 and %esp,%eax

7 bytes icache footprint.

while the cost of moving an already calculated 'current' pointer to the
stack is:

50 push %eax

1 byte instruction.

and in addition, consider that for larger functions, the 'current' value
will be moved to a gcc spill register anyway, it will end up looking like
this:

b8 00 e0 ff ff mov $0xffffe000,%edi
21 e0 and %esp,%edi
89 7c 24 14 mov %edi,0x14(%esp,1)

while in the function-call variant the pointer is on the stack already, so
accessing it is easy:

89 7c 24 14 mov 0x10(%esp,1), %eax

so on x86, considering the sched_tick() case, it's slightly faster to pass
the argument. But there isnt any big difference.

but add an instruction or two to the 'current' calculation method, and the
icache footprint and actual overhead will be more clearly in favor of
function calls.

i agree that on architectures where 'current' is stored in a register it's
better to use 'current' explicitly.

Ingo

2002-01-16 21:46:39

by Paul Mackerras

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

Ingo Molnar writes:

> It's
> slightly cheaper to pass an already existing (calculated) 'current'
> pointer over to another function, instead of calculating it once more in
> that function.

On x86 that's true; many other architectures - alpha, ia64, m68k,
mips, mips64, parisc, ppc, ppc64, sparc, sparc64 - keep current in a
register already and thus it is slightly more expensive to pass it as
a parameter instead of just using current in the function.

Either way surely the cost is tiny, and the maintainability
considerations should prevail. Having a function which takes a
task_struct * parameter which _has_ to be current sounds to me like an
invitation for somebody to get it wrong down the track.

Paul.

2002-01-17 03:37:18

by Rusty Russell

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

In message <[email protected]> you
write:
>
> On Wed, 16 Jan 2002, Rusty Russell wrote:
>
> > > > 4) scheduler_tick needs no args (p is always equal to current).
> > >
> > > i have not taken this part. We have 'current' calculated in
> > > update_process_times(), so why not pass it along to the scheduler_tick()
> > > function?
> >
> > Because it's redundant. It's *always* p == current (and the code
> > assumes this!), but I had to grep the callers to find out.
>
> we pass pointers across functions regularly, even if the pointer could be
> calculated within the function. We do this in the timer code too.

Look at it semantically: scheduler_tick() is just a function called
regularly for scheduler maintenance. It might need the CPU number,
the runqueue length, or phase of the moon: the caller shouldn't care.

If it was a static fn, maybe this optimization makes sense. But it's
an interface wart, and the "optimization" is utterly marginal anyway.

That said, I never would have sent such a trivial patch by itself: I
can't believe how many keystrokes were wasted over this issue!

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-01-17 13:02:42

by Bill Davidsen

[permalink] [raw]

Subject: Re: [PATCH] I3 sched tweaks...

In article <1011216429.1083.95.camel@phantasy> you write:
| On Wed, 2002-01-16 at 16:19, Justin Carlson wrote:
|
| > Don't forget that, in non-x86 land, current tends to be just kept in a
| > register. No computations required. Certainly passing it around on,
| > e.g. mips is a clear loss.
|
| current is stored in a register (esp) in x86, too. This is why I
| cautioned that looking up current was cheap -- I think every sane arch
| stores current in some fast access way. That's why it is a macro -- it
| is assembly code to quickly snag the address.
|
| So is passing current still worth it?

It sounds as if it may be one of those "it depends" things, but if
current is not calculated the exact same way in all places... people
talk about coloring (or colouring in Canada) and several other things I
forget, it's sort of nice to be sure it's calculated just once, and that
if the calculation becomes more expensive in the future that the
tradeoff between passing and calculating won't change.

I guess passing is safer for the future and of a known cost.

--
bill davidsen <[email protected]>
CTO, TMR Associates, Inc
Doing interesting things with little computers since 1979.