2002-06-19 11:22:10

by Dave Jones

[permalink] [raw]
Subject: Linux 2.5.23-dj1

Keeping up to date, and getting some of the more important bits
out of the way from the pending folder.

As usual,..

Patch against 2.5.23 vanilla is available from:
ftp://ftp.kernel.org/pub/linux/kernel/people/davej/patches/2.5/

Merged patch archive: http://www.codemonkey.org.uk/patches/merged/

Check http://www.codemonkey.org.uk/Linux-2.5.html before reporting
known bugs that are also in mainline.

-- Davej.

2.5.23-dj1
o Small UP optimisation in the scheduler. (James Bottomley)
o Update x86 cpufreq scaling code. (Dominik Brodowski)
o Export ioremap_nocache() for modules. (Andi Kleen)
o Export default_wake_function() for modules. (Benjamin LaHaise)
o Compaq hotplug compile fixes. (Felipe Contreras)
o Fix migration thread for non linear numbered CPUs. (Ingo Molnar)
o Framebuffer updates. (James Simmons)
o Introduce CONFIG_ISA option for i386. (Andi Kleen)
o Fix bad locking in driver/ core. (Arnd Bergmann)

--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs


2002-06-19 23:38:39

by Ingo Molnar

[permalink] [raw]
Subject: [patch] scheduler bits from 2.5.23-dj1


the scheduler optimisation in 2.5.23-dj1, from James Bottomley, looks fine
to me. I did some modifications:

- use another form: task_cpu(p) & set_task_cpu(p, cpu), which is shorter
for the common uses - and this way the optimisation also makes the
source more compact.

- put task_cpu() and set_task_cpu() into sched.h, since these are
task-level operations.

- there is no need for the #if CONFIG_SMP, gcc is good at optimizing away
a branch if it has a (0 != 0) condition on UP :-)

- (i did not carry over all the comment changes, only the speling ones.)

another change in 2.5.23-dj1 is the initialization of the pidhash in
sched_init(). It does not belong there - please create a new init function
within fork.c if needed. The pidhash init used to be in sched_init(), but
this doesnt make it right.

And i'm not quite sure whether it's needed to expose the pidhash to the
rest of the kernel - it would be much simpler to have it in kernel/fork.c
locally, and find_task_by_pid() would be a function instead of an inline.
(it has a ~49 bytes footprint on x86, it's rather heavy i think.)

my current scheduler patchset (against 2.5.23, tested) is attached.

Ingo

diff -Nru a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h Thu Jun 20 01:21:34 2002
+++ b/include/linux/sched.h Thu Jun 20 01:21:34 2002
@@ -863,6 +863,34 @@
clear_thread_flag(TIF_SIGPENDING);
}

+/*
+ * Wrappers for p->thread_info->cpu access. No-op on UP.
+ */
+#ifdef CONFIG_SMP
+
+static inline unsigned int task_cpu(struct task_struct *p)
+{
+ return p->thread_info->cpu;
+}
+
+static inline unsigned int set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+ p->thread_info->cpu = cpu;
+}
+
+#else
+
+static inline unsigned int task_cpu(struct task_struct *p)
+{
+ return 0;
+}
+
+static inline unsigned int set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+}
+
+#endif /* CONFIG_SMP */
+
#endif /* __KERNEL__ */

#endif
diff -Nru a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c Thu Jun 20 01:21:34 2002
+++ b/kernel/sched.c Thu Jun 20 01:21:34 2002
@@ -148,7 +148,7 @@

#define cpu_rq(cpu) (runqueues + (cpu))
#define this_rq() cpu_rq(smp_processor_id())
-#define task_rq(p) cpu_rq((p)->thread_info->cpu)
+#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define rt_task(p) ((p)->prio < MAX_RT_PRIO)

@@ -284,8 +284,8 @@
need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);

- if (!need_resched && !nrpolling && (p->thread_info->cpu != smp_processor_id()))
- smp_send_reschedule(p->thread_info->cpu);
+ if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
+ smp_send_reschedule(task_cpu(p));
preempt_enable();
#else
set_tsk_need_resched(p);
@@ -366,10 +366,10 @@
* currently. Do not violate hard affinity.
*/
if (unlikely(sync && (rq->curr != p) &&
- (p->thread_info->cpu != smp_processor_id()) &&
+ (task_cpu(p) != smp_processor_id()) &&
(p->cpus_allowed & (1UL << smp_processor_id())))) {

- p->thread_info->cpu = smp_processor_id();
+ set_task_cpu(p, smp_processor_id());
task_rq_unlock(rq, &flags);
goto repeat_lock_task;
}
@@ -409,7 +409,7 @@
p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
p->prio = effective_prio(p);
}
- p->thread_info->cpu = smp_processor_id();
+ set_task_cpu(p, smp_processor_id());
activate_task(p, rq);

rq_unlock(rq);
@@ -663,7 +663,7 @@
*/
dequeue_task(next, array);
busiest->nr_running--;
- next->thread_info->cpu = this_cpu;
+ set_task_cpu(next, this_cpu);
this_rq->nr_running++;
enqueue_task(next, this_rq->active);
if (next->prio < current->prio)
@@ -821,7 +821,7 @@
spin_lock_irq(&rq->lock);

/*
- * if entering off a kernel preemption go straight
+ * if entering off of a kernel preemption go straight
* to picking the next task.
*/
if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
@@ -906,7 +906,7 @@
schedule();
ti->preempt_count = 0;

- /* we can miss a preemption opportunity between schedule and now */
+ /* we could miss a preemption opportunity between schedule and now */
barrier();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
@@ -1630,7 +1630,7 @@

void __init init_idle(task_t *idle, int cpu)
{
- runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->thread_info->cpu);
+ runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(task_cpu(idle));
unsigned long flags;

__save_flags(flags);
@@ -1642,7 +1642,7 @@
idle->array = NULL;
idle->prio = MAX_PRIO;
idle->state = TASK_RUNNING;
- idle->thread_info->cpu = cpu;
+ set_task_cpu(idle, cpu);
double_rq_unlock(idle_rq, rq);
set_tsk_need_resched(idle);
__restore_flags(flags);
@@ -1751,7 +1751,7 @@
* Can the task run on the task's current CPU? If not then
* migrate the process off to a proper CPU.
*/
- if (new_mask & (1UL << p->thread_info->cpu)) {
+ if (new_mask & (1UL << task_cpu(p))) {
task_rq_unlock(rq, &flags);
goto out;
}
@@ -1760,7 +1760,7 @@
* it is sufficient to simply update the task's cpu field.
*/
if (!p->array && (p != rq->curr)) {
- p->thread_info->cpu = __ffs(p->cpus_allowed);
+ set_task_cpu(p, __ffs(p->cpus_allowed));
task_rq_unlock(rq, &flags);
goto out;
}
@@ -1775,6 +1775,8 @@
preempt_enable();
}

+static __initdata int master_migration_thread;
+
static int migration_thread(void * bind_cpu)
{
int cpu = (int) (long) bind_cpu;
@@ -1786,14 +1788,12 @@
sigfillset(&current->blocked);
set_fs(KERNEL_DS);

- /* FIXME: First CPU may not be zero, but this crap code
- vanishes with hotplug cpu patch anyway. --RR */
/*
- * The first migration thread is started on CPU #0. This one can
- * migrate the other migration threads to their destination CPUs.
+ * The first migration thread is started on the boot CPU, it
+ * migrates the other migration threads to their destination CPUs.
*/
- if (cpu != 0) {
- while (!cpu_rq(0)->migration_thread)
+ if (cpu != master_migration_thread) {
+ while (!cpu_rq(master_migration_thread)->migration_thread)
yield();
set_cpus_allowed(current, 1UL << cpu);
}
@@ -1829,18 +1829,18 @@
cpu_dest = __ffs(p->cpus_allowed);
rq_dest = cpu_rq(cpu_dest);
repeat:
- cpu_src = p->thread_info->cpu;
+ cpu_src = task_cpu(p);
rq_src = cpu_rq(cpu_src);

local_irq_save(flags);
double_rq_lock(rq_src, rq_dest);
- if (p->thread_info->cpu != cpu_src) {
+ if (task_cpu(p) != cpu_src) {
double_rq_unlock(rq_src, rq_dest);
local_irq_restore(flags);
goto repeat;
}
if (rq_src == rq) {
- p->thread_info->cpu = cpu_dest;
+ set_task_cpu(p, cpu_dest);
if (p->array) {
deactivate_task(p, rq_src);
activate_task(p, rq_dest);
@@ -1857,7 +1857,9 @@
{
int cpu;

- current->cpus_allowed = 1UL << 0;
+ master_migration_thread = smp_processor_id();
+ current->cpus_allowed = 1UL << master_migration_thread;
+
for (cpu = 0; cpu < NR_CPUS; cpu++) {
if (!cpu_online(cpu))
continue;

2002-06-19 23:47:30

by Dave Jones

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Thu, Jun 20, 2002 at 01:35:35AM +0200, Ingo Molnar wrote:

Hi Ingo,

> the scheduler optimisation in 2.5.23-dj1, from James Bottomley, look fine
> to me. I did some modifications:

Thanks for taking time out to look them over..
The bulk of the UP optimisation was by Mikael Pettersson, James just
fixed up something I goofed in an earlier patchset.


> - there is no need for the #if CONFIG_SMP, gcc is good at optimizing away
> a branch if it has a (0 != 0) condition on UP :-)

*nod*, I had intended to clean this up, but there's only so many hours
in a day.. 8-)

> another change in 2.5.23-dj1 is the initialization of the pidhash in
> sched_init(). It does not belong there - please create a new init function
> within fork.c if needed. The pidhash init used to be in sched_init(), but
> this doesnt make it right.

Agreed.

> And i'm not quite sure whether it's needed to expose the pidhash to the
> rest of the kernel - it would be much simpler to have it in kernel/fork.c
> locally, and find_task_by_pid() would be a function instead of an inline.
> (it has a ~49 bytes footprint on x86, it's rather heavy i think.)

I'll take a look at this tomorrow, unless William "no sleep `til 2.6" Irwin
beats me to it 8-) (he did this part of the patch iirc).

> my current scheduler patchset (against 2.5.23, tested) is attached.

Thanks again.

Dave.

--
| Dave Jones. http://www.codemonkey.org.uk
| SuSE Labs

2002-06-19 23:47:11

by Robert Love

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Wed, 2002-06-19 at 16:35, Ingo Molnar wrote:

> the scheduler optimisation in 2.5.23-dj1, from James Bottomley, look fine
> to me. I did some modifications:

Nice.

> +static inline unsigned int task_cpu(struct task_struct *p)
> +static inline unsigned int set_task_cpu(struct task_struct *p, unsigned int cpu)

Technically, shouldn't we make these `unsigned long' ?

I know on x86 we store cpu as a `u32' so it does not matter per se, but
in reality it is an unsigned long and other architectures may export it
as such.

Further, we compare and set it against the various CPU bitmaps and they
are all `unsigned long' and we do shifts against 1UL ...

Robert Love

2002-06-19 23:57:02

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Thu, Jun 20, 2002 at 01:36:29AM +0200, Ingo Molnar wrote:
> another change in 2.5.23-dj1 is the initialization of the pidhash in
> sched_init(). It does not belong there - please create a new init function
> within fork.c if needed. The pidhash init used to be in sched_init(), but
> this doesnt make it right.

On its way shortly.


Cheers,
Bill

2002-06-20 00:07:01

by Andrea Arcangeli

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Wed, Jun 19, 2002 at 04:47:00PM -0700, Robert Love wrote:
> On Wed, 2002-06-19 at 16:35, Ingo Molnar wrote:
>
> > the scheduler optimisation in 2.5.23-dj1, from James Bottomley, look fine
> > to me. I did some modifications:
>
> Nice.
>
> > +static inline unsigned int task_cpu(struct task_struct *p)
> > +static inline unsigned int set_task_cpu(struct task_struct *p, unsigned int cpu)
>
> Technically, shouldn't we make these `unsigned long' ?

obviously not. Supporting 4G cpus is enough for this century, so the
other 32bit would be just wasted space. the 1 in the shiftleft needs the
UL anyways to be correct with >32 cpus (it's not strictly a bug right
now to forget the UL but if we get it right we'll be able to go 64-way
on 64bit systems with no change other than NR_TASKS). So the bitmasks
must be all unsigned longs, the cpu numbers are definitely fine as
unsigned ints.

Even after we break at some point the 64CPU limit growing the bitmask
ala sigset_t, still the cpu numbers will remain unsigned int for a very
long time (probably we'll never have a chance to see the need of
unsigned long cpu numbers in our lives).

Andrea

2002-06-20 00:10:55

by Robert Love

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Wed, 2002-06-19 at 17:07, Andrea Arcangeli wrote:

> obviously not. Supporting 4G cpus is enough for this century, so the
> other 32bit would be just wasted space. the 1 in the shiftleft needs the
> UL anyways to be correct with >32 cpus (it's not strictly a bug right
> now to forget the UL but if we get it right we'll be able to go 64-way
> on 64bit systems with no change other than NR_TASKS). So the bitmasks
> must be all unsigned longs, the cpu numbers are definitely fine as
> unsigned ints.

Eh, very true. I was confusing the bitmasks and the counts.

Sorry, Ingo - ignore me.

Robert Love

2002-06-20 07:27:36

by Manik Raina

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

Ingo,
A small doubt,
Shouldn't this function below return something ?
set_task_cpu() should return unsigned int but it
seems to do nothing ....


Ingo Molnar wrote:
> +
> +static inline unsigned int set_task_cpu(struct task_struct *p, unsigned int cpu)
> +{
> +}
> +
> +#endif /* CONFIG_SMP */
> +
> #endif /* __KERNEL__ */
>

2002-06-20 10:11:03

by Mikael Pettersson

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Thu, 20 Jun 2002 00:26:39 -0700, Manik Raina wrote:
> A small doubt,
> Should'nt this function below return something ?
> set_task_cpu() should return unsigned int but it
> seems to do nothing ....
>
>
>Ingo Molnar wrote:
>> +
>> +static inline unsigned int set_task_cpu(struct task_struct *p, unsigned int cpu)
>> +{
>> +}

My UP CPU number optimisation patch, from which this originates,
made the set_${foo}_cpu() inline 'void'. If for some reason a
return value is needed, just return 0 since that's what the
optimised-away "->cpu = 0" assignment would have had as value.

(The difference is that I had foo=thread_info, but Ingo lifted it
up so foo=task_struct here. I also removed the x86 thread_info cpu
field on UP.)

/Mikael

2002-06-20 13:15:38

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1


On Thu, 20 Jun 2002, Manik Raina wrote:

> Ingo,
> A small doubt,
> Should'nt this function below return something ?
> set_task_cpu() should return unsigned int but it
> seems to do nothing ....

it's a NOP on UP. On SMP it changes p->thread_info->cpu.

Ingo


2002-06-20 13:36:16

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1


On Thu, 20 Jun 2002, Manik Raina wrote:

> A small doubt,
> Should'nt this function below return something ?
> set_task_cpu() should return unsigned int but it
> seems to do nothing ....

now i understand. We shouldnt return anything, it should be a 'void', like
Mikael Pettersson suggests. New scheduler patch attached, with this fixed,
against 2.5.23.

Ingo

diff -Nru a/include/linux/sched.h b/include/linux/sched.h
--- a/include/linux/sched.h Thu Jun 20 15:32:36 2002
+++ b/include/linux/sched.h Thu Jun 20 15:32:36 2002
@@ -863,6 +863,34 @@
clear_thread_flag(TIF_SIGPENDING);
}

+/*
+ * Wrappers for p->thread_info->cpu access. No-op on UP.
+ */
+#ifdef CONFIG_SMP
+
+static inline unsigned int task_cpu(struct task_struct *p)
+{
+ return p->thread_info->cpu;
+}
+
+static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+ p->thread_info->cpu = cpu;
+}
+
+#else
+
+static inline unsigned int task_cpu(struct task_struct *p)
+{
+ return 0;
+}
+
+static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
+{
+}
+
+#endif /* CONFIG_SMP */
+
#endif /* __KERNEL__ */

#endif
diff -Nru a/include/linux/smp.h b/include/linux/smp.h
diff -Nru a/kernel/sched.c b/kernel/sched.c
--- a/kernel/sched.c Thu Jun 20 15:32:36 2002
+++ b/kernel/sched.c Thu Jun 20 15:32:36 2002
@@ -148,7 +148,7 @@

#define cpu_rq(cpu) (runqueues + (cpu))
#define this_rq() cpu_rq(smp_processor_id())
-#define task_rq(p) cpu_rq((p)->thread_info->cpu)
+#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define rt_task(p) ((p)->prio < MAX_RT_PRIO)

@@ -284,8 +284,8 @@
need_resched = test_and_set_tsk_thread_flag(p,TIF_NEED_RESCHED);
nrpolling |= test_tsk_thread_flag(p,TIF_POLLING_NRFLAG);

- if (!need_resched && !nrpolling && (p->thread_info->cpu != smp_processor_id()))
- smp_send_reschedule(p->thread_info->cpu);
+ if (!need_resched && !nrpolling && (task_cpu(p) != smp_processor_id()))
+ smp_send_reschedule(task_cpu(p));
preempt_enable();
#else
set_tsk_need_resched(p);
@@ -366,10 +366,10 @@
* currently. Do not violate hard affinity.
*/
if (unlikely(sync && (rq->curr != p) &&
- (p->thread_info->cpu != smp_processor_id()) &&
+ (task_cpu(p) != smp_processor_id()) &&
(p->cpus_allowed & (1UL << smp_processor_id())))) {

- p->thread_info->cpu = smp_processor_id();
+ set_task_cpu(p, smp_processor_id());
task_rq_unlock(rq, &flags);
goto repeat_lock_task;
}
@@ -409,7 +409,7 @@
p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
p->prio = effective_prio(p);
}
- p->thread_info->cpu = smp_processor_id();
+ set_task_cpu(p, smp_processor_id());
activate_task(p, rq);

rq_unlock(rq);
@@ -663,7 +663,7 @@
*/
dequeue_task(next, array);
busiest->nr_running--;
- next->thread_info->cpu = this_cpu;
+ set_task_cpu(next, this_cpu);
this_rq->nr_running++;
enqueue_task(next, this_rq->active);
if (next->prio < current->prio)
@@ -821,7 +821,7 @@
spin_lock_irq(&rq->lock);

/*
- * if entering off a kernel preemption go straight
+ * if entering off of a kernel preemption go straight
* to picking the next task.
*/
if (unlikely(preempt_get_count() & PREEMPT_ACTIVE))
@@ -906,7 +906,7 @@
schedule();
ti->preempt_count = 0;

- /* we can miss a preemption opportunity between schedule and now */
+ /* we could miss a preemption opportunity between schedule and now */
barrier();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
@@ -1630,7 +1630,7 @@

void __init init_idle(task_t *idle, int cpu)
{
- runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->thread_info->cpu);
+ runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(task_cpu(idle));
unsigned long flags;

__save_flags(flags);
@@ -1642,7 +1642,7 @@
idle->array = NULL;
idle->prio = MAX_PRIO;
idle->state = TASK_RUNNING;
- idle->thread_info->cpu = cpu;
+ set_task_cpu(idle, cpu);
double_rq_unlock(idle_rq, rq);
set_tsk_need_resched(idle);
__restore_flags(flags);
@@ -1751,7 +1751,7 @@
* Can the task run on the task's current CPU? If not then
* migrate the process off to a proper CPU.
*/
- if (new_mask & (1UL << p->thread_info->cpu)) {
+ if (new_mask & (1UL << task_cpu(p))) {
task_rq_unlock(rq, &flags);
goto out;
}
@@ -1760,7 +1760,7 @@
* it is sufficient to simply update the task's cpu field.
*/
if (!p->array && (p != rq->curr)) {
- p->thread_info->cpu = __ffs(p->cpus_allowed);
+ set_task_cpu(p, __ffs(p->cpus_allowed));
task_rq_unlock(rq, &flags);
goto out;
}
@@ -1775,6 +1775,8 @@
preempt_enable();
}

+static __initdata int master_migration_thread;
+
static int migration_thread(void * bind_cpu)
{
int cpu = (int) (long) bind_cpu;
@@ -1786,14 +1788,12 @@
sigfillset(&current->blocked);
set_fs(KERNEL_DS);

- /* FIXME: First CPU may not be zero, but this crap code
- vanishes with hotplug cpu patch anyway. --RR */
/*
- * The first migration thread is started on CPU #0. This one can
- * migrate the other migration threads to their destination CPUs.
+ * The first migration thread is started on the boot CPU, it
+ * migrates the other migration threads to their destination CPUs.
*/
- if (cpu != 0) {
- while (!cpu_rq(0)->migration_thread)
+ if (cpu != master_migration_thread) {
+ while (!cpu_rq(master_migration_thread)->migration_thread)
yield();
set_cpus_allowed(current, 1UL << cpu);
}
@@ -1829,18 +1829,18 @@
cpu_dest = __ffs(p->cpus_allowed);
rq_dest = cpu_rq(cpu_dest);
repeat:
- cpu_src = p->thread_info->cpu;
+ cpu_src = task_cpu(p);
rq_src = cpu_rq(cpu_src);

local_irq_save(flags);
double_rq_lock(rq_src, rq_dest);
- if (p->thread_info->cpu != cpu_src) {
+ if (task_cpu(p) != cpu_src) {
double_rq_unlock(rq_src, rq_dest);
local_irq_restore(flags);
goto repeat;
}
if (rq_src == rq) {
- p->thread_info->cpu = cpu_dest;
+ set_task_cpu(p, cpu_dest);
if (p->array) {
deactivate_task(p, rq_src);
activate_task(p, rq_dest);
@@ -1857,7 +1857,9 @@
{
int cpu;

- current->cpus_allowed = 1UL << 0;
+ master_migration_thread = smp_processor_id();
+ current->cpus_allowed = 1UL << master_migration_thread;
+
for (cpu = 0; cpu < NR_CPUS; cpu++) {
if (!cpu_online(cpu))
continue;

2002-06-20 17:21:33

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Thu, Jun 20, 2002 at 01:47:29AM +0200, Dave Jones wrote:
> I'll take a look at this tomorrow, unless William "no sleep `til 2.6" Irwin
> beats me to it 8-) (he did this part of the patch iirc).

How does this look? (compiles, boots, & runs on UP i386)

Cheers,
Bill


diff -urN linux-2.5.23-virgin/include/linux/sched.h linux-2.5.23-wli/include/linux/sched.h
--- linux-2.5.23-virgin/include/linux/sched.h Thu Jun 20 00:10:26 2002
+++ linux-2.5.23-wli/include/linux/sched.h Thu Jun 20 08:21:30 2002
@@ -144,6 +144,7 @@
typedef struct task_struct task_t;

extern void sched_init(void);
+extern void pidhash_init(void);
extern void init_idle(task_t *idle, int cpu);
extern void show_state(void);
extern void cpu_init (void);
diff -urN linux-2.5.23-virgin/init/main.c linux-2.5.23-wli/init/main.c
--- linux-2.5.23-virgin/init/main.c Thu Jun 20 00:10:27 2002
+++ linux-2.5.23-wli/init/main.c Thu Jun 20 08:21:04 2002
@@ -347,6 +347,7 @@
trap_init();
init_IRQ();
sched_init();
+ pidhash_init();
softirq_init();
time_init();

diff -urN linux-2.5.23-virgin/kernel/fork.c linux-2.5.23-wli/kernel/fork.c
--- linux-2.5.23-virgin/kernel/fork.c Thu Jun 20 00:10:27 2002
+++ linux-2.5.23-wli/kernel/fork.c Thu Jun 20 08:20:33 2002
@@ -27,7 +27,7 @@
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/jiffies.h>
-
+#include <linux/bootmem.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
@@ -49,6 +49,25 @@

list_t *pidhash;
unsigned long pidhash_size;
+
+void __init pidhash_init(void)
+{
+ int i, size = PAGE_SIZE*sizeof(list_t);
+
+ do {
+ pidhash = (list_t *)alloc_bootmem(size);
+ if (!pidhash)
+ size >>= 1;
+ } while (!pidhash && size >= sizeof(list_t));
+
+ if (!pidhash)
+ panic("Failed to allocated pid hash table!\n");
+
+ pidhash_size = size/sizeof(list_t);
+
+ for (i = 0; i < pidhash_size; ++i)
+ INIT_LIST_HEAD(&pidhash[i]);
+}

rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */

diff -urN linux-2.5.23-virgin/kernel/sched.c linux-2.5.23-wli/kernel/sched.c
--- linux-2.5.23-virgin/kernel/sched.c Thu Jun 20 00:10:27 2002
+++ linux-2.5.23-wli/kernel/sched.c Thu Jun 20 08:20:47 2002
@@ -1662,21 +1662,7 @@
void __init sched_init(void)
{
runqueue_t *rq;
- int i, j, k, size = PAGE_SIZE*sizeof(list_t);
-
- do {
- pidhash = (list_t *)alloc_bootmem(size);
- if (!pidhash)
- size >>= 1;
- } while (!pidhash && size >= sizeof(list_t));
-
- if (!pidhash)
- panic("Failed to allocated pid hash table!\n");
-
- pidhash_size = size/sizeof(list_t);
-
- for (i = 0; i < pidhash_size; ++i)
- INIT_LIST_HEAD(&pidhash[i]);
+ int i, j, k;

for (i = 0; i < NR_CPUS; i++) {
prio_array_t *array;

2002-06-20 17:34:28

by Ingo Molnar

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1


On Thu, 20 Jun 2002, William Lee Irwin III wrote:

> On Thu, Jun 20, 2002 at 01:47:29AM +0200, Dave Jones wrote:
> > I'll take a look at this tomorrow, unless William "no sleep `til 2.6" Irwin
> > beats me to it 8-) (he did this part of the patch iirc).
>
> How does this look? (compiles, boots, & runs on UP i386)

looks good to me - what do you think about my other pidhash suggestion:

> And i'm not quite sure whether it's needed to expose the pidhash to the
> rest of the kernel - it would be much simpler to have it in
> kernel/fork.c locally, and find_task_by_pid() would be a function
> instead of an inline. (it has a ~49 bytes footprint on x86, it's rather
> heavy i think.)

Ingo



2002-06-20 17:53:04

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Thu, Jun 20, 2002 at 07:31:18PM +0200, Ingo Molnar wrote:
> looks good to me - what do you think about my other pidhash suggestion:

>> And i'm not quite sure whether it's needed to expose the pidhash to the
>> rest of the kernel - it would be much simpler to have it in
>> kernel/fork.c locally, and find_task_by_pid() would be a function
>> instead of an inline. (it has a ~49 bytes footprint on x86, it's rather
>> heavy i think.)

It's an excellent idea, I think I only forgot that was in the queue of
things to write. I'll follow up with that as well.


Thanks,
Bill

2002-06-20 18:18:04

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Thu, Jun 20, 2002 at 07:31:18PM +0200, Ingo Molnar wrote:
> looks good to me - what do you think about my other pidhash suggestion:

An excellent idea. I didn't go all the way and make the pidhash entirely
private to fork.c but taking find_task_by_pid() out-of-line is implemented
in the following, built atop the prior patch. I can also privatize the
pidhash entirely if that's wanted.


Cheers,
Bill

diff -urN linux-2.5.23-virgin/include/linux/sched.h linux-2.5.23-wli/include/linux/sched.h
--- linux-2.5.23-virgin/include/linux/sched.h Thu Jun 20 10:53:42 2002
+++ linux-2.5.23-wli/include/linux/sched.h Thu Jun 20 10:55:18 2002
@@ -459,19 +459,7 @@
list_del(&p->pidhash_list);
}

-static inline task_t *find_task_by_pid(int pid)
-{
- list_t *p, *pid_list = &pidhash[pid_hashfn(pid)];
-
- list_for_each(p, pid_list) {
- task_t *t = list_entry(p, task_t, pidhash_list);
-
- if(t->pid == pid)
- return t;
- }
-
- return NULL;
-}
+extern task_t *find_task_by_pid(int pid);

/* per-UID process charging. */
extern struct user_struct * alloc_uid(uid_t);
diff -urN linux-2.5.23-virgin/kernel/fork.c linux-2.5.23-wli/kernel/fork.c
--- linux-2.5.23-virgin/kernel/fork.c Thu Jun 20 10:53:42 2002
+++ linux-2.5.23-wli/kernel/fork.c Thu Jun 20 10:55:55 2002
@@ -69,6 +69,21 @@
INIT_LIST_HEAD(&pidhash[i]);
}

+task_t *find_task_by_pid(int pid)
+{
+ list_t *p, *pid_list = &pidhash[pid_hashfn(pid)];
+
+ list_for_each(p, pid_list) {
+ task_t *t = list_entry(p, task_t, pidhash_list);
+
+ if(t->pid == pid)
+ return t;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL(find_task_by_pid);
+
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */

void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)

2002-06-20 19:27:10

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Thu, Jun 20, 2002 at 07:31:18PM +0200, Ingo Molnar wrote:
>> looks good to me - what do you think about my other pidhash suggestion:

On Thu, Jun 20, 2002 at 11:17:29AM -0700, William Lee Irwin III wrote:
> An excellent idea. I didn't go all the way and make the pidhash entirely
> private to fork.c but taking find_task_by_pid() out-of-line is implemented
> in the following, built atop the prior patch. I can also privatize the
> pidhash entirely if that's wanted.

The final piece, built on top of the previous two. Please comment and/or
use your discretion. Although I test compiled and booted it on UP i386,
this may not be the desired cleanup. The original purpose of this was to
enforce the usage of Linux' standard list data type for the pidhash.


Cheers,
Bill


diff -urN linux-2.5.23-virgin/include/linux/sched.h linux-2.5.23-wli/include/linux/sched.h
--- linux-2.5.23-virgin/include/linux/sched.h Thu Jun 20 11:39:08 2002
+++ linux-2.5.23-wli/include/linux/sched.h Thu Jun 20 11:41:26 2002
@@ -441,24 +441,8 @@
extern struct task_struct *init_tasks[NR_CPUS];

/* PID hashing. */
-extern list_t *pidhash;
-extern unsigned long pidhash_size;
-
-static inline unsigned pid_hashfn(pid_t pid)
-{
- return ((pid >> 8) ^ pid) & (pidhash_size - 1);
-}
-
-static inline void hash_pid(struct task_struct *p)
-{
- list_add(&p->pidhash_list, &pidhash[pid_hashfn(p->pid)]);
-}
-
-static inline void unhash_pid(struct task_struct *p)
-{
- list_del(&p->pidhash_list);
-}
-
+extern void hash_pid(task_t *);
+extern void unhash_pid(task_t *);
extern task_t *find_task_by_pid(int pid);

/* per-UID process charging. */
diff -urN linux-2.5.23-virgin/kernel/fork.c linux-2.5.23-wli/kernel/fork.c
--- linux-2.5.23-virgin/kernel/fork.c Thu Jun 20 11:39:08 2002
+++ linux-2.5.23-wli/kernel/fork.c Thu Jun 20 11:43:30 2002
@@ -69,6 +69,11 @@
INIT_LIST_HEAD(&pidhash[i]);
}

+static inline unsigned pid_hashfn(pid_t pid)
+{
+ return ((pid >> 8) ^ pid) & (pidhash_size - 1);
+}
+
task_t *find_task_by_pid(int pid)
{
list_t *p, *pid_list = &pidhash[pid_hashfn(pid)];
@@ -82,7 +87,20 @@

return NULL;
}
+
+void hash_pid(task_t *p)
+{
+ list_add(&p->pidhash_list, &pidhash[pid_hashfn(p->pid)]);
+}
+
+void unhash_pid(task_t *p)
+{
+ list_del(&p->pidhash_list);
+}
+
EXPORT_SYMBOL(find_task_by_pid);
+EXPORT_SYMBOL(hash_pid);
+EXPORT_SYMBOL(unhash_pid);

rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */

diff -urN linux-2.5.23-virgin/kernel/ksyms.c linux-2.5.23-wli/kernel/ksyms.c
--- linux-2.5.23-virgin/kernel/ksyms.c Thu Jun 20 00:10:27 2002
+++ linux-2.5.23-wli/kernel/ksyms.c Thu Jun 20 11:41:41 2002
@@ -602,5 +602,3 @@
EXPORT_SYMBOL(init_thread_union);

EXPORT_SYMBOL(tasklist_lock);
-EXPORT_SYMBOL(pidhash);
-EXPORT_SYMBOL(pidhash_size);

2002-06-21 01:34:40

by William Lee Irwin III

[permalink] [raw]
Subject: Re: [patch] scheduler bits from 2.5.23-dj1

On Thu, Jun 20, 2002 at 12:26:33PM -0700, William Lee Irwin III wrote:
> The final piece, built on top of the previous two. Please comment and/or
> use your discretion. Although I test compiled and booted it on UP i386,
> this may not be the desired cleanup. The original purpose of this was to
> enforce the usage of Linux' standard list data type for the pidhash.

Okay, one more piece, thanks to Brad Heilbrun <[email protected]>:


Cheers,
Bill



diff -urN linux-2.5.23-virgin/kernel/fork.c linux-2.5.23-wli/kernel/fork.c
--- linux-2.5.23-virgin/kernel/fork.c Thu Jun 20 18:21:18 2002
+++ linux-2.5.23-wli/kernel/fork.c Thu Jun 20 18:21:31 2002
@@ -61,7 +61,7 @@
} while (!pidhash && size >= sizeof(list_t));

if (!pidhash)
- panic("Failed to allocated pid hash table!\n");
+ panic("Failed to allocate pid hash table!\n");

pidhash_size = size/sizeof(list_t);