2002-06-11 07:08:40

by Rusty Russell

[permalink] [raw]
Subject: [PATCH] 2.5.21 Nonlinear CPU support

Linus, please apply. Tested on my dual x86 box.

This patch removes smp_num_cpus, cpu_number_map and cpu_logical_map
from generic code, and uses cpu_online(cpu) instead, in preparation
for hotplug CPUs.

Given how problematic the logical/number mapping of CPUs has been
(eg. Ingo's recent scheduler work), I think this is a win anyway,
independent of the fact that adding/removing CPUs makes it pointless.

[BTW: I didn't *have* to remove smp_num_cpus, but I did because almost
all code using it is buggy once cpus go nonlinear...]

The next patch updates i386, PPC and ia64 (thanks Kimio!)
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

Name: Non-linear CPU Numbers Patch
Author: Rusty Russell
Status: Experimental

D: This patch removes the concept of "logical" CPU numbers, in
D: preparation for CPU hotplugging.

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/Documentation/DocBook/kernel-hacking.tmpl linux-2.5.21.24110.updated/Documentation/DocBook/kernel-hacking.tmpl
--- linux-2.5.21.24110/Documentation/DocBook/kernel-hacking.tmpl Tue Apr 23 11:39:29 2002
+++ linux-2.5.21.24110.updated/Documentation/DocBook/kernel-hacking.tmpl Tue Jun 11 13:53:32 2002
@@ -702,19 +702,14 @@
</sect1>

<sect1 id="routines-processorids">
- <title><function>smp_processor_id</function>()/<function>cpu_[number/logical]_map()</function>
+ <title><function>smp_processor_id</function>()
<filename class=headerfile>include/asm/smp.h</filename></title>

<para>
<function>smp_processor_id()</function> returns the current
processor number, between 0 and <symbol>NR_CPUS</symbol> (the
maximum number of CPUs supported by Linux, currently 32). These
- values are not necessarily continuous: to get a number between 0
- and <function>smp_num_cpus()</function> (the number of actual
- processors in this machine), the
- <function>cpu_number_map()</function> function is used to map the
- processor id to a logical number.
- <function>cpu_logical_map()</function> does the reverse.
+ values are not necessarily continuous.
</para>
</sect1>

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/drivers/acpi/processor.c linux-2.5.21.24110.updated/drivers/acpi/processor.c
--- linux-2.5.21.24110/drivers/acpi/processor.c Mon Jun 3 12:21:22 2002
+++ linux-2.5.21.24110.updated/drivers/acpi/processor.c Tue Jun 11 13:53:32 2002
@@ -2060,8 +2060,9 @@
return_VALUE(-EINVAL);

#ifdef CONFIG_SMP
- if (smp_num_cpus > 1)
- errata.smp = smp_num_cpus;
+ /* FIXME: What should this be? -- RR */
+ if (num_online_cpus() > 1)
+ errata.smp = num_online_cpus();
#endif

acpi_processor_errata(pr);
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/drivers/char/agp/agpgart_be.c linux-2.5.21.24110.updated/drivers/char/agp/agpgart_be.c
--- linux-2.5.21.24110/drivers/char/agp/agpgart_be.c Mon Jun 3 12:21:23 2002
+++ linux-2.5.21.24110.updated/drivers/char/agp/agpgart_be.c Tue Jun 11 13:53:32 2002
@@ -98,7 +98,7 @@

static void smp_flush_cache(void)
{
- atomic_set(&cpus_waiting, smp_num_cpus - 1);
+ atomic_set(&cpus_waiting, num_online_cpus() - 1);
if (smp_call_function(ipi_handler, NULL, 1, 0) != 0)
panic(PFX "timed out waiting for the other CPUs!\n");
flush_cache();
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/drivers/net/aironet4500_core.c linux-2.5.21.24110.updated/drivers/net/aironet4500_core.c
--- linux-2.5.21.24110/drivers/net/aironet4500_core.c Wed Feb 20 17:57:08 2002
+++ linux-2.5.21.24110.updated/drivers/net/aironet4500_core.c Tue Jun 11 13:53:32 2002
@@ -2669,10 +2669,8 @@
* but without it card gets screwed up
*/
#ifdef CONFIG_SMP
- if(smp_num_cpus > 1){
both_bap_lock = 1;
bap_setup_spinlock = 1;
- }
#endif
//awc_dump_registers(dev);

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/drivers/video/fbcon.c linux-2.5.21.24110.updated/drivers/video/fbcon.c
--- linux-2.5.21.24110/drivers/video/fbcon.c Mon Apr 29 16:00:26 2002
+++ linux-2.5.21.24110.updated/drivers/video/fbcon.c Tue Jun 11 13:53:32 2002
@@ -2177,7 +2177,7 @@
if (p->fb_info->fbops->fb_rasterimg)
p->fb_info->fbops->fb_rasterimg(p->fb_info, 1);

- for (x = 0; x < smp_num_cpus * (LOGO_W + 8) &&
+ for (x = 0; x < num_online_cpus() * (LOGO_W + 8) &&
x < p->var.xres - (LOGO_W + 8); x += (LOGO_W + 8)) {

#if defined(CONFIG_FBCON_CFB16) || defined(CONFIG_FBCON_CFB24) || \
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/fs/ntfs/compress.c linux-2.5.21.24110.updated/fs/ntfs/compress.c
--- linux-2.5.21.24110/fs/ntfs/compress.c Sat May 25 14:34:53 2002
+++ linux-2.5.21.24110.updated/fs/ntfs/compress.c Tue Jun 11 13:53:32 2002
@@ -69,16 +69,16 @@

BUG_ON(ntfs_compression_buffers);

- ntfs_compression_buffers = (u8**)kmalloc(smp_num_cpus * sizeof(u8*),
+ ntfs_compression_buffers = (u8**)kmalloc(NR_CPUS * sizeof(u8*),
GFP_KERNEL);
if (!ntfs_compression_buffers)
return -ENOMEM;
- for (i = 0; i < smp_num_cpus; i++) {
+ for (i = 0; i < NR_CPUS; i++) {
ntfs_compression_buffers[i] = (u8*)vmalloc(NTFS_MAX_CB_SIZE);
if (!ntfs_compression_buffers[i])
break;
}
- if (i == smp_num_cpus)
+ if (i == NR_CPUS)
return 0;
/* Allocation failed, cleanup and return error. */
for (j = 0; j < i; j++)
@@ -100,7 +100,7 @@

BUG_ON(!ntfs_compression_buffers);

- for (i = 0; i < smp_num_cpus; i++)
+ for (i = 0; i < NR_CPUS; i++)
vfree(ntfs_compression_buffers[i]);
kfree(ntfs_compression_buffers);
ntfs_compression_buffers = NULL;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/fs/proc/array.c linux-2.5.21.24110.updated/fs/proc/array.c
--- linux-2.5.21.24110/fs/proc/array.c Thu May 30 10:00:57 2002
+++ linux-2.5.21.24110.updated/fs/proc/array.c Tue Jun 11 13:53:32 2002
@@ -695,12 +695,14 @@
task->times.tms_utime,
task->times.tms_stime);

- for (i = 0 ; i < smp_num_cpus; i++)
+ for (i = 0 ; i < NR_CPUS; i++) {
+ if (cpu_online(i))
len += sprintf(buffer + len, "cpu%d %lu %lu\n",
i,
- task->per_cpu_utime[cpu_logical_map(i)],
- task->per_cpu_stime[cpu_logical_map(i)]);
+ task->per_cpu_utime[i],
+ task->per_cpu_stime[i]);

+ }
return len;
}
#endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/fs/proc/proc_misc.c linux-2.5.21.24110.updated/fs/proc/proc_misc.c
--- linux-2.5.21.24110/fs/proc/proc_misc.c Thu May 30 10:00:57 2002
+++ linux-2.5.21.24110.updated/fs/proc/proc_misc.c Tue Jun 11 13:53:32 2002
@@ -281,29 +281,32 @@
unsigned int sum = 0, user = 0, nice = 0, system = 0;
int major, disk;

- for (i = 0 ; i < smp_num_cpus; i++) {
- int cpu = cpu_logical_map(i), j;
+ for (i = 0 ; i < NR_CPUS; i++) {
+ int j;

- user += kstat.per_cpu_user[cpu];
- nice += kstat.per_cpu_nice[cpu];
- system += kstat.per_cpu_system[cpu];
+ if(!cpu_online(i)) continue;
+ user += kstat.per_cpu_user[i];
+ nice += kstat.per_cpu_nice[i];
+ system += kstat.per_cpu_system[i];
#if !defined(CONFIG_ARCH_S390)
for (j = 0 ; j < NR_IRQS ; j++)
- sum += kstat.irqs[cpu][j];
+ sum += kstat.irqs[i][j];
#endif
}

len = sprintf(page, "cpu %u %u %u %lu\n", user, nice, system,
- jif * smp_num_cpus - (user + nice + system));
- for (i = 0 ; i < smp_num_cpus; i++)
+ jif * num_online_cpus() - (user + nice + system));
+ for (i = 0 ; i < NR_CPUS; i++){
+ if (!cpu_online(i)) continue;
len += sprintf(page + len, "cpu%d %u %u %u %lu\n",
i,
- kstat.per_cpu_user[cpu_logical_map(i)],
- kstat.per_cpu_nice[cpu_logical_map(i)],
- kstat.per_cpu_system[cpu_logical_map(i)],
- jif - ( kstat.per_cpu_user[cpu_logical_map(i)] \
- + kstat.per_cpu_nice[cpu_logical_map(i)] \
- + kstat.per_cpu_system[cpu_logical_map(i)]));
+ kstat.per_cpu_user[i],
+ kstat.per_cpu_nice[i],
+ kstat.per_cpu_system[i],
+ jif - ( kstat.per_cpu_user[i] \
+ + kstat.per_cpu_nice[i] \
+ + kstat.per_cpu_system[i]));
+ }
len += sprintf(page + len,
"page %u %u\n"
"swap %u %u\n"
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/include/asm-generic/tlb.h linux-2.5.21.24110.updated/include/asm-generic/tlb.h
--- linux-2.5.21.24110/include/asm-generic/tlb.h Sat May 25 14:34:56 2002
+++ linux-2.5.21.24110.updated/include/asm-generic/tlb.h Tue Jun 11 13:53:32 2002
@@ -54,7 +54,7 @@
tlb->freed = 0;

/* Use fast mode if only one CPU is online */
- tlb->nr = smp_num_cpus > 1 ? 0UL : ~0UL;
+ tlb->nr = num_online_cpus() > 1 ? 0UL : ~0UL;
return tlb;
}

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/include/linux/kernel_stat.h linux-2.5.21.24110.updated/include/linux/kernel_stat.h
--- linux-2.5.21.24110/include/linux/kernel_stat.h Sat May 18 15:53:43 2002
+++ linux-2.5.21.24110.updated/include/linux/kernel_stat.h Tue Jun 11 13:53:32 2002
@@ -43,8 +43,8 @@
{
int i, sum=0;

- for (i = 0 ; i < smp_num_cpus ; i++)
- sum += kstat.irqs[cpu_logical_map(i)][irq];
+ for (i = 0 ; i < NR_CPUS ; i++)
+ sum += kstat.irqs[i][irq];

return sum;
}
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/include/linux/smp.h linux-2.5.21.24110.updated/include/linux/smp.h
--- linux-2.5.21.24110/include/linux/smp.h Fri Jun 7 13:59:08 2002
+++ linux-2.5.21.24110.updated/include/linux/smp.h Tue Jun 11 13:53:32 2002
@@ -57,8 +57,6 @@
*/
extern int smp_threads_ready;

-extern int smp_num_cpus;
-
extern volatile unsigned long smp_msg_data;
extern volatile int smp_src_cpu;
extern volatile int smp_msg_id;
@@ -79,19 +77,17 @@
* These macros fold the SMP functionality into a single CPU system
*/

-#define smp_num_cpus 1
#define smp_processor_id() 0
#define hard_smp_processor_id() 0
#define smp_threads_ready 1
#ifndef CONFIG_PREEMPT
#define kernel_lock()
#endif
-#define cpu_logical_map(cpu) 0
-#define cpu_number_map(cpu) 0
#define smp_call_function(func,info,retry,wait) ({ 0; })
-#define cpu_online_map 1
static inline void smp_send_reschedule(int cpu) { }
static inline void smp_send_reschedule_all(void) { }
+#define cpu_online(cpu) 1
+#define num_online_cpus() 1
#define __per_cpu_data
#define per_cpu(var, cpu) var
#define this_cpu(var) var
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/kernel/fork.c linux-2.5.21.24110.updated/kernel/fork.c
--- linux-2.5.21.24110/kernel/fork.c Mon Jun 10 16:03:56 2002
+++ linux-2.5.21.24110.updated/kernel/fork.c Tue Jun 11 13:53:32 2002
@@ -692,9 +692,8 @@
int i;

/* ?? should we just memset this ?? */
- for(i = 0; i < smp_num_cpus; i++)
- p->per_cpu_utime[cpu_logical_map(i)] =
- p->per_cpu_stime[cpu_logical_map(i)] = 0;
+ for(i = 0; i < NR_CPUS; i++)
+ p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
spin_lock_init(&p->sigmask_lock);
}
#endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/kernel/sched.c linux-2.5.21.24110.updated/kernel/sched.c
--- linux-2.5.21.24110/kernel/sched.c Mon Jun 10 16:03:56 2002
+++ linux-2.5.21.24110.updated/kernel/sched.c Tue Jun 11 13:53:32 2002
@@ -434,8 +434,8 @@
{
unsigned long i, sum = 0;

- for (i = 0; i < smp_num_cpus; i++)
- sum += cpu_rq(cpu_logical_map(i))->nr_running;
+ for (i = 0; i < NR_CPUS; i++)
+ sum += cpu_rq(i)->nr_running;

return sum;
}
@@ -444,8 +444,8 @@
{
unsigned long i, sum = 0;

- for (i = 0; i < smp_num_cpus; i++)
- sum += cpu_rq(cpu_logical_map(i))->nr_uninterruptible;
+ for (i = 0; i < NR_CPUS; i++)
+ sum += cpu_rq(i)->nr_uninterruptible;

return sum;
}
@@ -454,8 +454,8 @@
{
unsigned long i, sum = 0;

- for (i = 0; i < smp_num_cpus; i++)
- sum += cpu_rq(cpu_logical_map(i))->nr_switches;
+ for (i = 0; i < NR_CPUS; i++)
+ sum += cpu_rq(i)->nr_switches;

return sum;
}
@@ -530,15 +530,16 @@

busiest = NULL;
max_load = 1;
- for (i = 0; i < smp_num_cpus; i++) {
- int logical = cpu_logical_map(i);
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_online(i))
+ continue;

- rq_src = cpu_rq(logical);
- if (idle || (rq_src->nr_running < this_rq->prev_nr_running[logical]))
+ rq_src = cpu_rq(i);
+ if (idle || (rq_src->nr_running < this_rq->prev_nr_running[i]))
load = rq_src->nr_running;
else
- load = this_rq->prev_nr_running[logical];
- this_rq->prev_nr_running[logical] = rq_src->nr_running;
+ load = this_rq->prev_nr_running[i];
+ this_rq->prev_nr_running[i] = rq_src->nr_running;

if ((load > max_load) && (rq_src != this_rq)) {
busiest = rq_src;
@@ -1701,7 +1702,7 @@

static int migration_thread(void * bind_cpu)
{
- int cpu = cpu_logical_map((int) (long) bind_cpu);
+ int cpu = (int) (long) bind_cpu;
struct sched_param param = { sched_priority: MAX_RT_PRIO-1 };
runqueue_t *rq;
int ret;
@@ -1709,12 +1710,15 @@
daemonize();
sigfillset(&current->blocked);
set_fs(KERNEL_DS);
+
+ /* FIXME: First CPU may not be zero, but this crap code
+ vanishes with hotplug cpu patch anyway. --RR */
/*
* The first migration thread is started on CPU #0. This one can migrate
* the other migration threads to their destination CPUs.
*/
if (cpu != 0) {
- while (!cpu_rq(cpu_logical_map(0))->migration_thread)
+ while (!cpu_rq(0)->migration_thread)
yield();
set_cpus_allowed(current, 1UL << cpu);
}
@@ -1778,16 +1782,21 @@
{
int cpu;

- current->cpus_allowed = 1UL << cpu_logical_map(0);
- for (cpu = 0; cpu < smp_num_cpus; cpu++) {
+ current->cpus_allowed = 1UL << 0;
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (!cpu_online(cpu))
+ continue;
if (kernel_thread(migration_thread, (void *) (long) cpu,
CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
BUG();
}
current->cpus_allowed = -1L;

- for (cpu = 0; cpu < smp_num_cpus; cpu++)
- while (!cpu_rq(cpu_logical_map(cpu))->migration_thread)
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (!cpu_online(cpu))
+ continue;
+ while (!cpu_rq(cpu)->migration_thread)
schedule_timeout(2);
+ }
}
#endif
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/kernel/softirq.c linux-2.5.21.24110.updated/kernel/softirq.c
--- linux-2.5.21.24110/kernel/softirq.c Mon Jun 3 12:21:28 2002
+++ linux-2.5.21.24110.updated/kernel/softirq.c Tue Jun 11 13:53:32 2002
@@ -363,8 +363,7 @@

static int ksoftirqd(void * __bind_cpu)
{
- int bind_cpu = (int) (long) __bind_cpu;
- int cpu = cpu_logical_map(bind_cpu);
+ int cpu = (int) (long) __bind_cpu;

daemonize();
set_user_nice(current, 19);
@@ -376,7 +375,7 @@
if (smp_processor_id() != cpu)
BUG();

- sprintf(current->comm, "ksoftirqd_CPU%d", bind_cpu);
+ sprintf(current->comm, "ksoftirqd_CPU%d", cpu);

__set_current_state(TASK_INTERRUPTIBLE);
mb();
@@ -402,13 +401,16 @@
{
int cpu;

- for (cpu = 0; cpu < smp_num_cpus; cpu++)
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ if (!cpu_online(cpu))
+ continue;
if (kernel_thread(ksoftirqd, (void *) (long) cpu,
CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
printk("spawn_ksoftirqd() failed for cpu %d\n", cpu);
else
- while (!ksoftirqd_task(cpu_logical_map(cpu)))
+ while (!ksoftirqd_task(cpu))
yield();
+ }
return 0;
}

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/kernel/suspend.c linux-2.5.21.24110.updated/kernel/suspend.c
--- linux-2.5.21.24110/kernel/suspend.c Mon Jun 10 16:03:56 2002
+++ linux-2.5.21.24110.updated/kernel/suspend.c Tue Jun 11 13:53:32 2002
@@ -282,7 +282,8 @@
sh->num_physpages = num_physpages;
strncpy(sh->machine, system_utsname.machine, 8);
strncpy(sh->version, system_utsname.version, 20);
- sh->num_cpus = smp_num_cpus;
+ /* FIXME: Is this bogus? --RR */
+ sh->num_cpus = num_online_cpus();
sh->page_size = PAGE_SIZE;
sh->suspend_pagedir = pagedir_nosave;
if (pagedir_save != pagedir_nosave)
@@ -1013,7 +1014,7 @@
return sanity_check_failed("Incorrect machine type");
if(strncmp(sh->version, system_utsname.version, 20))
return sanity_check_failed("Incorrect version");
- if(sh->num_cpus != smp_num_cpus)
+ if(sh->num_cpus != num_online_cpus())
return sanity_check_failed("Incorrect number of cpus");
if(sh->page_size != PAGE_SIZE)
return sanity_check_failed("Incorrect PAGE_SIZE");
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/lib/brlock.c linux-2.5.21.24110.updated/lib/brlock.c
--- linux-2.5.21.24110/lib/brlock.c Sat Nov 10 09:11:15 2001
+++ linux-2.5.21.24110.updated/lib/brlock.c Tue Jun 11 13:53:32 2002
@@ -24,16 +24,16 @@
{
int i;

- for (i = 0; i < smp_num_cpus; i++)
- write_lock(&__brlock_array[cpu_logical_map(i)][idx]);
+ for (i = 0; i < NR_CPUS; i++)
+ write_lock(&__brlock_array[i][idx]);
}

void __br_write_unlock (enum brlock_indices idx)
{
int i;

- for (i = 0; i < smp_num_cpus; i++)
- write_unlock(&__brlock_array[cpu_logical_map(i)][idx]);
+ for (i = 0; i < NR_CPUS; i++)
+ write_unlock(&__brlock_array[i][idx]);
}

#else /* ! __BRLOCK_USE_ATOMICS */
@@ -50,8 +50,8 @@

again:
spin_lock(&__br_write_locks[idx].lock);
- for (i = 0; i < smp_num_cpus; i++)
- if (__brlock_array[cpu_logical_map(i)][idx] != 0) {
+ for (i = 0; i < NR_CPUS; i++)
+ if (__brlock_array[i][idx] != 0) {
spin_unlock(&__br_write_locks[idx].lock);
barrier();
cpu_relax();
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/mm/page_alloc.c linux-2.5.21.24110.updated/mm/page_alloc.c
--- linux-2.5.21.24110/mm/page_alloc.c Mon Jun 10 16:03:56 2002
+++ linux-2.5.21.24110.updated/mm/page_alloc.c Tue Jun 11 13:53:32 2002
@@ -574,10 +574,13 @@
int pcpu;

memset(ret, 0, sizeof(*ret));
- for (pcpu = 0; pcpu < smp_num_cpus; pcpu++) {
+ for (pcpu = 0; pcpu < NR_CPUS; pcpu++) {
struct page_state *ps;

- ps = &page_states[cpu_logical_map(pcpu)];
+ if (!cpu_online(pcpu))
+ continue;
+
+ ps = &page_states[pcpu];
ret->nr_dirty += ps->nr_dirty;
ret->nr_writeback += ps->nr_writeback;
ret->nr_pagecache += ps->nr_pagecache;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/mm/slab.c linux-2.5.21.24110.updated/mm/slab.c
--- linux-2.5.21.24110/mm/slab.c Mon May 13 12:00:40 2002
+++ linux-2.5.21.24110.updated/mm/slab.c Tue Jun 11 13:53:32 2002
@@ -941,8 +941,8 @@
down(&cache_chain_sem);
smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);

- for (i = 0; i < smp_num_cpus; i++) {
- cpucache_t* ccold = new.new[cpu_logical_map(i)];
+ for (i = 0; i < NR_CPUS; i++) {
+ cpucache_t* ccold = new.new[i];
if (!ccold || (ccold->avail == 0))
continue;
local_irq_disable();
@@ -1675,16 +1675,18 @@

memset(&new.new,0,sizeof(new.new));
if (limit) {
- for (i = 0; i< smp_num_cpus; i++) {
+ for (i = 0; i < NR_CPUS; i++) {
cpucache_t* ccnew;

ccnew = kmalloc(sizeof(void*)*limit+
sizeof(cpucache_t), GFP_KERNEL);
- if (!ccnew)
- goto oom;
+ if (!ccnew) {
+ for (i--; i >= 0; i--) kfree(new.new[i]);
+ return -ENOMEM;
+ }
ccnew->limit = limit;
ccnew->avail = 0;
- new.new[cpu_logical_map(i)] = ccnew;
+ new.new[i] = ccnew;
}
}
new.cachep = cachep;
@@ -1694,8 +1696,8 @@

smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);

- for (i = 0; i < smp_num_cpus; i++) {
- cpucache_t* ccold = new.new[cpu_logical_map(i)];
+ for (i = 0; i < NR_CPUS; i++) {
+ cpucache_t* ccold = new.new[i];
if (!ccold)
continue;
local_irq_disable();
@@ -1704,10 +1706,6 @@
kfree(ccold);
}
return 0;
-oom:
- for (i--; i >= 0; i--)
- kfree(new.new[cpu_logical_map(i)]);
- return -ENOMEM;
}

static void enable_cpucache (kmem_cache_t *cachep)
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/core/dev.c linux-2.5.21.24110.updated/net/core/dev.c
--- linux-2.5.21.24110/net/core/dev.c Mon Jun 10 16:03:56 2002
+++ linux-2.5.21.24110.updated/net/core/dev.c Tue Jun 11 13:54:41 2002
@@ -1817,11 +1817,13 @@
static int dev_proc_stats(char *buffer, char **start, off_t offset,
int length, int *eof, void *data)
{
- int i, lcpu;
+ int i;
int len = 0;

- for (lcpu = 0; lcpu < smp_num_cpus; lcpu++) {
- i = cpu_logical_map(lcpu);
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_online(i))
+ continue;
+
len += sprintf(buffer + len, "%08x %08x %08x %08x %08x %08x "
"%08x %08x %08x\n",
netdev_rx_stat[i].total,
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/ipv4/netfilter/arp_tables.c linux-2.5.21.24110.updated/net/ipv4/netfilter/arp_tables.c
--- linux-2.5.21.24110/net/ipv4/netfilter/arp_tables.c Thu Mar 21 14:14:57 2002
+++ linux-2.5.21.24110.updated/net/ipv4/netfilter/arp_tables.c Tue Jun 11 13:53:32 2002
@@ -259,7 +259,7 @@
read_lock_bh(&table->lock);
table_base = (void *)table->private->entries
+ TABLE_OFFSET(table->private,
- cpu_number_map(smp_processor_id()));
+ smp_processor_id());
e = get_entry(table_base, table->private->hook_entry[hook]);
back = get_entry(table_base, table->private->underflow[hook]);

@@ -705,7 +705,7 @@
}

/* And one copy for every other CPU */
- for (i = 1; i < smp_num_cpus; i++) {
+ for (i = 1; i < NR_CPUS; i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
@@ -756,7 +756,7 @@
unsigned int cpu;
unsigned int i;

- for (cpu = 0; cpu < smp_num_cpus; cpu++) {
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
i = 0;
ARPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
@@ -874,7 +874,7 @@
return -ENOMEM;

newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(tmp.size) * smp_num_cpus);
+ + SMP_ALIGN(tmp.size) * NR_CPUS);
if (!newinfo)
return -ENOMEM;

@@ -1143,7 +1143,7 @@

MOD_INC_USE_COUNT;
newinfo = vmalloc(sizeof(struct arpt_table_info)
- + SMP_ALIGN(table->table->size) * smp_num_cpus);
+ + SMP_ALIGN(table->table->size) * NR_CPUS);
if (!newinfo) {
ret = -ENOMEM;
MOD_DEC_USE_COUNT;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/ipv4/netfilter/ip_tables.c linux-2.5.21.24110.updated/net/ipv4/netfilter/ip_tables.c
--- linux-2.5.21.24110/net/ipv4/netfilter/ip_tables.c Wed Feb 20 17:56:17 2002
+++ linux-2.5.21.24110.updated/net/ipv4/netfilter/ip_tables.c Tue Jun 11 13:53:32 2002
@@ -288,8 +288,7 @@
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
table_base = (void *)table->private->entries
- + TABLE_OFFSET(table->private,
- cpu_number_map(smp_processor_id()));
+ + TABLE_OFFSET(table->private, smp_processor_id());
e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
@@ -865,7 +864,7 @@
}

/* And one copy for every other CPU */
- for (i = 1; i < smp_num_cpus; i++) {
+ for (i = 1; i < NR_CPUS; i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
@@ -887,7 +886,7 @@
struct ipt_entry *table_base;
unsigned int i;

- for (i = 0; i < smp_num_cpus; i++) {
+ for (i = 0; i < NR_CPUS; i++) {
table_base =
(void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i);
@@ -934,7 +933,7 @@
unsigned int cpu;
unsigned int i;

- for (cpu = 0; cpu < smp_num_cpus; cpu++) {
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
i = 0;
IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
@@ -1072,7 +1071,7 @@
return -ENOMEM;

newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(tmp.size) * smp_num_cpus);
+ + SMP_ALIGN(tmp.size) * NR_CPUS);
if (!newinfo)
return -ENOMEM;

@@ -1385,7 +1384,7 @@

MOD_INC_USE_COUNT;
newinfo = vmalloc(sizeof(struct ipt_table_info)
- + SMP_ALIGN(table->table->size) * smp_num_cpus);
+ + SMP_ALIGN(table->table->size) * NR_CPUS);
if (!newinfo) {
ret = -ENOMEM;
MOD_DEC_USE_COUNT;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/ipv4/netfilter/ipchains_core.c linux-2.5.21.24110.updated/net/ipv4/netfilter/ipchains_core.c
--- linux-2.5.21.24110/net/ipv4/netfilter/ipchains_core.c Mon Jun 10 16:03:56 2002
+++ linux-2.5.21.24110.updated/net/ipv4/netfilter/ipchains_core.c Tue Jun 11 13:53:32 2002
@@ -125,8 +125,8 @@
* UP.
*
* For backchains and counters, we use an array, indexed by
- * [cpu_number_map[smp_processor_id()]*2 + !in_interrupt()]; the array is of
- * size [smp_num_cpus*2]. For v2.0, smp_num_cpus is effectively 1. So,
+ * [smp_processor_id()*2 + !in_interrupt()]; the array is of
+ * size [NR_CPUS*2]. For v2.0, NR_CPUS is effectively 1. So,
* confident of uniqueness, we modify counters even though we only
* have a read lock (to read the counters, you need a write lock,
* though). */
@@ -151,11 +151,11 @@
#endif

#ifdef CONFIG_SMP
-#define SLOT_NUMBER() (cpu_number_map(smp_processor_id())*2 + !in_interrupt())
+#define SLOT_NUMBER() (smp_processor_id()*2 + !in_interrupt())
#else /* !SMP */
#define SLOT_NUMBER() (!in_interrupt())
#endif /* CONFIG_SMP */
-#define NUM_SLOTS (smp_num_cpus*2)
+#define NUM_SLOTS (NR_CPUS*2)

#define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
+ NUM_SLOTS*sizeof(struct ip_reent))
@@ -1122,7 +1122,7 @@
label->chain = NULL;
label->refcount = ref;
label->policy = policy;
- for (i = 0; i < smp_num_cpus*2; i++) {
+ for (i = 0; i < NUM_SLOTS; i++) {
label->reent[i].counters.pcnt = label->reent[i].counters.bcnt
= 0;
label->reent[i].prevchain = NULL;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/ipv4/proc.c linux-2.5.21.24110.updated/net/ipv4/proc.c
--- linux-2.5.21.24110/net/ipv4/proc.c Thu May 17 03:21:45 2001
+++ linux-2.5.21.24110.updated/net/ipv4/proc.c Tue Jun 11 13:53:32 2002
@@ -55,8 +55,8 @@
int res = 0;
int cpu;

- for (cpu=0; cpu<smp_num_cpus; cpu++)
- res += proto->stats[cpu_logical_map(cpu)].inuse;
+ for (cpu=0; cpu<NR_CPUS; cpu++)
+ res += proto->stats[cpu].inuse;

return res;
}
@@ -103,9 +103,9 @@

sz /= sizeof(unsigned long);

- for (i=0; i<smp_num_cpus; i++) {
- res += begin[2*cpu_logical_map(i)*sz + nr];
- res += begin[(2*cpu_logical_map(i)+1)*sz + nr];
+ for (i=0; i<NR_CPUS; i++) {
+ res += begin[2*i*sz + nr];
+ res += begin[(2*i+1)*sz + nr];
}
return res;
}
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/ipv4/route.c linux-2.5.21.24110.updated/net/ipv4/route.c
--- linux-2.5.21.24110/net/ipv4/route.c Mon May 13 12:00:40 2002
+++ linux-2.5.21.24110.updated/net/ipv4/route.c Tue Jun 11 13:53:32 2002
@@ -280,12 +280,10 @@
static int rt_cache_stat_get_info(char *buffer, char **start, off_t offset, int length)
{
unsigned int dst_entries = atomic_read(&ipv4_dst_ops.entries);
- int i, lcpu;
+ int i;
int len = 0;

- for (lcpu = 0; lcpu < smp_num_cpus; lcpu++) {
- i = cpu_logical_map(lcpu);
-
+ for (i = 0; i < NR_CPUS; i++) {
len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
dst_entries,
rt_cache_stat[i].in_hit,
@@ -2437,19 +2435,16 @@
memcpy(dst, src, length);

#ifdef CONFIG_SMP
- if (smp_num_cpus > 1 || cpu_logical_map(0) != 0) {
+ /* Alexey, be ashamed: speed gained, horror unleashed. --RR */
+ if (num_online_cpus() > 1 || !cpu_online(0)) {
int i;
int cnt = length / 4;

- for (i = 0; i < smp_num_cpus; i++) {
- int cpu = cpu_logical_map(i);
+ for (i = 1; i < NR_CPUS; i++) {
int k;

- if (cpu == 0)
- continue;
-
src = (u32*)(((u8*)ip_rt_acct) + offset +
- cpu * 256 * sizeof(struct ip_rt_acct));
+ i * 256 * sizeof(struct ip_rt_acct));

for (k = 0; k < cnt; k++)
dst[k] += src[k];
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/ipv6/netfilter/ip6_tables.c linux-2.5.21.24110.updated/net/ipv6/netfilter/ip6_tables.c
--- linux-2.5.21.24110/net/ipv6/netfilter/ip6_tables.c Mon May 6 11:12:01 2002
+++ linux-2.5.21.24110.updated/net/ipv6/netfilter/ip6_tables.c Tue Jun 11 13:53:32 2002
@@ -336,8 +336,7 @@
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
table_base = (void *)table->private->entries
- + TABLE_OFFSET(table->private,
- cpu_number_map(smp_processor_id()));
+ + TABLE_OFFSET(table->private, smp_processor_id());
e = get_entry(table_base, table->private->hook_entry[hook]);

#ifdef CONFIG_NETFILTER_DEBUG
@@ -913,7 +912,7 @@
}

/* And one copy for every other CPU */
- for (i = 1; i < smp_num_cpus; i++) {
+ for (i = 1; i < NR_CPUS; i++) {
memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
newinfo->entries,
SMP_ALIGN(newinfo->size));
@@ -935,7 +934,7 @@
struct ip6t_entry *table_base;
unsigned int i;

- for (i = 0; i < smp_num_cpus; i++) {
+ for (i = 0; i < NR_CPUS; i++) {
table_base =
(void *)newinfo->entries
+ TABLE_OFFSET(newinfo, i);
@@ -982,7 +981,7 @@
unsigned int cpu;
unsigned int i;

- for (cpu = 0; cpu < smp_num_cpus; cpu++) {
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
i = 0;
IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
t->size,
@@ -1116,7 +1115,7 @@
return -ENOMEM;

newinfo = vmalloc(sizeof(struct ip6t_table_info)
- + SMP_ALIGN(tmp.size) * smp_num_cpus);
+ + SMP_ALIGN(tmp.size) * NR_CPUS);
if (!newinfo)
return -ENOMEM;

@@ -1429,7 +1428,7 @@

MOD_INC_USE_COUNT;
newinfo = vmalloc(sizeof(struct ip6t_table_info)
- + SMP_ALIGN(table->table->size) * smp_num_cpus);
+ + SMP_ALIGN(table->table->size) * NR_CPUS);
if (!newinfo) {
ret = -ENOMEM;
MOD_DEC_USE_COUNT;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/ipv6/proc.c linux-2.5.21.24110.updated/net/ipv6/proc.c
--- linux-2.5.21.24110/net/ipv6/proc.c Wed Feb 20 17:57:22 2002
+++ linux-2.5.21.24110.updated/net/ipv6/proc.c Tue Jun 11 13:53:32 2002
@@ -31,8 +31,8 @@
int res = 0;
int cpu;

- for (cpu=0; cpu<smp_num_cpus; cpu++)
- res += proto->stats[cpu_logical_map(cpu)].inuse;
+ for (cpu=0; cpu<NR_CPUS; cpu++)
+ res += proto->stats[cpu].inuse;

return res;
}
@@ -140,9 +140,9 @@
unsigned long res = 0;
int i;

- for (i=0; i<smp_num_cpus; i++) {
- res += ptr[2*cpu_logical_map(i)*size];
- res += ptr[(2*cpu_logical_map(i)+1)*size];
+ for (i=0; i<NR_CPUS; i++) {
+ res += ptr[2*i*size];
+ res += ptr[(2*i+1)*size];
}

return res;
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21.24110/net/socket.c linux-2.5.21.24110.updated/net/socket.c
--- linux-2.5.21.24110/net/socket.c Thu Mar 21 14:14:57 2002
+++ linux-2.5.21.24110.updated/net/socket.c Tue Jun 11 13:53:32 2002
@@ -1773,8 +1773,8 @@
int len, cpu;
int counter = 0;

- for (cpu=0; cpu<smp_num_cpus; cpu++)
- counter += sockets_in_use[cpu_logical_map(cpu)].counter;
+ for (cpu=0; cpu<NR_CPUS; cpu++)
+ counter += sockets_in_use[cpu].counter;

/* It can be negative, by the way. 8) */
if (counter < 0)


2002-06-11 07:39:18

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Rusty Russell wrote:
>
> Linus, please apply. Tested on my dual x86 box.
>
> This patch removes smp_num_cpus, cpu_number_map and cpu_logical_map
> from generic code, and uses cpu_online(cpu) instead, in preparation
> for hotplug CPUS.

umm. This patch does introduce a non-zero amount of bloat:

> ...
> - ntfs_compression_buffers = (u8**)kmalloc(smp_num_cpus * sizeof(u8*),
> + ntfs_compression_buffers = (u8**)kmalloc(NR_CPUS * sizeof(u8*),

and slowdown:

> ...
> --- linux-2.5.21.24110/kernel/sched.c Mon Jun 10 16:03:56 2002
> +++ linux-2.5.21.24110.updated/kernel/sched.c Tue Jun 11 13:53:32 2002
> ...
> @@ -530,15 +530,16 @@
>
> busiest = NULL;
> max_load = 1;
> - for (i = 0; i < smp_num_cpus; i++) {
> - int logical = cpu_logical_map(i);
> + for (i = 0; i < NR_CPUS; i++) {
> + if (!cpu_online(i))
> + continue;
>

and for the majority of SMP machines it gives nothing back, yes?

Is there some way of optimising all that?


-

2002-06-11 09:09:19

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

In message <[email protected]> you write:
> and slowdown:

ARGH! STOP IT! I realize it's 'leet to be continually worrying about
possible microoptimizations, but I challenge you to *measure* the
slowdown between:

> > - for (i = 0; i < smp_num_cpus; i++) {
> > - int logical = cpu_logical_map(i);

and

> > + for (i = 0; i < NR_CPUS; i++) {
> > + if (!cpu_online(i))
> > + continue;

*Especially* in this context. Sure, a new "max_cpu_number" or
"cpu_for_each(i)" macro would fix this, but at the expense of using up
additional stack in the reader's brain.

Let's not perpetuate the myth that everything in the kernel needs to
be tuned to the last cycle at all costs, hm?

Yes, you stepped on a sore point 8)
Rusty.
PS. Of course, you know the correct answer, anyway.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-06-11 09:15:26

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

From: Rusty Russell <[email protected]>
Date: Tue, 11 Jun 2002 19:09:44 +1000

In message <[email protected]> you write:
> and slowdown:

ARGH! STOP IT! I realize it's 'leet to be continually worrying about
possible microoptimizations, but I challenge you to *measure* the
slowdown between:

Regardless, his space arguments still hold.

I don't like having everyone eat the overhead that hotplugging cpus
seems to entail.

And remember, it's the anal "every microoptimization at all costs"
people that keep the kernel sane and from running out of control bloat
wise. Yes, I realize it's a pain in the ass because you might have to
use your brain from time to time to reimplement things to make the
cycle counters happy, but such is life.

2002-06-11 09:23:50

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Rusty Russell wrote:
>
> ...
> Let's not perpetuate the myth that everything in the kernel needs to
> be tuned to the last cycle at all costs, hm?

I was more concerned about the RAM use, actually.

This patch is an additional reason for CONFIG_NR_CPUS, but I've rather
gone cold on that idea because the "proper fix" is to make all those
huge per-cpu arrays dynamically allocated. So you can run a 64p kernel
on 2p without losing hundreds of k of memory and kernel address space.

But it looks like all those dynamically-allocated structures would
have to be allocated out to NR_CPUS anyway, to support hotplug, yes?

In which case, CONFIG_NR_CPUS is the only way to get the memory
back...

-

2002-06-11 10:53:48

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 08:42 11/06/02, Andrew Morton wrote:
>Rusty Russell wrote:
> >
> > Linus, please apply. Tested on my dual x86 box.
> >
> > This patch removes smp_num_cpus, cpu_number_map and cpu_logical_map
> > from generic code, and uses cpu_online(cpu) instead, in preparation
> > for hotplug CPUS.
>
>umm. This patch does introduce a non-zero amount of bloat:
>
> > ...
> > - ntfs_compression_buffers = (u8**)kmalloc(smp_num_cpus * sizeof(u8*),
> > + ntfs_compression_buffers = (u8**)kmalloc(NR_CPUS * sizeof(u8*),

This is crazy! It means you are allocating 2MiB of memory instead of just
128kiB on a 2 CPU system, which will be about 99% of the SMP systems in
use, at my guess. So your change is throwing away 1920kiB of kernel ram for
no reason at all. And that is just ntfs...

CPU hot plugging is an extremely specialised corner case so can you please
make it a config option and not get rid of smp_num_cpus? If people enable
the option make smp_num_cpus be the same as NR_CPUS and if not leave it be
as it is now.

Anything else penalizes the majority of users just to allow a tiny minority
to do strange things like swap cpus without rebooting...

Anton



--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-11 11:18:29

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 10:27 11/06/02, Andrew Morton wrote:
>Rusty Russell wrote:
> >
> > ...
> > Let's not perpetuate the myth that everything in the kernel needs to
> > be tuned to the last cycle at all costs, hm?
>
>I was more concerned about the RAM use, actually.
>
>This patch is an additional reason for CONFIG_NR_CPUS, but I've rather
>gone cold on that idea because the "proper fix" is to make all those
>huge per-cpu arrays dynamically allocated. So you can run a 64p kernel
>on 2p without losing hundreds of k of memory and kernel address space.
>
>But it looks like all those dynamically-allocated structures would
>have to be allocated out to NR_CPUS anyway, to support hotplug, yes?
>
>In which case, CONFIG_NR_CPUS is the only way to get the memory
>back...

Why? You can get rid of all uses of NR_CPUS (except for using it as a max
capping value so none goes above it) and always use smp_num_cpus instead.
And make the cpu hotplug code update smp_num_cpus as appropriate.

All code relying on smp_num_cpus for per-cpu buffers can do a check whether
the current cpu is greater than the value of smp_num_cpus at per-cpu buffer
allocation time and if so lock the kernel (or only the buffers if possible)
and grow the buffer allocation up to the new smp_num_cpus value. And all
that can be done nicely out of line in a really, really, snail speed slow
path... The fastpath only needs to contain:

cpu = smp_processor_id();
#ifdef CONFIG_HOTPLUG_CPU
if (unlikely(cpu >= old_smp_num_cpus))
goto snail_path;
snail_path_done:
#endif

So zero penalty for non-hotplug users and loads of penalty for hotplug
users but frankly I couldn't care less for those. The slow path will
trigger so seldom it is not worth thinking about the performance hit there.

You could even make the above look nicer by making it a function like:

cpu = smp_processor_id();
check_for_cpu_hotplug_event(cpu, old_smp_num_cpus, our_hotplug_callback);

And let our_hotplug_callback() deal with the case where cpu is >=
old_smp_num_cpus, for example for ntfs that would involve extending the
number of per-cpu buffers. And in the !CONFIG_HOTPLUG_CPU case the whole
check_for_cpu_hotplug_event function becomes a NOP. All in the spirit of
not having #ifdefs sprinkled around the code.

There are a lot of ways to deal with this corner case dynamically, so
please use one of them. I don't buy the "let's penalise 99% of users for the
sake of a feature that almost no one will ever use" argument.

Best regards,

Anton



--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-11 14:33:30

by Denis Vlasenko

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On 11 June 2002 08:57, Anton Altaparmakov wrote:
> At 08:42 11/06/02, Andrew Morton wrote:
> >Rusty Russell wrote:
> > > Linus, please apply. Tested on my dual x86 box.
> > >
> > > This patch removes smp_num_cpus, cpu_number_map and cpu_logical_map
> > > from generic code, and uses cpu_online(cpu) instead, in preparation
> > > for hotplug CPUS.
> >
> >umm. This patch does introduce a non-zero amount of bloat:
> > > ...
> > > - ntfs_compression_buffers = (u8**)kmalloc(smp_num_cpus * sizeof(u8*),
> > > + ntfs_compression_buffers = (u8**)kmalloc(NR_CPUS * sizeof(u8*),
>
> This is crazy! It means you are allocating 2MiB of memory instead of just
> 128kiB on a 2 CPU system, which will be about 99% of the SMP systems in
> use, at my guess. So your change is throwing away 1920kiB of kernel ram for
> no reason at all. And that is just ntfs...

Wait a minute.
These buffers are allocated per CPU. Can we allocate additional ones when
a new CPU is added? I do hope these buffers aren't allocated at boot time but
at mount time, are they?

I'm sorry it sounds like NTFS code needs rework, not Rusty's patch.
Feel free to enlighten me why I am wrong.
--
vda

2002-06-11 14:54:20

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 20:29 11/06/02, Denis Vlasenko wrote:
>On 11 June 2002 08:57, Anton Altaparmakov wrote:
> > At 08:42 11/06/02, Andrew Morton wrote:
> > >Rusty Russell wrote:
> > > > Linus, please apply. Tested on my dual x86 box.
> > > >
> > > > This patch removes smp_num_cpus, cpu_number_map and cpu_logical_map
> > > > from generic code, and uses cpu_online(cpu) instead, in preparation
> > > > for hotplug CPUS.
> > >
> > >umm. This patch does introduce a non-zero amount of bloat:
> > > > ...
> > > > - ntfs_compression_buffers = (u8**)kmalloc(smp_num_cpus * sizeof(u8*),
> > > > + ntfs_compression_buffers = (u8**)kmalloc(NR_CPUS * sizeof(u8*),
> >
> > This is crazy! It means you are allocating 2MiB of memory instead of just
> > 128kiB on a 2 CPU system, which will be about 99% of the SMP systems in
> > use, at my guess. So your change is throwing away 1920kiB of kernel ram for
> > no reason at all. And that is just ntfs...
>
>Wait a minute.
>These buffers are allocated per CPU. Can we allocate additional ones when
>new CPU is added?

Of course, see my suggestion for how to handle this in the post after the
one you replied to.

>I do hope these buffers aren't allocated at boot time but at mount time,
>are they?

At mount time and only if the volume supports compression. And they are
ntfs global, i.e. not per mount point. That is still a big ram waste.

>I'm sorry it sounds like NTFS code needs rework, not Rusty's patch.

Sorry to disappoint you but my code is as efficient as possible while
NR_CPUS is as ugly and inefficient as hell.

>Feel free to enlighten me why I am wrong.

I hope I have managed to do that. (-:

Best regards,

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-11 17:59:31

by Robert Love

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Tue, 2002-06-11 at 03:57, Anton Altaparmakov wrote:

> This is crazy! It means you are allocating 2MiB of memory instead of just
> 128kiB on a 2 CPU system, which will be about 99% of the SMP systems in
> use, at my guess. So your change is throwing away 1920kiB of kernel ram for
> no reason at all. And that is just ntfs...
>
> CPU hot plugging is an extremely specialised corner case so can you please
> make it a config option and not get rid of smp_num_cpus? If people enable
> the option make smp_num_cpus be the same as NR_CPUS and if not leave it be
> as it is now.

I agree. One can argue these rants are just for "micro optimizations"
(although I disagree the size issue is "micro") but someone has to stay
on top of these issues...

Hot-swapping CPUs is incredibly specialized and corner-cased.

> Anything else penalizes the majority of users just to allow a tiny minority
> to do strange things like swap cpus without rebooting...

It is by no means a solution, but I just posted a patch to configure
NR_CPUS... so setting it to, say, 2 on your dual box should help you
out. On the converse, however, it introduces a default of 64 on 64-bit
boxen so it compounds the problem for users who don't tweak the
setting... something still needs to be done with the hotplug code.

Robert Love

2002-06-11 18:02:56

by Robert Love

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Tue, 2002-06-11 at 12:29, Denis Vlasenko wrote:

> I'm sorry it sounds like NTFS code needs rework, not Rusty's patch.
> Feel free to enlighten me why I am wrong.

Uh no. We have both static (NR_CPUS) and dynamic (smp_num_cpus) code in
the kernel... both are legit for different purposes.

This patch takes Anton's code and swaps a kmalloc based on smp_num_cpus
to NR_CPUS. I.e., on my 2-way machine I use 16x more memory.

Robert Love

2002-06-12 00:51:41

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

In message <[email protected]> you write:
> Rusty Russell wrote:
> >
> > ...
> > Let's not perpetuate the myth that everything in the kernel needs to
> > be tuned to the last cycle at all costs, hm?
>
> I was more concerned about the RAM use, actually.
>
> This patch is an additional reason for CONFIG_NR_CPUS, but I've rather
> gone cold on that idea because the "proper fix" is to make all those
> huge per-cpu arrays dynamically allocated. So you can run a 64p kernel
> on 2p without losing hundreds of k of memory and kernel address space.
>
> But it looks like all those dynamically-allocated structures would
> have to be allocated out to NR_CPUS anyway, to support hotplug, yes?
>
> In which case, CONFIG_NR_CPUS is the only way to get the memory
> back...

Precisely. Previously, the assumption was that if you're SMP, memory
is cheap. To be frank, it's still true, but I don't want to
discourage any sign of a "small is beautiful" mindset 8)

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-06-12 04:41:58

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

In message <1023818365.21176.237.camel@sinai> you write:
> On Tue, 2002-06-11 at 03:57, Anton Altaparmakov wrote:
>
> > This is crazy! It means you are allocating 2MiB of memory instead of just
> > 128kiB on a 2 CPU system, which will be about 99% of the SMP systems in
> > use, at my guess. So your change is throwing away 1920kiB of kernel ram for
> > no reason at all. And that is just ntfs...
> >
> > CPU hot plugging is an extremely specialised corner case so can you please
> > make it a config option and not get rid of smp_num_cpus? If people enable
> > the option make smp_num_cpus be the same as NR_CPUS and if not leave it be
> > as it is now.
>
> I agree. One can argue these rants are just for "micro optimizations"
> (although I disagree the size issue is "micro") but someone has to stay
> on top of these issues...
>
> Hot swappable CPUs is incredibly specialized and corner-cased.

Not once the boot sequence is changed to plug CPUs in: then every SMP
box becomes "hot plug".

> It is by no means a solution, but I just posted a patch to configure
> NR_CPUS... so setting it to, say, 2 on your dual box should help you
> out. On the converse, however, it introduces a default of 64 on 64-bit
> boxen so it compounds the problem for users who don't tweak the
> setting... something still needs to be done with the hotplug code.

Andrew Morton did as well. Better would probably be to replace
CONFIG_SMP with CONFIG_MAX_CPUS (tested patch below, 'cept UP doesn't
seem to build anyway). Note that NR_CPUS is in fact the ceiling of
smp_processor_id(), which on some architectures may mean that NR_CPUS
still has to be (say) 32 even if you only have 2 CPUs.

(Note to self: check each arch's Config.help for x86isms),
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/include/linux/threads.h working-2.5.21-numcpus/include/linux/threads.h
--- linux-2.5.21/include/linux/threads.h Sat May 18 15:53:43 2002
+++ working-2.5.21-numcpus/include/linux/threads.h Wed Jun 12 12:55:58 2002
@@ -7,12 +7,8 @@
* The default limit for the nr of threads is now in
* /proc/sys/kernel/threads-max.
*/
-
-#ifdef CONFIG_SMP
-#define NR_CPUS 32 /* Max processors that can be running in SMP */
-#else
-#define NR_CPUS 1
-#endif
+
+#define NR_CPUS CONFIG_MAX_CPUS

#define MIN_THREADS_LEFT_FOR_ROOT 4

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/Documentation/DocBook/kernel-locking.tmpl working-2.5.21-numcpus/Documentation/DocBook/kernel-locking.tmpl
--- linux-2.5.21/Documentation/DocBook/kernel-locking.tmpl Fri Mar 8 14:49:09 2002
+++ working-2.5.21-numcpus/Documentation/DocBook/kernel-locking.tmpl Wed Jun 12 13:33:57 2002
@@ -220,15 +220,15 @@
<title>Locks and Uniprocessor Kernels</title>

<para>
- For kernels compiled without <symbol>CONFIG_SMP</symbol>, spinlocks
+ For kernels compiled with <symbol>CONFIG_MAX_CPUS</symbol> set to 1, spinlocks
do not exist at all. This is an excellent design decision: when
no-one else can run at the same time, there is no reason to
have a lock at all.
</para>

<para>
- You should always test your locking code with <symbol>CONFIG_SMP</symbol>
- enabled, even if you don't have an SMP test box, because it
+ You should always test your locking code with <symbol>CONFIG_MAX_CPUS</symbol>
+ set to 2 or more, even if you don't have an SMP test box, because it
will still catch some (simple) kinds of deadlock.
</para>

@@ -546,7 +546,7 @@
Both of these are called deadlock, and as shown above, it can
occur even with a single CPU (although not on UP compiles,
since spinlocks vanish on kernel compiles with
- <symbol>CONFIG_SMP</symbol>=n. You'll still get data corruption
+ <symbol>CONFIG_MAX_CPUS</symbol>=1. You'll still get data corruption
in the second example).
</para>

@@ -1157,7 +1157,7 @@
<glossdef>
<para>
Symmetric Multi-Processor: kernels compiled for multiple-CPU
- machines. (CONFIG_SMP=y).
+ machines. (CONFIG_MAX_CPUS > 1).
</para>
</glossdef>
</glossentry>
@@ -1200,7 +1200,7 @@
<glossterm><acronym>UP</acronym></glossterm>
<glossdef>
<para>
- Uni-Processor: Non-SMP. (CONFIG_SMP=n).
+ Uni-Processor: Non-SMP. (CONFIG_MAX_CPUS=1).
</para>
</glossdef>
</glossentry>
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/alpha/Config.help working-2.5.21-numcpus/arch/alpha/Config.help
--- linux-2.5.21/arch/alpha/Config.help Thu Mar 21 14:14:37 2002
+++ working-2.5.21-numcpus/arch/alpha/Config.help Wed Jun 12 13:25:23 2002
@@ -1,29 +1,20 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, enter
+ the number of CPUs.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
+ you say 2 or more here, the kernel will run on many, but not all,
singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ will run faster if you say 1 here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_ALPHA
The Alpha is a 64-bit general-purpose processor designed and
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/alpha/config.in working-2.5.21-numcpus/arch/alpha/config.in
--- linux-2.5.21/arch/alpha/config.in Thu May 30 10:00:46 2002
+++ working-2.5.21-numcpus/arch/alpha/config.in Wed Jun 12 13:37:29 2002
@@ -227,7 +227,11 @@
-o "$CONFIG_ALPHA_TITAN" = "y" -o "$CONFIG_ALPHA_GENERIC" = "y" \
-o "$CONFIG_ALPHA_SHARK" = "y" ]
then
- bool 'Symmetric multi-processing support' CONFIG_SMP
+ int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+ if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+else
+ define_int CONFIG_MAX_CPUS 1
fi

if [ "$CONFIG_SMP" = "y" ]; then
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/i386/Config.help working-2.5.21-numcpus/arch/i386/Config.help
--- linux-2.5.21/arch/i386/Config.help Sat May 25 14:34:36 2002
+++ working-2.5.21-numcpus/arch/i386/Config.help Wed Jun 12 13:23:03 2002
@@ -1,29 +1,25 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
-
- If you say N here, the kernel will run on single and multiprocessor
- machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
- singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
+CONFIG_MAX_CPUS
+ You can enable support for systems with more than one CPU. If you have
+ a system with only one CPU, like most personal computers, say 1 for a
+ smaller, faster kernel. If you have a system with more than one CPU,
+ enter the number of CPUs you have (each extra CPU supported uses a
+ little more memory).

- Note that if you say Y here and choose architecture "586" or
+ Note that if you enter 2 or more here and choose architecture "586" or
"Pentium" under "Processor family", the kernel will not work on 486
architectures. Similarly, multiprocessor kernels for the "PPro"
architecture may not work on all Pentium based boards.

- People using multiprocessor machines who say Y here should also say
+ People using multiprocessor machines who say 2 or more here should also say
Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ Management" code will be disabled if you say 2 or more here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_PREEMPT
This option reduces the latency of the kernel when reacting to
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/i386/config.in working-2.5.21-numcpus/arch/i386/config.in
--- linux-2.5.21/arch/i386/config.in Mon Jun 10 16:03:47 2002
+++ working-2.5.21-numcpus/arch/i386/config.in Wed Jun 12 13:37:39 2002
@@ -185,7 +185,10 @@

bool 'Math emulation' CONFIG_MATH_EMULATION
bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
-bool 'Symmetric multi-processing support' CONFIG_SMP
+int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi
bool 'Preemptible Kernel' CONFIG_PREEMPT
if [ "$CONFIG_SMP" != "y" ]; then
bool 'Local APIC support on uniprocessors' CONFIG_X86_UP_APIC
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/i386/kernel/i386_ksyms.c working-2.5.21-numcpus/arch/i386/kernel/i386_ksyms.c
--- linux-2.5.21/arch/i386/kernel/i386_ksyms.c Thu May 30 10:00:47 2002
+++ working-2.5.21-numcpus/arch/i386/kernel/i386_ksyms.c Wed Jun 12 13:51:29 2002
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/tty.h>
+#include <linux/spinlock.h>

#include <asm/semaphore.h>
#include <asm/processor.h>
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/ia64/Config.help working-2.5.21-numcpus/arch/ia64/Config.help
--- linux-2.5.21/arch/ia64/Config.help Thu May 30 10:00:47 2002
+++ working-2.5.21-numcpus/arch/ia64/Config.help Wed Jun 12 13:26:26 2002
@@ -1,29 +1,20 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, say
+ the number of CPUs.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
+ you say 2 or more here, the kernel will run on many, but not all,
singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ will run faster if you say 1 here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_IA64
The Itanium is Intel's 64-bit successor to the 32-bit X86 line. As
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/ia64/config.in working-2.5.21-numcpus/arch/ia64/config.in
--- linux-2.5.21/arch/ia64/config.in Thu May 30 10:00:47 2002
+++ working-2.5.21-numcpus/arch/ia64/config.in Wed Jun 12 13:37:46 2002
@@ -89,7 +89,10 @@

define_bool CONFIG_KCORE_ELF y # On IA-64, we always want an ELF /proc/kcore.

-bool 'SMP support' CONFIG_SMP
+int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi
bool 'Support running of Linux/x86 binaries' CONFIG_IA32_SUPPORT
bool 'Performance monitor support' CONFIG_PERFMON
tristate '/proc/pal support' CONFIG_IA64_PALINFO
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/mips/Config.help working-2.5.21-numcpus/arch/mips/Config.help
--- linux-2.5.21/arch/mips/Config.help Thu Mar 21 14:14:41 2002
+++ working-2.5.21-numcpus/arch/mips/Config.help Wed Jun 12 13:27:19 2002
@@ -1,30 +1,3 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
-
- If you say N here, the kernel will run on single and multiprocessor
- machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
- singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
-
- See also the <file:Documentation/smp.tex>,
- <file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
- <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
- <http://www.linuxdoc.org/docs.html#howto>.
-
- If you don't know what to do here, say N.
-
CONFIG_IDE
If you say Y here, your kernel will be able to manage low cost mass
storage units such as ATA/(E)IDE and ATAPI units. The most common
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/mips/config.in working-2.5.21-numcpus/arch/mips/config.in
--- linux-2.5.21/arch/mips/config.in Tue Apr 23 11:39:33 2002
+++ working-2.5.21-numcpus/arch/mips/config.in Wed Jun 12 12:50:53 2002
@@ -4,6 +4,7 @@
#
define_bool CONFIG_MIPS y
define_bool CONFIG_SMP n
+define_bool CONFIG_NUM_CPUS 1

mainmenu_name "Linux Kernel Configuration"

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/mips64/Config.help working-2.5.21-numcpus/arch/mips64/Config.help
--- linux-2.5.21/arch/mips64/Config.help Thu Mar 21 14:14:41 2002
+++ working-2.5.21-numcpus/arch/mips64/Config.help Wed Jun 12 13:27:35 2002
@@ -1,29 +1,20 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, say
+ the number of CPUs.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
+ you say 2 or more here, the kernel will run on many, but not all,
singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ will run faster if you say 1 here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_IDE
If you say Y here, your kernel will be able to manage low cost mass
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/mips64/config.in working-2.5.21-numcpus/arch/mips64/config.in
--- linux-2.5.21/arch/mips64/config.in Tue Apr 23 11:39:33 2002
+++ working-2.5.21-numcpus/arch/mips64/config.in Wed Jun 12 13:37:50 2002
@@ -19,8 +19,13 @@
bool ' Mapped kernel support' CONFIG_MAPPED_KERNEL
bool ' Kernel text replication support' CONFIG_REPLICATE_KTEXT
bool ' Exception handler replication support' CONFIG_REPLICATE_EXHANDLERS
- bool ' Multi-Processing support' CONFIG_SMP
+ int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+ if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+ fi
#bool ' IP27 XXL' CONFIG_SGI_SN0_XXL
+else
+ define_int CONFIG_MAX_CPUS 1
fi
endmenu

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/parisc/Config.help working-2.5.21-numcpus/arch/parisc/Config.help
--- linux-2.5.21/arch/parisc/Config.help Thu Mar 21 14:14:41 2002
+++ working-2.5.21-numcpus/arch/parisc/Config.help Wed Jun 12 13:27:50 2002
@@ -1,30 +1,3 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
-
- If you say N here, the kernel will run on single and multiprocessor
- machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
- singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
-
- See also the <file:Documentation/smp.tex>,
- <file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
- <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
- <http://www.linuxdoc.org/docs.html#howto>.
-
- If you don't know what to do here, say N.
-
CONFIG_PARISC
The PA-RISC microprocessor is a RISC chip designed by
Hewlett-Packard and used in their line of workstations. The PA-RISC
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/parisc/config.in working-2.5.21-numcpus/arch/parisc/config.in
--- linux-2.5.21/arch/parisc/config.in Wed Feb 20 17:57:02 2002
+++ working-2.5.21-numcpus/arch/parisc/config.in Wed Jun 12 12:53:11 2002
@@ -16,6 +16,7 @@
comment 'General options'

# bool 'Symmetric multi-processing support' CONFIG_SMP
+define_int CONFIG_MAX_CPUS 1
define_bool CONFIG_SMP n

bool 'Kernel Debugger support' CONFIG_KWDB
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/ppc/Config.help working-2.5.21-numcpus/arch/ppc/Config.help
--- linux-2.5.21/arch/ppc/Config.help Thu May 30 10:00:48 2002
+++ working-2.5.21-numcpus/arch/ppc/Config.help Wed Jun 12 13:28:54 2002
@@ -1,25 +1,25 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, say N. If you have a system with more
- than one CPU, say Y. Note that the kernel does not currently
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, say
+ the number of CPUs. Note that the kernel does not currently
support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
since they have inadequate hardware support for multiprocessor
operation.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on single-processor machines.
- On a single-processor machine, the kernel will run faster if you say
- N here.
+ you say 2 or more here, the kernel will run on many, but not all,
+ singleprocessor machines. On a singleprocessor machine, the kernel
+ will run faster if you say 1 here.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_PREEMPT
This option reduces the latency of the kernel when reacting to
real-time or interactive events by allowing a low priority process to
be preempted even if it is in kernel mode executing a system call.
- Unfortunately the kernel code has some race conditions if both
- CONFIG_SMP and CONFIG_PREEMPT are enabled, so this option is
+ Unfortunately the kernel code has some race conditions if
+ CONFIG_MAX_CPUS is greater than one and CONFIG_PREEMPT is enabled, so this option is
currently disabled if you are building an SMP kernel.

Say Y here if you are building a kernel for a desktop, embedded
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/ppc/config.in working-2.5.21-numcpus/arch/ppc/config.in
--- linux-2.5.21/arch/ppc/config.in Thu May 30 10:00:49 2002
+++ working-2.5.21-numcpus/arch/ppc/config.in Wed Jun 12 13:37:59 2002
@@ -169,7 +169,10 @@
define_bool CONFIG_ALL_PPC n
fi

-bool 'Symmetric multi-processing support' CONFIG_SMP
+int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi
if [ "$CONFIG_SMP" = "y" ]; then
bool ' Distribute interrupts on all CPUs by default' CONFIG_IRQ_ALL_CPUS
fi
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/ppc64/config.in working-2.5.21-numcpus/arch/ppc64/config.in
--- linux-2.5.21/arch/ppc64/config.in Mon Jun 3 12:21:20 2002
+++ working-2.5.21-numcpus/arch/ppc64/config.in Wed Jun 12 13:38:02 2002
@@ -18,7 +18,10 @@
define_bool CONFIG_PPC y
define_bool CONFIG_PPC64 y

-bool 'Symmetric multi-processing support' CONFIG_SMP
+int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi
if [ "$CONFIG_SMP" = "y" ]; then
bool ' Distribute interrupts on all CPUs by default' CONFIG_IRQ_ALL_CPUS
if [ "$CONFIG_PPC_PSERIES" = "y" ]; then
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/s390/Config.help working-2.5.21-numcpus/arch/s390/Config.help
--- linux-2.5.21/arch/s390/Config.help Mon Jun 10 16:03:47 2002
+++ working-2.5.21-numcpus/arch/s390/Config.help Wed Jun 12 13:29:33 2002
@@ -1,29 +1,20 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, say
+ the number of CPUs.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
+ you say 2 or more here, the kernel will run on many, but not all,
singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ will run faster if you say 1 here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_MATHEMU
This option is required for IEEE compliant floating point arithmetic
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/s390/config.in working-2.5.21-numcpus/arch/s390/config.in
--- linux-2.5.21/arch/s390/config.in Mon Jun 10 16:03:47 2002
+++ working-2.5.21-numcpus/arch/s390/config.in Wed Jun 12 13:38:05 2002
@@ -18,7 +18,10 @@

mainmenu_option next_comment
comment 'Processor type and features'
-bool 'Symmetric multi-processing support' CONFIG_SMP
+int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi
bool 'IEEE FPU emulation' CONFIG_MATHEMU
endmenu

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/s390x/Config.help working-2.5.21-numcpus/arch/s390x/Config.help
--- linux-2.5.21/arch/s390x/Config.help Mon Jun 10 16:03:48 2002
+++ working-2.5.21-numcpus/arch/s390x/Config.help Wed Jun 12 13:29:38 2002
@@ -1,29 +1,20 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, say
+ the number of CPUs.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
+ you say 2 or more here, the kernel will run on many, but not all,
singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ will run faster if you say 1 here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_ISA
Find out whether you have ISA slots on your motherboard. ISA is the
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/s390x/config.in working-2.5.21-numcpus/arch/s390x/config.in
--- linux-2.5.21/arch/s390x/config.in Mon Jun 10 16:03:48 2002
+++ working-2.5.21-numcpus/arch/s390x/config.in Wed Jun 12 13:38:09 2002
@@ -18,7 +18,10 @@

mainmenu_option next_comment
comment 'Processor type and features'
-bool 'Symmetric multi-processing support' CONFIG_SMP
+int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi
bool 'Kernel support for 31 bit emulation' CONFIG_S390_SUPPORT
if [ "$CONFIG_S390_SUPPORT" = "y" ]; then
tristate 'Kernel support for 31 bit ELF binaries' CONFIG_BINFMT_ELF32
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/sparc/Config.help working-2.5.21-numcpus/arch/sparc/Config.help
--- linux-2.5.21/arch/sparc/Config.help Mon May 6 11:11:52 2002
+++ working-2.5.21-numcpus/arch/sparc/Config.help Wed Jun 12 13:29:45 2002
@@ -1,29 +1,20 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, say
+ the number of CPUs.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
+ you say 2 or more here, the kernel will run on many, but not all,
singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ will run faster if you say 1 here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_SPARC32
SPARC is a family of RISC microprocessors designed and marketed by
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/sparc/config.in working-2.5.21-numcpus/arch/sparc/config.in
--- linux-2.5.21/arch/sparc/config.in Mon May 6 11:11:52 2002
+++ working-2.5.21-numcpus/arch/sparc/config.in Wed Jun 12 13:38:13 2002
@@ -15,7 +15,10 @@
define_bool CONFIG_VT y
define_bool CONFIG_VT_CONSOLE y

-bool 'Symmetric multi-processing support (does not work on sun4/sun4c)' CONFIG_SMP
+int 'Maximum CPUs to support (1-32) (must be 1 on sun4/sun4c)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi

# Identify this as a Sparc32 build
define_bool CONFIG_SPARC32 y
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/sparc64/Config.help working-2.5.21-numcpus/arch/sparc64/Config.help
--- linux-2.5.21/arch/sparc64/Config.help Thu Mar 21 14:14:42 2002
+++ working-2.5.21-numcpus/arch/sparc64/Config.help Wed Jun 12 13:30:01 2002
@@ -1,29 +1,20 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, say
+ the number of CPUs.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
+ you say 2 or more here, the kernel will run on many, but not all,
singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ will run faster if you say 1 here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_PREEMPT
This option reduces the latency of the kernel when reacting to
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/sparc64/config.in working-2.5.21-numcpus/arch/sparc64/config.in
--- linux-2.5.21/arch/sparc64/config.in Mon May 6 16:00:09 2002
+++ working-2.5.21-numcpus/arch/sparc64/config.in Wed Jun 12 13:38:18 2002
@@ -14,7 +14,10 @@
define_bool CONFIG_VT y
define_bool CONFIG_VT_CONSOLE y

-bool 'Symmetric multi-processing support' CONFIG_SMP
+int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi
bool 'Preemptible kernel' CONFIG_PREEMPT

# Identify this as a Sparc64 build
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/x86_64/Config.help working-2.5.21-numcpus/arch/x86_64/Config.help
--- linux-2.5.21/arch/x86_64/Config.help Tue Apr 23 11:39:33 2002
+++ working-2.5.21-numcpus/arch/x86_64/Config.help Wed Jun 12 13:30:16 2002
@@ -1,29 +1,20 @@
-CONFIG_SMP
- This enables support for systems with more than one CPU. If you have
- a system with only one CPU, like most personal computers, say N. If
- you have a system with more than one CPU, say Y.
+CONFIG_MAX_CPUS
+ If you have a system with only one CPU, like most personal
+ computers, say 1. If you have a system with more than one CPU, say
+ the number of CPUs.

- If you say N here, the kernel will run on single and multiprocessor
+ If you say 1 here, the kernel will run on single and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
- you say Y here, the kernel will run on many, but not all,
+ you say 2 or more here, the kernel will run on many, but not all,
singleprocessor machines. On a singleprocessor machine, the kernel
- will run faster if you say N here.
-
- Note that if you say Y here and choose architecture "586" or
- "Pentium" under "Processor family", the kernel will not work on 486
- architectures. Similarly, multiprocessor kernels for the "PPro"
- architecture may not work on all Pentium based boards.
-
- People using multiprocessor machines who say Y here should also say
- Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
- Management" code will be disabled if you say Y here.
+ will run faster if you say 1 here.

See also the <file:Documentation/smp.tex>,
<file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
<http://www.linuxdoc.org/docs.html#howto>.

- If you don't know what to do here, say N.
+ If you don't know what to do here, say 1.

CONFIG_X86
This is Linux's home port. Linux was originally native to the Intel
diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal linux-2.5.21/arch/x86_64/config.in working-2.5.21-numcpus/arch/x86_64/config.in
--- linux-2.5.21/arch/x86_64/config.in Mon May 6 16:00:09 2002
+++ working-2.5.21-numcpus/arch/x86_64/config.in Wed Jun 12 13:38:21 2002
@@ -43,7 +43,10 @@

#currently broken:
#bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
-bool 'Symmetric multi-processing support' CONFIG_SMP
+int 'Maximum CPUs to support (1-32)' CONFIG_MAX_CPUS 1
+if [ "$CONFIG_MAX_CPUS" != "1" ]; then
+ define_bool CONFIG_SMP y
+fi
bool 'Preemptible Kernel' CONFIG_PREEMPT
if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
define_bool CONFIG_HAVE_DEC_LOCK y

2002-06-12 05:54:10

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

In message <[email protected]> you write:
> >In which case, CONFIG_NR_CPUS is the only way to get the memory
> >back...
>
> Why? You can get rid of all uses of NR_CPUS (except for using it as a max
> capping value so none goes above it) and always use smp_num_cpus instead.
> And make the cpu hotplug code update smp_num_cpus as appropriate.

You remove CPU 2 of 4 and the others renumber? Everyone using per-cpu
buffers needs to write code to move them. And what do apps bound to
CPU 3 do? What about *their* per-cpu data structures?

> So zero penalty for non-hotplug users and loads of penalty for hotplug
> users but frankly I couldn't care less for those. The slow path will
> trigger so seldom it is not worth thinking about the performance hit there.

And a greater requirement for everyone using per-cpu buffers (which
are becoming more common, not less) to write more code. And it
doesn't deal with CPU removal.

> There are a lot of ways to deal with this corner case dynamically, so
> please use one of them. I don't buy the "lets penalise 99% of users for the
> sake of a feature that almost noone will ever use" argument.

Sorry, you're arguing to maintain a traditionally problematic
interface for an unmeasurable time benefit, and a slight space benefit
(on SMP machines, where no one has cared about space until recently).

Now, you *could* only allocate buffers for cpus where cpu_possible(i)
is true, once the rest of the patch goes in. That would be a valid
optimization.

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-06-12 07:18:11

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

In message <[email protected]> you write:
> From: Rusty Russell <[email protected]>
> Date: Tue, 11 Jun 2002 19:09:44 +1000
>
> In message <[email protected]> you write:
> > and slowdown:
>
> ARGH! STOP IT! I realize it's 'leet to be continually worrying about
> possible microoptimizations, but I challenge you to *measure* the
> slowdown between:
>
> Regardless, his space arguments still hold.

You can allocate based on cpu_possible(cpu) (which is in the
next patch) if you like, but I think you're better off fixing the
existing NR_CPUS bloat as well, and keeping all the code simple.

> I don't like having everyone eat the overhead that hotplugging cpus
> seem to entail.

But there's an important difference between something which is
simple and unoptimized, and something which is unoptimizable.

> And remember, it's the anal "every microoptimization at all costs"
> people that keep the kernel sane and from running out of control bloat
> wise.

But it also gave us crap like net/ipv4/route.c:ip_rt_acct_read() 8(

I know *you* benchmark and read the asm during optimization, but it's
quite clear that others are so scared of "bloat" criticism that they
optimize without measuring the straightforward case *first*.

Remember, to be cool:
1) Use bitops and memory barriers not spinlocks,
2) Use inlines everywhere,
3) Use likely()/unlikely() on every branch
4) Use prefetch() everywhere,
5) Use gotos to minimize the path length
6) __set_current_state() not set_current_state()
7) Pass in current as a function param

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-06-12 07:53:47

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 06:57 12/06/02, Rusty Russell wrote:
>In message <[email protected]> you write:
> > >In which case, CONFIG_NR_CPUS is the only way to get the memory
> > >back...
> >
> > Why? You can get rid of all uses of NR_CPUS (except for using it as a max
> > capping value so none goes above it) and always use smp_num_cpus instead.
> > And make the cpu hotplug code update smp_num_cpus as appropriate.
>
>You remove CPU 2 of 4 and the others renumber?

I would hope not! That would be insane. I am only talking about adding
CPUs. Who cares if you remove one. The buffers can stay allocated. Chances
are you will be adding a replacement very soon anyway.

>Everyone using per-cpu buffers needs to write code to move them. And what
>do apps bound to CPU 3 do? What about *their* per-cpu data structures?
>
> > So zero penalty for non-hotplug users and loads of penalty for hotplug
> > users but frankly I couldn't care less for those. The slow path will
> > trigger so seldom it is not worth thinking about the performance hit there.
>
>And a greater requirement for everyone using per-cpu buffers (which
>are becoming more common, not less) to write more code. And it
>doesn't deal with CPU removal.

And it doesn't need to.

> > There are a lot of ways to deal with this corner case dynamically, so
> > please use one of them. I don't buy the "lets penalise 99% of users for
> the
> > sake of a feature that almost noone will ever use" argument.
>
>Sorry, you're arguing to maintain a traditionally problematic
>interface for an unmeasurable time benifit, and a slight space benefit
>(on SMP machines, where noone has cared space about until recently).

I guess we disagree about the definition of "slight" space benefit... I
used to have a dual-Celeron with 128mb ram for a while. Throwing away 1-2mb
just for a single driver is throwing away 1-2% of all RAM. And I would
think that with more drivers it quickly adds up to 5-10% of RAM. And that
sounds like way too much waste to me.

RAM is cheap if you are using a hot plug 32-CPU system, sure. But if you
are using a low-end SMP system ram is more expensive than the rest of the
system all together so it is not cheap at all. It is just a question of
perspective.

>Now, you *could* only allocate buffers for cpus where cpu_possible(i)
>is true, once the rest of the patch goes in. That would be a valid
>optimization.

Please explain. What is cpu_possible()?

btw. I agree that CONFIG_NR_CPUS or whatever it is called would solve my
problems. It is only in distro kernels where they are likely to leave it to
the maximum value and most people use distro kernels...

Best regards,

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-12 08:00:48

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

In message <[email protected]> you write:
> > --- linux-2.5.21.24110/fs/ntfs/compress.c Sat May 25 14:34:53 2002
> > return -ENOMEM;
> > - for (i = 0; i < smp_num_cpus; i++) {
> > + for (i = 0; i < NR_CPUS; i++) {
> > ntfs_compression_buffers[i] = (u8*)vmalloc(NTFS_MAX_CB_SIZE);
> > if (!ntfs_compression_buffers[i])
> > break;
>
> 2Mbytes !!!!!!
>
> Add a cpu count changed notifier ?

There is one in the next patch, of course. But with that patch you
also get cpu_possible():

for (i = 0; i < NR_CPUS; i++) {
if (!cpu_possible(i)) {
ntfs_compression_buffers[i] = NULL;
continue;
}

Hope that clarifies,
Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-06-12 08:02:26

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

In message <[email protected]> you write:
> >Now, you *could* only allocate buffers for cpus where cpu_possible(i)
> >is true, once the rest of the patch goes in. That would be a valid
> >optimization.
>
> Please explain. What is cpu_possible()?

>From Hotcpu/hotcpu-boot-i386.patch.gz:

--- working-2.5.19-pre-hotcpu/include/asm-i386/smp.h Tue Jun 4 15:37:09 2002
+++ working-2.5.19-hotcpu/include/asm-i386/smp.h Mon Jun 3 18:00:09 2002
@@ -93,6 +94,8 @@
#define smp_processor_id() (current_thread_info()->cpu)

#define cpu_online(cpu) (cpu_online_map & (1<<(cpu)))
+
+#define cpu_possible(cpu) (phys_cpu_present_map & (1<<(cpu)))

extern inline unsigned int num_online_cpus(void)
{

ie. "Can this CPU number *ever* exist?", for exactly this kind of
optimization. It looks like it was a mistake to leave that to a later
patch, but I didn't appreciate the 64k-per-cpu buffer for NTFS (what
is it for, by the way? per-cpu buffering for a filesystem seems, um,
weird).

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-06-12 08:22:43

by Alan

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

> --- linux-2.5.21.24110/fs/ntfs/compress.c Sat May 25 14:34:53 2002
> return -ENOMEM;
> - for (i = 0; i < smp_num_cpus; i++) {
> + for (i = 0; i < NR_CPUS; i++) {
> ntfs_compression_buffers[i] = (u8*)vmalloc(NTFS_MAX_CB_SIZE);
> if (!ntfs_compression_buffers[i])
> break;

2Mbytes !!!!!!

Add a cpu count changed notifier ?

2002-06-12 08:24:29

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 09:06 12/06/02, Rusty Russell wrote:
>In message <[email protected]> you write:
> > >Now, you *could* only allocate buffers for cpus where cpu_possible(i)
> > >is true, once the rest of the patch goes in. That would be a valid
> > >optimization.
> >
> > Please explain. What is cpu_possible()?
>
> >From Hotcpu/hotcpu-boot-i386.patch.gz:
>
>--- working-2.5.19-pre-hotcpu/include/asm-i386/smp.h Tue Jun 4
>15:37:09 2002
>+++ working-2.5.19-hotcpu/include/asm-i386/smp.h Mon Jun 3
>18:00:09 2002
>@@ -93,6 +94,8 @@
> #define smp_processor_id() (current_thread_info()->cpu)
>
> #define cpu_online(cpu) (cpu_online_map & (1<<(cpu)))
>+
>+#define cpu_possible(cpu) (phys_cpu_present_map & (1<<(cpu)))
>
> extern inline unsigned int num_online_cpus(void)
> {
>
>ie. "Can this CPU number *ever* exist?", for exactly this kind of
>optimization.

Aha, now we are talking! This looks like it will restore the current memory
usage just fine.

> It looks like it was a mistake to leave that to a later
>patch, but I didn't appreciate the 64k-per-cpu buffer for NTFS (what
>is it for, by the way? per-cpu buffering for a filesystem seems, um,
>wierd).

It is used by the NTFS decompression engine. When implementing
decompression I went for using a single linear buffer holding the
compressed data to avoid having to switch pages midstream of memcpy()s
multi byte assignments, etc. (It could be argued that I was lazy but I
think it makes sense from a performance point of view.) After making that
decision I saw three choices:

1) Use a single buffer and lock it so once one file is under decompression
no other files can be and if multiple compressed files are being accessed
simultaneously on different CPUs only one CPU would be decompressing. The
others would be waiting for the lock. (Obviously scheduling and doing other
stuff.)

2) Use multiple buffers and allocate a buffer every time the decompression
engine is used. Note this means a vmalloc()+vfree() in EVERY ->readpage()
for a compressed file!

3) Use one buffer for each CPU and use a critical section during
decompression (disable preemption, don't sleep). Allocated at mount time of
first partition supporting compression. Freed at umount time of last
partition supporting compression.

I think it is obvious why I went for 3)...

Best regards,

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-12 08:36:23

by Helge Hafting

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Andrew Morton wrote:
>
> Rusty Russell wrote:
> >
> > ...
> > Let's not perpetuate the myth that everything in the kernel needs to
> > be tuned to the last cycle at all costs, hm?
>
> I was more concerned about the RAM use, actually.
>
> This patch is an additional reason for CONFIG_NR_CPUS, but I've rather
> gone cold on that idea because the "proper fix" is to make all those
> huge per-cpu arrays dynamically allocated. So you can run a 64p kernel
> on 2p without losing hundreds of k of memory and kernel address space.
>
> But it looks like all those dynamically-allocated structures would
> have to be allocated out to NR_CPUS anyway, to support hotplug, yes?
>
> In which case, CONFIG_NR_CPUS is the only way to get the memory
> back...

Re-allocation of tables when adding CPUs is another
option. That means the data moves - so others have to store
array indices instead of direct pointers to stuff they use.
Dynamic allocation not merely at boot time, but every time.

Adding a CPU becomes more expensive, but that won't
happen hundreds of times a second anyway.

Helge Hafting

2002-06-12 09:31:01

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

From: Rusty Russell <[email protected]>
Date: Wed, 12 Jun 2002 16:58:23 +1000

In message <[email protected]> you write:
> And remember, it's the anal "every microoptimization at all costs"
> people that keep the kernel sane and from running out of control bloat
> wise.

But it also gave us crap like net/ipv4/route.c:ip_rt_acct_read() 8(

That's far from being an attempt at optimization :-)
Furthermore, cleanup patches are always happily accepted.
9 out of 10 2.5.x networking patches I apply are cleanups
from Arnaldo these days.

2002-06-12 13:10:58

by Mikael Pettersson

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Rusty,

You've stated that you don't care about micro-optimisations,
but please consider providing an iteration construct with
O(nr_online_cpus) rather than O(NR_CPUS) (*) complexity. One
suitable data structure for this was described by this paper
<http://softlib.rice.edu/MSCP/papers/loplas.sets.ps.gz>
by Preston Briggs and Linda Torczon.

This means keeping a logical->physical map and iterating like this:

for(i = 0; i < nr_online_cpus; ++i)
do_something_with(cpu_logical_map(i));

but since cpu add/remove events are quite rare, the overhead for
maintaining that map is negligible. Note: a cpu would be identified
by its physical number only; the logical numbers are just for
enumeration and don't need to stay the same over add/remove events.

With this and a callback that informs me of add/remove events,
I would have no problems with the nonlinear CPU patch.

(I care because my performance-monitoring counters driver by necessity
is closely tied to CPU identities and the set of online CPUs.)

/Mikael

(*) At least as long as NR_CPUS defaults to 32 on x86.

2002-06-12 14:36:14

by Denis Vlasenko

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On 11 June 2002 12:54, Anton Altaparmakov wrote:
> > > This is crazy! It means you are allocating 2MiB of memory instead of
> > > just 128kiB on a 2 CPU system, which will be about 99% of the SMP
> > > systems in use, at my guess. So your change is throwing away 1920kiB of
> > > kernel ram for no reason at all. And that is just ntfs...
> >
> >Wait a minute.
> >These buffers are allocated per CPU. Can we allocate additional ones when
> >new CPU is added?
>
> Of course, see my suggestion for how to handle this in the post after the
> one you replied to.
>
> >I do hope these buffers aren't allocated an boot time but at mount time,
> >are they?
>
> At mount time and only if the volume supports compression. And they are
> ntfs global, i.e. not per mount point. That is still a big ram waste.

It's optimal to allocate buffers when they are needed.
Think about an NTFS volume without any compressed files at all.

CPU hotswap highlights the fact that per CPU allocation needs to be smarter
about doing its job (i.e. don't allocate if it won't be used ever,
defer allocation to CPU hotswap event).

OTOH, smarter code is longer, more difficult code. One has to weigh
memory benefits for a small population of 'hot swappers'
versus code simplicity.
--
vda

2002-06-12 15:07:12

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 20:32 12/06/02, Denis Vlasenko wrote:
>On 11 June 2002 12:54, Anton Altaparmakov wrote:
> > > > This is crazy! It means you are allocating 2MiB of memory instead of
> > > > just 128kiB on a 2 CPU system, which will be about 99% of the SMP
> > > > systems in use, at my guess. So your change is throwing away 1920kiB of
> > > > kernel ram for no reason at all. And that is just ntfs...
> > >
> > >Wait a minute.
> > >These buffers are allocated per CPU. Can we allocate additional ones when
> > >new CPU is added?
> >
> > Of course, see my suggestion for how to handle this in the post after the
> > one you replied to.
> >
> > >I do hope these buffers aren't allocated an boot time but at mount time,
> > >are they?
> >
> > At mount time and only if the volume supports compression. And they are
> > ntfs global, i.e. not per mount point. That is still a big ram waste.
>
>It's optimal to allocate buffers when they are needed.
>Thnk about an NTFS volume without any compressed files at all.

No buffers are allocated if the volume doesn't support compression at all.
But if it does support them then we allocate them, even if then there are
no compressed files as there is no quick way to tell the difference.

Buffer allocation at use time is NOT an option because the buffers are
allocated using vmalloc() which is extremely expensive and we would need to
allocate at every single initial ->readpage() call of a compressed file.

>CPU hotswap higlights the fact that per CPU allocation needs to be smarter
>about doing its job (i.e. don't allocate if it won't be used ever,
>defer allocation to CPU hotswap event).

The former is not possible for ntfs as there is no quick way to tell if the
user will use decompression or not. And the latter creates a lot of complexity.
I gave an example using a callback of how it could be done in a previous
post but I don't like introducing complexity for a minority group of users.

>OTOH, smarter code is longer, more difficult code. One have to weigh
>memory benefits for small population of 'hot swappers' versus code simplicity.

Exactly. However all is well, if you have read the whole thread you will
have seen the cpu_possible() optimization which allows allocating for
actual existing CPU slots which means there is no wasted RAM or at least
very little... On a 32 CPU machine I agree that it is irrelevant if you are
wasting a few MiB of ram, you probably have multiple GiB of the stuff anyway...

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-12 18:21:24

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Jun 12, 2002 16:08 +0100, Anton Altaparmakov wrote:
> Buffer allocation at use time is NOT an option because the buffers are
> allocated using vmalloc() which is extremely expensive and we would need to
> allocate at every single initial ->readpage() call of a compressed file.
>
> >CPU hotswap higlights the fact that per CPU allocation needs to be smarter
> >about doing its job (i.e. don't allocate if it won't be used ever,
> >defer allocation to CPU hotswap event).
>
> The former is not possible for ntfs as there is no quick way to tell if use
> will use decompression or not. And the latter creates a lot of complexity.
> I gave an example using a callback of how it could be done in a previous
> post but I don't like introducing complexity for a minority group of users.

I think the reasonable solution is as follows:
1) Allocate an array of NULL pointers which is NR_CPUs in size (you could do
this all the time, as it would only be a few bytes)
2) If you need to do decompression on a cpu you check the array entry
for that CPU and if it is NULL you vmalloc() the decompression buffers once
for that CPU. This avoids vmalloc() overhead for each read.
3) Any allocated buffers are freed in the same manner they are now -
when the last compressed volume is unmounted. There may be some or
all entries that are still NULL.

This also avoids allocating buffers when there are no files which are
actually compressed.

Cheers, Andreas
--
Andreas Dilger
http://www-mddsp.enel.ucalgary.ca/People/adilger/
http://sourceforge.net/projects/ext2resize/

2002-06-12 18:34:14

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 18:36 12/06/02, Andreas Dilger wrote:
>On Jun 12, 2002 16:08 +0100, Anton Altaparmakov wrote:
> > Buffer allocation at use time is NOT an option because the buffers are
> > allocated using vmalloc() which is extremely expensive and we would
> need to
> > allocate at every single initial ->readpage() call of a compressed file.
> >
> > >CPU hotswap higlights the fact that per CPU allocation needs to be smarter
> > >about doing its job (i.e. don't allocate if it won't be used ever,
> > >defer allocation to CPU hotswap event).
> >
> > The former is not possible for ntfs as there is no quick way to tell if
> use
> > will use decompression or not. And the latter creates a lot of complexity.
> > I gave an example using a callback of how it could be done in a previous
> > post but I don't like introducing complexity for a minority group of users.
>
>I think the reasonable solution is as follows:
>1) Allocate an array of NULL pointers which is NR_CPUs in size (you could do
> this all the time, as it would only be a few bytes)

Yes, that is fine.

>2) If you need to do decompression on a cpu you check the array entry
> for that CPU and if is NULL you vmalloc() the decompression buffers once
> for that CPU. This avoid vmalloc() overhead for each read.

The vmalloc() sleeps and by the time you get control back you are executing
on a different CPU. Ooops. The only valid way of treating per-cpu data is:

- disable preemption
- get the cpu number = START OF CRITICAL SECTION: no sleep/schedule allowed
- do work using the cpu number
- reenable preemption = END OF CRITICAL SECTION

The only thing that could possibly be used inside the critical region is
kmalloc(GFP_ATOMIC) but we are allocating 64kiB so that is not an option.
(It would fail very quickly due to memory fragmentation, the order of the
allocation is too high.)

>3) Any allocated buffers are freed in the same manner they are now -
> when the last compressed volume is unmounted. There may be some or
> all entries that are still NULL.
>
>This also avoids allocating buffers when there are no files which are
>actually compressed.

True it does, but unfortunately it doesn't work. )-:

Best regards,

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-12 19:35:25

by Linus Torvalds

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support


Hmm.. Since the cpu_online_map thing can be used to fix this, this doesn't
seem to be a big issue, BUT

On Wed, 12 Jun 2002, Anton Altaparmakov wrote:
>
> 1) Use a single buffer and lock it so once one file is under decompression
> no other files can be and if multiple compressed files are being accessed
> simultaneously on different CPUs only one CPU would be decompressing. The
> others would be waiting for the lock. (Obviously scheduling and doing other
> stuff.)
>
> 2) Use multiple buffers and allocate a buffer every time the decompression
> engine is used. Note this means a vmalloc()+vfree() in EVERY ->readpage()
> for a compressed file!
>
> 3) Use one buffer for each CPU and use a critical section during
> decompression (disable preemption, don't sleep). Allocated at mount time of
> first partition supporting compression. Freed at umount time of last
> partition supporting compression.
>
> I think it is obvious why I went for 3)...

I don't see that as being all that obvious. The _obvious_ choice is just
(1), protected by a simple spinlock. 128kB/CPU seems rather wasteful,
especially as the only thing it buys you is scalability on multiple CPU's
for the case where you have multiple readers all at the same time touching
a new compressed block.

That scalability operation seems dubious, especially since this will only
happen when you just had to do IO anyway, so in order to actually take
advantage of the scalability that IO would have had to happen on multiple
separate controllers.

Ehh?

Linus

2002-06-12 19:41:58

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Jun 12, 2002 19:34 +0100, Anton Altaparmakov wrote:
> At 18:36 12/06/02, Andreas Dilger wrote:
> >1) Allocate an array of NULL pointers which is NR_CPUs in size (you could
> > do this all the time, as it would only be a few bytes)
>
> Yes, that is fine.
>
> >2) If you need to do decompression on a cpu you check the array entry
> > for that CPU and if is NULL you vmalloc() the decompression buffers once
> > for that CPU. This avoid vmalloc() overhead for each read.
>
> The vmalloc() sleeps and by the time you get control back you are executing
> on a different CPU. Ooops. The only valid way of treating per-cpu data is:
>
> - disable preemption
> - get the cpu number = START OF CRITICAL SECTION: no sleep/schedule allowed
> - do work using the cpu number
> - reenable preemption = END OF CRITICAL SECTION
>
> The only thing that could possibly be used inside the critical region is
> kmalloc(GFP_ATOMIC) but we are allocating 64kiB so that is not an option.
> (It would fail very quickly due to memory fragmentation, the order of the
> allocation is too high.)

Well, then you can still do the one-time allocation for that CPU slot,
and re-check the CPU number after vmalloc() returns. If it is different
(or always, for that matter) then you jump back to the "is the array for
this CPU allocated" check until the array _is_ allocated for that CPU
and you don't need to allocate it (so you won't sleep). At most you
will need to loop once for each available CPU if you are unlucky enough
to be rescheduled to a different CPU after each call to vmalloc().

Like:
int cpunum = this_cpu();
char *newbuf = NULL;

while (unlikely(NTFS_SB(sb)->s_compr_array[cpunum] == NULL)) {
newbuf = vmalloc(NTFS_DECOMPR_BUFFER_SIZE);

/* Re-check the buffer case we slept in vmalloc() and
* someone else already allocated a buffer for "this" CPU.
*/
if (likely(NTFS_SB(sb)->s_compr_array[cpunum] == NULL)) {
NTFS_SB(sb)->s_compr_array[cpunum] = newbuf;
newbuf = NULL;
}
cpunum = this_cpu();
}
/* Hmm, we slept in vmalloc and we don't need the new buffer */
if (unlikely(newbuf != NULL))
vfree(newbuf);

> >3) Any allocated buffers are freed in the same manner they are now -
> > when the last compressed volume is unmounted. There may be some or
> > all entries that are still NULL.
> >
> >This also avoids allocating buffers when there are no files which are
> >actually compressed.
>
> True it does, but unfortunately it doesn't work. )-:

Now it does... ;-).

Cheers, Andreas
--
Andreas Dilger
http://www-mddsp.enel.ucalgary.ca/People/adilger/
http://sourceforge.net/projects/ext2resize/

2002-06-12 20:03:24

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Followup to: <[email protected]>
By author: Anton Altaparmakov <[email protected]>
In newsgroup: linux.dev.kernel
>
> >2) If you need to do decompression on a cpu you check the array entry
> > for that CPU and if is NULL you vmalloc() the decompression buffers once
> > for that CPU. This avoid vmalloc() overhead for each read.
>
> The vmalloc() sleeps and by the time you get control back you are executing
> on a different CPU. Ooops. The only valid way of treating per-cpu data is:
>
> - disable preemption
> - get the cpu number = START OF CRITICAL SECTION: no sleep/schedule allowed
> - do work using the cpu number
> - reenable preemption = END OF CRITICAL SECTION
>

Actually, that doesn't matter, because it's a quickly convergent
operation.

Basically, once you've been invoked on a particular CPU once, you are
pretty much guaranteed to get invoked on that same CPU again, so the
fact that you may end up using a different buffer post-allocation is
not an issue.

Have an array and a semaphore called here allocation_semaphore:

/* PSEUDO-CODE */

while ( 1 ) {
disable_preemption();
cpu = current_cpu();
if ( decompression_buffers[cpu] ) {
do_decompression(decompression_buffers[cpu]);
enable_preemption();
break; /* DONE, EXIT LOOP */
} else {
enable_preemption();
down_sem(allocation_semaphore);
/* Avoid race condition here */
if ( !decompression_buffers[cpu] )
decompression_buffers[cpu] = vmalloc(BUFFER_SIZE);
up_sem(allocation_semaphore);
}
}

Note that there is no requirement that we're still on cpu "cpu" when
we allocate the buffer. Furthermore, if we fail, we just loop right
back to the top.

-hpa

--
<[email protected]> at work, <[email protected]> in private!
"Unix gives you enough rope to shoot yourself in the foot."
http://www.zytor.com/~hpa/puzzle.txt <[email protected]>

2002-06-12 20:41:15

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 20:39 12/06/02, Andreas Dilger wrote:
>On Jun 12, 2002 19:34 +0100, Anton Altaparmakov wrote:
> > At 18:36 12/06/02, Andreas Dilger wrote:
> > >1) Allocate an array of NULL pointers which is NR_CPUs in size (you could
> > > do this all the time, as it would only be a few bytes)
> >
> > Yes, that is fine.
> >
> > >2) If you need to do decompression on a cpu you check the array entry
> > > for that CPU and if is NULL you vmalloc() the decompression buffers
> once
> > > for that CPU. This avoid vmalloc() overhead for each read.
> >
> > The vmalloc() sleeps and by the time you get control back you are
> executing
> > on a different CPU. Ooops. The only valid way of treating per-cpu data is:
> >
> > - disable preemption
> > - get the cpu number = START OF CRITICAL SECTION: no sleep/schedule allowed
> > - do work using the cpu number
> > - reenable preemption = END OF CRITICAL SECTION
> >
> > The only thing that could possibly be used inside the critical region is
> > kmalloc(GFP_ATOMIC) but we are allocating 64kiB so that is not an option.
> > (It would fail very quickly due to memory fragmentation, the order of the
> > allocation is too high.)
>
>Well, then you can still do the one-time allocation for that CPU slot,
>and re-check the CPU number after vmalloc() returns. If it is different
>(or always, for that matter) then you jump back to the "is the array for
>this CPU allocated" check until the array _is_ allocated for that CPU
>and you don't need to allocate it (so you won't sleep). At most you
>will need to loop once for each available CPU if you are unlucky enough
>to be rescheduled to a different CPU after each call to vmalloc().
>
>Like:
> int cpunum = this_cpu();
> char *newbuf = NULL;
>
> while (unlikely(NTFS_SB(sb)->s_compr_array[cpunum] == NULL)) {

Um are you suggesting compression buffers to be per mounted volume? That
would be more wasteful than the current approach of one buffer per CPU
globally for all of ntfs driver.

> newbuf = vmalloc(NTFS_DECOMPR_BUFFER_SIZE);
>
> /* Re-check the buffer case we slept in vmalloc() and
> * someone else already allocated a buffer for "this" CPU.
> */
> if (likely(NTFS_SB(sb)->s_compr_array[cpunum] == NULL)) {
> NTFS_SB(sb)->s_compr_array[cpunum] = newbuf;
> newbuf = NULL;
> }
> cpunum = this_cpu();
> }
> /* Hmm, we slept in vmalloc and we don't need the new buffer */
> if (unlikely(newbuf != NULL))
> vfree(newbuf);

vfree() at a guess (I may be completely wrong on that one in which case I
apologize!) can also sleep so that breaks that scheme.

> > >3) Any allocated buffers are freed in the same manner they are now -
> > > when the last compressed volume is unmounted. There may be some or
> > > all entries that are still NULL.
> > >
> > >This also avoids allocating buffers when there are no files which are
> > >actually compressed.
> >
> > True it does, but unfortunately it doesn't work. )-:
>
>Now it does... ;-).

Perhaps. But if doing something like that I might as well use the present
approach and just allocate all buffers at once if they haven't been
allocated yet and be done with it. Then no vfree()s are needed either and
then it really does work. (-;

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-12 20:54:56

by Robert Love

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Wed, 2002-06-12 at 13:03, H. Peter Anvin wrote:

> while ( 1 ) {
> disable_preemption();
> cpu = current_cpu();
> if ( decompression_buffers[cpu] ) {
> do_decompression(decompression_buffers[cpu]);
> enable_preemption();
> break; /* DONE, EXIT LOOP */
> } else {
> enable_preemption();
> down_sem(allocation_semaphore);
> /* Avoid race condition here */
> if ( !decompression_buffers[cpu] )
> decompression_buffers[cpu] = vmalloc(BUFFER_SIZE);
> up_sem(allocation_semaphore);
> }
> }

Just a note, in 2.5 we recently added put_cpu() and get_cpu() that work
basically like:

int cpu;

cpu = get_cpu();
/* critical non-preemptible section */
put_cpu();

i.e., a preempt-safe interface to smp_processor_id() that disables and
enables preemption for you... makes it a little easier.

Robert Love

2002-06-12 20:56:34

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 21:03 12/06/02, H. Peter Anvin wrote:
>Followup to: <[email protected]>
>By author: Anton Altaparmakov <[email protected]>
>In newsgroup: linux.dev.kernel
> >
> > >2) If you need to do decompression on a cpu you check the array entry
> > > for that CPU and if is NULL you vmalloc() the decompression
> buffers once
> > > for that CPU. This avoid vmalloc() overhead for each read.
> >
> > The vmalloc() sleeps and by the time you get control back you are
> executing
> > on a different CPU. Ooops. The only valid way of treating per-cpu data is:
> >
> > - disable preemption
> > - get the cpu number = START OF CRITICAL SECTION: no sleep/schedule allowed
> > - do work using the cpu number
> > - reenable preemption = END OF CRITICAL SECTION
>
>Actually, that doesn't matter, because it's a quickly convergent
>operation.
>
>Basically, once you've been invoked on a particular CPU once, you are
>pretty much guaranteed to get invoked on that same CPU again, so the
>fact that you may end up using a different buffer post-allocation is
>not an issue.
>
>Have an array and a semaphore called here allocation_semaphore:

I have a semaphore serializing allocation already. (-:

>/* PSEUDO-CODE */
>
>while ( 1 ) {
> disable_preemption();
> cpu = current_cpu();
> if ( decompression_buffers[cpu] ) {
> do_decompression(decompression_buffers[cpu]);
> enable_preemption();
> break; /* DONE, EXIT LOOP */
> } else {
> enable_preemption();
> down_sem(allocation_semaphore);
> /* Avoid race condition here */
> if ( !decompression_buffers[cpu] )
> decompression_buffers[cpu] = vmalloc(BUFFER_SIZE);
> up_sem(allocation_semaphore);
> }
>}
>
>Note that there is no requirement that we're still on cpu "cpu" when
>we allocate the buffer. Furthermore, if we fail, we just loop right
>back to the top.

What is the point though? Why not just:

if (!unlikely(decompression_buffers)) {
down_sem();
allocate_decompression_buffers();
up_sem();
}

And be done with it?

I don't see any justification for the increased complexity...

Best regards,

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-12 21:06:32

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Jun 12, 2002 21:41 +0100, Anton Altaparmakov wrote:
> At 20:39 12/06/02, Andreas Dilger wrote:
> >At most you
> >will need to loop once for each available CPU if you are unlucky enough
> >to be rescheduled to a different CPU after each call to vmalloc().
>
> Um are you suggesting compression buffers to be per mounted volume? That
> would be more wasteful than the current approach of one buffer per CPU
> globally for all of ntfs driver.

No, my mistake. You should check whatever array you want.

> vfree() at a guess (I may be completely wrong on that one in which case I
> appologize!) can also sleep so that breaks that scheme.

Well, just get rid of the while loop then and use an if+goto for both
the vmalloc and the vfree case. At most we can loop NR_CPUS times.


char *newbuf = NULL;
int cpunum;

recheck:
cpunum = current_cpu();
if (unlikely(ntfs_compr_array[cpunum] == NULL)) {
newbuf = vmalloc(NTFS_DECOMPR_BUFFER_SIZE);

/*
* Re-check the buffer case we slept in vmalloc() and
* someone else already allocated a buffer for "this" CPU.
*/
if (likely(ntfs_compr_array[cpunum] == NULL)) {
ntfs_compr_array[cpunum] = newbuf;
newbuf = NULL;
}
goto recheck;
}
/* Hmm, we slept in vmalloc and we don't need the new buffer */
if (unlikely(newbuf != NULL)) {
vfree(newbuf);
goto recheck;
}

> But if doing something like that I might as well use the present
> approach and just allocate all buffers at once if they haven't been
> allocated yet and be done with it. Then no vfree()s are needed either and
> then it really does work. (-;

But then you may be allocating a lot of memory for CPUs that don't
even exist, which is the whole point of this exercise. Better to do
it on-demand and loop for the very few times needed.

Cheers, Andreas
--
Andreas Dilger
http://www-mddsp.enel.ucalgary.ca/People/adilger/
http://sourceforge.net/projects/ext2resize/

2002-06-12 21:15:30

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 22:03 12/06/02, Andreas Dilger wrote:
>On Jun 12, 2002 21:41 +0100, Anton Altaparmakov wrote:
> > But if doing something like that I might as well use the present
> > approach and just allocate all buffers at once if they haven't been
> > allocated yet and be done with it. Then no vfree()s are needed either and
> > then it really does work. (-;
>
>But then you may be allocating a lot of memory for CPUs that don't
>even exist, which is the whole point of this exercise. Better to do
>it on-demand and loop for the very few times needed.

Sorry I omitted a step from my train of thought when writing the above. I
was assuming that I would be using the cpu_possible() macro that is to be
introduced so that only buffers for actually existing cpu sockets get
allocated.

Best regards,

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-12 21:29:57

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

At 20:32 12/06/02, Linus Torvalds wrote:

>Hmm.. Since the cpu_online_map thing can be used to fix this, this doesn't
>seem to be a big issue

Yes, we are all just nitpicking now. (-;

>, BUT
>
>On Wed, 12 Jun 2002, Anton Altaparmakov wrote:
> >
> > 1) Use a single buffer and lock it so once one file is under decompression
> > no other files can be and if multiple compressed files are being accessed
> > simultaneously on different CPUs only one CPU would be decompressing. The
> > others would be waiting for the lock. (Obviously scheduling and doing
> other
> > stuff.)
> >
> > 2) Use multiple buffers and allocate a buffer every time the decompression
> > engine is used. Note this means a vmalloc()+vfree() in EVERY ->readpage()
> > for a compressed file!
> >
> > 3) Use one buffer for each CPU and use a critical section during
> > decompression (disable preemption, don't sleep). Allocated at mount
> time of
> > first partition supporting compression. Freed at umount time of last
> > partition supporting compression.
> >
> > I think it is obvious why I went for 3)...
>
>I don't see that as being all that obvious. The _obvious_ choice is just
>(1), protected by a simple spinlock. 128kB/CPU seems rather wasteful,
>especially as the only thing it buys you is scalability on multiple CPU's
>for the case where you have multiple readers all at the same time touching
>a new compressed block.
>
>That scalability operation seems dubious, especially since this will only
>happen when you just had to do IO anyway, so in order to actually take
>advantage of the scalability that IO would have had to happen on multiple
>separate controllers.
>
>Ehh?

That is a fair point from a reality check point of view, I freely admit to
being one of the people who count the bytes and cycles... But I do think it
is quite legitimate to have two different controllers or at least two
different disks (/me ignorant: SCSI can operate multiple disks
simultaneously on same controller, can it not?) or as I do quite a lot
myself, have one disk on IDE controller and one via NBD device over 100MBit
ethernet. (Mind you I have a single CPU machine...)

Admittedly in reality you would need to have some damn high load on the
ntfs driver for this optimization to make a difference. But let's take as an
example a company who is migrating from windows to Linux but for whatever
reason is keeping their data on NTFS (yes such companies exist (-:). I
could see this optimization making a real world difference (albeit a
small one!) to a big web/file server.

I know ntfs is currently read-only but it is not going to stay this way and
I see the possibility of people using ntfs on Linux quite extensively, so I
am trying to make it as robust and as fast as possible. - Quite a few
companies keep asking me when write support will be available so they can
install Linux shared with windows, run their Linux based app in Windows, do
antivirus checks/cleaning from Linux, do backup recovery of windows from
Linux, the list goes on, I have lost track of all the things people want it
for. (-:

Best regards,

Anton


--
"I've not lost my mind. It's backed up on tape somewhere." - Unknown
--
Anton Altaparmakov <aia21 at cantab.net> (replace at with @)
Linux NTFS Maintainer / IRC: #ntfs on irc.openprojects.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/

2002-06-12 22:22:11

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Anton Altaparmakov wrote:
>>
>> Note that there is no requirement that we're still on cpu "cpu" when
>> we allocate the buffer. Furthermore, if we fail, we just loop right
>> back to the top.
>
>
> What is the point though? Why not just:
>
> if (!unlikely(decompression_buffers)) {
> down_sem();
> allocate_decompression_buffers();
> up_sem();
> }
>
> And be done with it?
>
> I don't see any justification for the increased complexity...
>

Race condition -- you have to drop out of the critical section before
you grab the allocation semaphore, and another CPU can grab the
semaphore in that time.

Thus, the buffers might appear right under your nose.

-hpa


2002-06-13 01:34:06

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Wed, 12 Jun 2002, H. Peter Anvin wrote:
> Anton Altaparmakov wrote:
> >>
> >> Note that there is no requirement that we're still on cpu "cpu" when
> >> we allocate the buffer. Furthermore, if we fail, we just loop right
> >> back to the top.
> >
> > What is the point though? Why not just:
> >
> > if (!unlikely(decompression_buffers)) {
> > down_sem();
> > allocate_decompression_buffers();
> > up_sem();
> > }
> >
> > And be done with it?
> >
> > I don't see any justification for the increased complexity...
>
> Race condition -- you have to drop out of the critical section before
> you grab the allocation sempahore, and another CPU can grab the
> semaphore in that time.
>
> Thus, the buffers might appear right under your nose.

The code would be run outside the critical region... But correct about
the race. I thought that was obvious and wasn't suggesting the above to be
the actual code... That was supposed to be obvious from lack of error
handling etc... Never mind. My mistake, I should have been more precise
the first time round, here is the actual code I had in mind:

[snip]
if (unlikely(!ntfs_compression_buffers)) {
int err;

/*
* This code path only ever triggers once so we take it
* out of line.
*/
if ((err = try_to_allocate_compression_buffers())) {
// TODO: do appropriate cleanups
return err;
}
}
disable_preempt();
cb = ntfs_compression_buffers[smp_processor_id()];
[snip]

and try_to_allocate_compression_buffers would be:

int try_to_allocate_compression_buffers(void)
{
int err = 0;

down(&ntfs_lock);
if (likely(!ntfs_compression_buffers))
err = allocate_compression_buffers();
up(&ntfs_lock);
return err;
}

and allocate_compression_buffers() is the same as it is now. Actually I
was going to fuse try_to_allocate and allocate into one function but as I
am showing above it is clearer to see what I had in mind...

Happy now? This basically just defers the allocation to a bit later. As it
is at the moment the allocation happens at mount time of a partition which
supports compression. Note that the code in super.c would still need to
exist due to reference counting so we know when we can free the buffers
again. The only thing changed in super.c will be to remove the actual call
to allocate_compression_buffers, all else stays in place. Otherwise we
have no way to tell when we can throw away the buffers.

Best regards,

Anton

2002-06-13 01:38:42

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Anton Altaparmakov wrote:
>
> The code would be run outside the critical region... But correct about
> the race. I thought that was obvious and wasn't suggesting the above to be
> the actual code... That was supposed to be obvious from lack of error
> handling etc... Never mind. My mistake, I should have been more precise
> the first time round, here is the actual code I had in mind:
>
> [snip]
> if (unlikely(!ntfs_compression_buffers)) {
> int err;
>
> /*
> * This code path only ever triggers once so we take it
> * out of line.
> */
> if ((err = try_to_allocate_compression_buffers())) {
> // TODO: do appropriate cleanups
> return err;
> }
> }
> disable_preempt();
> cb = ntfs_compression_buffers[smp_processor_id()];
> [snip]
>
> and try_to_allocate_compression_buffers would be:
>
> int try_to_allocate_compression_buffers(void)
> {
> int err = 0;
>
> down(&ntfs_lock);
> if (likely(!ntfs_compression_buffers))
> err = allocate_compression_buffers();
> up(&ntfs_lock);
> return err;
> }
>
> and allocate_compression_buffers() is the same as it is now. Actually I
> was going to fuse try_to_allocate and allocate into one function but as I
> am showing above it is clearer to see what I had in mind...
>
> Happy now? This basically just defers the allocation to a bit later. As it
> is at the moment the allocation happens at mount time of a partition which
> supports compression. Note that the code in super.c would still need to
> exist due to reference counting so we know when we can free the buffers
> again. The only thing changed in super.c will be to remove the actual call
> to allocate_compression_buffers, all else stays in place. Otherwise we
> have no way to tell when we can throw away the buffers.
>

I presume allocate_compression_buffers() allocates *all* buffers, and
doesn't return error if there is nothing to allocate? If so, the above
code should be OK.

If allocate_compression_buffers() either doesn't check if it has already
allocated, or returns an error if buffers were already allocated, then
the above code is OK *EXCEPT IN THE CASE OF HOTSWAP CPUs*.

My originally proposed code allocated one buffer at a time, and should
be correct even in the presence of hotswap CPUs.

-hpa


2002-06-13 01:45:48

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Wed, 12 Jun 2002, H. Peter Anvin wrote:
> Anton Altaparmakov wrote:
> >
> > The code would be run outside the critical region... But correct about
> > the race. I thought that was obvious and wasn't suggesting the above to be
> > the actual code... That was supposed to be obvious from lack of error
> > handling etc... Never mind. My mistake, I should have been more precise
> > the first time round, here is the actual code I had in mind:
> >
> > [snip]
> > if (unlikely(!ntfs_compression_buffers)) {
> > int err;
> >
> > /*
> > * This code path only ever triggers once so we take it
> > * out of line.
> > */
> > if ((err = try_to_allocate_compression_buffers())) {
> > // TODO: do appropriate cleanups
> > return err;
> > }
> > }
> > disable_preempt();
> > cb = ntfs_compression_buffers[smp_processor_id()];
> > [snip]
> >
> > and try_to_allocate_compression_buffers would be:
> >
> > int try_to_allocate_compression_buffers(void)
> > {
> > int err = 0;
> >
> > down(&ntfs_lock);
> > if (likely(!ntfs_compression_buffers))
> > err = allocate_compression_buffers();
> > up(&ntfs_lock);
> > return err;
> > }
> >
> > and allocate_compression_buffers() is the same as it is now. Actually I
> > was going to fuse try_to_allocate and allocate into one function but as I
> > am showing above it is clearer to see what I had in mind...
> >
> > Happy now? This basically just defers the allocation to a bit later. As it
> > is at the moment the allocation happens at mount time of a partition which
> > supports compression. Note that the code in super.c would still need to
> > exist due to reference counting so we know when we can free the buffers
> > again. The only thing changed in super.c will be to remove the actual call
> > to allocate_compression_buffers, all else stays in place. Otherwise we
> > have no way to tell when we can throw away the buffers.
>
> I presume allocate_compression_buffers() allocates *all* buffers, and
> doesn't return error if there is nothing to allocate? If so, the above
> code should be OK.
>
> If allocate_compression_buffers() either doesn't check if it has already
> allocated, or returns an error if buffers were already allocated, then
> the above code is OK *EXCEPT IN THE CASE OF HOTSWAP CPUs*.
>
> My originally proposed code allocated one buffer at a time, and should
> be correct even in the presence of hotswap CPUs.

allocate_compression_buffers() currently allocates all buffers up to
smp_num_cpus which is fine without hotswap cpus. Once the hotswap cpus patch
goes in, then the allocation will be (pseudo code):

for (i = 0; i < NR_CPUS; i++) {
if (cpu_possible(i)) {
ntfs_compression_buffer[i] = vmalloc();
// TODO handle errors
}
}

That means in words that we allocate buffers only once and for all
existing cpu SOCKETS, i.e. including all potentially hotpluggable cpus
which are currently offline. - If someone invents hotpluggable cpu sockets
at some point then they should be burnt at the stake! (-;

Best regards,

Anton

2002-06-13 02:13:50

by H. Peter Anvin

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Anton Altaparmakov wrote:
>
> allocate_compression_buffers() currently allocates all buffers up
> smp_num_cpus which is fine without hotswap cpus. Once hotswap cpus path
> goes in, then the allocation will be (pseudo code):
>
> for (i = 0; i < NR_CPUS; i++) {
> if (cpu_possible(i)) {
> ntfs_compression_buffer[i] = vmalloc();
> // TODO handle errors
> }
> }
>
> That means in words that we allocate buffers only once and for all
> existing cpu SOCKETS, i.e. including all potentially hotpluggable cpus
> which are currently offline. - If someone invents hotpluggable cpu sockets
> at some point then they should be burnt at the stake! (-;
>

Note that with my code, you don't allocate any memory until you have
actually seen a particular CPU being *used.* All very simple...

-hpa


2002-06-13 02:21:25

by Anton Altaparmakov

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Wed, 12 Jun 2002, H. Peter Anvin wrote:
> Anton Altaparmakov wrote:
> >
> > allocate_compression_buffers() currently allocates all buffers up
> > smp_num_cpus which is fine without hotswap cpus. Once hotswap cpus path
> > goes in, then the allocation will be (pseudo code):
> >
> > for (i = 0; i < NR_CPUS; i++) {
> > if (cpu_possible(i)) {
> > ntfs_compression_buffer[i] = vmalloc();
> > // TODO handle errors
> > }
> > }
> >
> > That means in words that we allocate buffers only once and for all
> > existing cpu SOCKETS, i.e. including all potentially hotpluggable cpus
> > which are currently offline. - If someone invents hotpluggable cpu sockets
> > at some point then they should be burnt at the stake! (-;
>
> Note that with my code, you don't allocate any memory until you have
> actually seen a particular CPU being *used.* All very simple...

I realise that and I am just saying that doing that is pointless as it
only introduces overhead at no gain at all. If you use one cpu you are
going to use all of them. And if you have one compressed file, you are
going to have lots of them. Frankly, I don't care about hotplug cpus. If
someone has a hotplug capable motherboard which costs thousands (tens of
thousands?) of dollars without even starting on the cpus they are not
going to care if the kernel is wasting a few megabytes of ram here or
there... And if they do then they should buy more RAM obviously they can
afford it... What I am worried about is wasting ram on low end systems and
my approach is just as effective as yours except it incurs less overhead,
be it only by a few cycles...

Anton

ps. I am away to catch a plane in a few minutes so won't be replying for a
while...

2002-06-13 02:42:08

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

In message <[email protected]> you write:
> This means keeping a logical->physical map and iterating like this:
>
> for(i = 0; i < nr_online_cpus; ++i)
> do_something_with(cpu_logical_map(i));
>
> (I care because my performance-monitoring counters driver by necessity
> is closely tied to CPU identities and the set of online CPUs.)

I disagreed, so I measured, and you are right 8( I hate that.

Simply reading the performance monitors across all CPUs is a case
where the extra loop overhead is significant (although cache effects
may still dominate): 9 times slower on PPC if there are only 2 CPUs
and NR_CPUS is 32.

I'd definitely prefer a per-arch for_each_cpu() implementation to
exposing a mapping, eg:

/* No hotplug cpus on this arch. */
extern int max_cpu_num;
#define for_each_cpu(__i) for (__i = 0; __i < max_cpu_num; __i++)

OR
extern int cpu_next_map[NR_CPUS];
#define for_each_cpu(__i) \
for (__i = 0; __i < NR_CPUS; __i = cpu_next_map[__i])

[ Soapbox mode: cut here ]

My philosophy is that parts of infrastructure which are not used by more
than 90% of people tend to get misused. Two recent concrete examples:

cpu_logical_map() is currently a noop on x86
=> Ingo fucked it up in his initial scheduler impl.
copy_from_user() returns POSITIVE on failure
=> 7% of uses of copy_from_user were buggy in 2.5.19.

My feeling is that kernel coding is becoming more challenging (SMP,
preemption, portability), and our bug count and time-to-kernel-mastery
is climbing as a result. One method of countering this is by
carefully designing infrastructure to make the simplest method for
writing common operations also the correct one.

Sometimes old-timers don't see infrastructure they are used to as a
problem, but even they make mistakes.

Rusty.
--
Anyone who quotes me in their sig is an idiot. -- Rusty Russell.

2002-06-13 05:22:33

by Rusty Russell

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

On Wed, 12 Jun 2002 02:26:41 -0700 (PDT)
"David S. Miller" <[email protected]> wrote:

> From: Rusty Russell <[email protected]>
> Date: Wed, 12 Jun 2002 16:58:23 +1000
>
> In message <[email protected]> you write:
> > And remember, it's the anal "every microoptimization at all costs"
> > people that keep the kernel sane and from running out of control bloat
> > wise.
>
> But it also gave us crap like net/ipv4/route.c:ip_rt_acct_read() 8(
>
> That's far from being an attempt optimization :-)

I was giving you the benefit of the doubt, that both the design (binary
data from /proc) and the code (#ifdef, cpu_logical_map(0), and "256"
sprinkled everywhere) were some insane attempt at speed, rather than
a demonstration of sheer programming idiocy.

I stand corrected 8). This papers over the damage:

diff -urN -I \$.*\$ --exclude TAGS -X /home/rusty/current-dontdiff --minimal linux-2.5.21/net/ipv4/route.c working-2.5.21-ipacct/net/ipv4/route.c
--- linux-2.5.21/net/ipv4/route.c Mon May 13 12:00:40 2002
+++ working-2.5.21-ipacct/net/ipv4/route.c Thu Jun 13 14:33:41 2002
@@ -2418,10 +2418,15 @@
#ifdef CONFIG_NET_CLS_ROUTE
struct ip_rt_acct *ip_rt_acct;

+/* This code sucks. But you should have seen it before! --RR */
+
+/* IP route accounting ptr for this logical cpu number. */
+#define IP_RT_ACCT_CPU(i) (ip_rt_acct + cpu_logical_map(i) * 256)
+
static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
int length, int *eof, void *data)
{
- *start = buffer;
+ unsigned int i;

if ((offset & 3) || (length & 3))
return -EIO;
@@ -2430,35 +2435,18 @@
length = sizeof(struct ip_rt_acct) * 256 - offset;
*eof = 1;
}
- if (length > 0) {
- u32 *dst = (u32*)buffer;
- u32 *src = (u32*)(((u8*)ip_rt_acct) + offset);
-
- memcpy(dst, src, length);
-
-#ifdef CONFIG_SMP
- if (smp_num_cpus > 1 || cpu_logical_map(0) != 0) {
- int i;
- int cnt = length / 4;
-
- for (i = 0; i < smp_num_cpus; i++) {
- int cpu = cpu_logical_map(i);
- int k;

- if (cpu == 0)
- continue;
-
- src = (u32*)(((u8*)ip_rt_acct) + offset +
- cpu * 256 * sizeof(struct ip_rt_acct));
+ /* Copy first cpu. */
+ *start = buffer;
+ memcpy(buffer, IP_RT_ACCT_CPU(0), length);

- for (k = 0; k < cnt; k++)
- dst[k] += src[k];
- }
- }
-#endif
- return length;
+ /* Add the other cpus in, one int at a time */
+ for (i = 1; i < smp_num_cpus; i++) {
+ unsigned int j;
+ for (j = 0; j < length/4; j++)
+ ((u32*)buffer)[j] += ((u32*)IP_RT_ACCT_CPU(i))[j];
}
- return 0;
+ return length;
}
#endif

--
there are those who do and those who hang on and you don't see too
many doers quoting their contemporaries. -- Larry McVoy

2002-06-13 05:59:32

by David Miller

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

From: Rusty Russell <[email protected]>
Date: Thu, 13 Jun 2002 15:01:19 +1000

+/* This code sucks. But you should have seen it before! --RR */

Applied :-)

2002-06-13 08:43:34

by Helge Hafting

[permalink] [raw]
Subject: Re: [PATCH] 2.5.21 Nonlinear CPU support

Anton Altaparmakov wrote:

> That means in words that we allocate buffers only once and for all
> existing cpu SOCKETS, i.e. including all potentially hotpluggable cpus
> which are currently offline. - If someone invents hotpluggable cpu sockets
> at some point then they should be burnt at the stake! (-;

How about doing NUMA by hot-plugging PCI cards, each containing
a cpu and some memory? You never know how many of those
they'll plug in.

PCI cards with an x86 aren't new either, although I haven't heard of
them being used in this manner before.

Helge Hafting