Current IA32 CPU hotplug code doesn't allow bringing up processors that were not present in the boot configuration.
To make existing hot plug facility more practical for physical hot plug, possible processors should be encountered
during boot for potentual hot add/replace/remove. On ES7000, ACPI marks all the sockets that are empty or not assigned
to the partitionas as "disabled". The patch allows arrays/masks with APIC info for disabled processors to be
initialized. Then the OS can bring up a processor that was inserted in the socked and brought into configuration
during runtime.
To test the code, one can boot the system with maxcpu=1 and then bring the rest of the processors up, which was not
possible so far (only maxcpus number of nodes were created).
The patch also makes proc entry for interrupts dynamically change to only show current onlined processors.
Signed-off-by: Natalie Protasevich <[email protected]>
---
arch/i386/kernel/acpi/boot.c | 2 ++
arch/i386/kernel/irq.c | 8 ++++----
arch/i386/kernel/mpparse.c | 4 ++++
arch/i386/kernel/smpboot.c | 3 ++-
arch/i386/mach-default/topology.c | 4 ++--
kernel/cpu.c | 4 ++++
6 files changed, 18 insertions(+), 7 deletions(-)
diff -puN arch/i386/kernel/acpi/boot.c~hotcpu-i386 arch/i386/kernel/acpi/boot.c
--- linux-2.6.13-rc6-mm2/arch/i386/kernel/acpi/boot.c~hotcpu-i386 2005-08-31 04:17:20.832038600 -0700
+++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/acpi/boot.c 2005-08-31 04:19:35.259602504 -0700
@@ -255,9 +255,11 @@ acpi_parse_lapic(acpi_table_entry_header
acpi_table_print_madt_entry(header);
+#ifndef CONFIG_HOTPLUG_CPU
/* no utility in registering a disabled processor */
if (processor->flags.enabled == 0)
return 0;
+#endif
x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
diff -puN arch/i386/kernel/mpparse.c~hotcpu-i386 arch/i386/kernel/mpparse.c
--- linux-2.6.13-rc6-mm2/arch/i386/kernel/mpparse.c~hotcpu-i386 2005-08-31 04:17:20.877031760 -0700
+++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/mpparse.c 2005-08-31 04:20:49.297347056 -0700
@@ -125,8 +125,10 @@ static void __init MP_processor_info (st
int ver, apicid, cpu, found_bsp = 0;
physid_mask_t tmp;
+#ifndef CONFIG_HOTPLUG_CPU
if (!(m->mpc_cpuflag & CPU_ENABLED))
return;
+#endif
apicid = mpc_apic_id(m, translation_table[mpc_record]);
@@ -190,11 +192,13 @@ static void __init MP_processor_info (st
return;
}
+#ifndef CONFIG_HOTPLUG_CPU
if (num_processors >= maxcpus) {
printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
" Processor ignored.\n", maxcpus);
return;
}
+#endif
num_processors++;
ver = m->mpc_apicver;
diff -puN arch/i386/kernel/smpboot.c~hotcpu-i386 arch/i386/kernel/smpboot.c
--- linux-2.6.13-rc6-mm2/arch/i386/kernel/smpboot.c~hotcpu-i386 2005-08-31 04:17:20.924024616 -0700
+++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/smpboot.c 2005-08-31 04:21:49.474198784 -0700
@@ -1003,9 +1003,10 @@ int __devinit smp_prepare_cpu(int cpu)
struct warm_boot_cpu_info info;
struct work_struct task;
int apicid, ret;
+ extern u8 bios_cpu_apicid[NR_CPUS];
lock_cpu_hotplug();
- apicid = x86_cpu_to_apicid[cpu];
+ apicid = bios_cpu_apicid[cpu];
if (apicid == BAD_APICID) {
ret = -ENODEV;
goto exit;
diff -puN arch/i386/mach-default/topology.c~hotcpu-i386 arch/i386/mach-default/topology.c
--- linux-2.6.13-rc6-mm2/arch/i386/mach-default/topology.c~hotcpu-i386 2005-08-31 04:17:20.957019600 -0700
+++ linux-2.6.13-rc6-mm2-root/arch/i386/mach-default/topology.c 2005-08-31 04:22:13.020619184 -0700
@@ -76,7 +76,7 @@ static int __init topology_init(void)
for_each_online_node(i)
arch_register_node(i);
- for_each_present_cpu(i)
+ for_each_cpu(i)
arch_register_cpu(i);
return 0;
}
@@ -87,7 +87,7 @@ static int __init topology_init(void)
{
int i;
- for_each_present_cpu(i)
+ for_each_cpu(i)
arch_register_cpu(i);
return 0;
}
diff -puN kernel/cpu.c~hotcpu-i386 kernel/cpu.c
--- linux-2.6.13-rc6-mm2/kernel/cpu.c~hotcpu-i386 2005-08-31 04:17:21.002012760 -0700
+++ linux-2.6.13-rc6-mm2-root/kernel/cpu.c 2005-08-31 04:23:34.378250944 -0700
@@ -158,7 +158,11 @@ int __devinit cpu_up(unsigned int cpu)
if ((ret = down_interruptible(&cpucontrol)) != 0)
return ret;
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu_online(cpu)) {
+#else
if (cpu_online(cpu) || !cpu_present(cpu)) {
+#endif
ret = -EINVAL;
goto out;
}
diff -puN arch/i386/kernel/irq.c~hotcpu-i386 arch/i386/kernel/irq.c
--- linux-2.6.13-rc6-mm2/arch/i386/kernel/irq.c~hotcpu-i386 2005-08-31 04:17:21.047005920 -0700
+++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/irq.c 2005-08-31 04:25:21.761926144 -0700
@@ -248,7 +248,7 @@ int show_interrupts(struct seq_file *p,
if (i == 0) {
seq_printf(p, " ");
- for_each_cpu(j)
+ for_each_online_cpu(j)
seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
@@ -262,7 +262,7 @@ int show_interrupts(struct seq_file *p,
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
- for_each_cpu(j)
+ for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
@@ -276,12 +276,12 @@ skip:
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
- for_each_cpu(j)
+ for_each_online_cpu(j)
seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for_each_cpu(j)
+ for_each_online_cpu(j)
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).apic_timer_irqs);
seq_putc(p, '\n');
_
Hi,
On Wed, 2005-08-31 at 20:13 +0800, [email protected] wrote:
> Current IA32 CPU hotplug code doesn't allow bringing up processors
> that were not present in the boot configuration.
> To make existing hot plug facility more practical for physical hot
> plug, possible processors should be encountered
> during boot for potentual hot add/replace/remove. On ES7000, ACPI
> marks all the sockets that are empty or not assigned
> to the partitionas as "disabled". The patch allows arrays/masks with
> APIC info for disabled processors to be
> initialized. Then the OS can bring up a processor that was inserted in
> the socked and brought into configuration
> during runtime.
> To test the code, one can boot the system with maxcpu=1 and then bring
> the rest of the processors up, which was not
> possible so far (only maxcpus number of nodes were created).
> The patch also makes proc entry for interrupts dynamically change to
> only show current onlined processors.
Could we clean up the cpu_present_map a bit, like IA64 does? Eg, if a
new CPU is inserted, we allocated cpu id for it and set cpu_present_map.
Current alloc_cpu_id is just a workaround for suspend/resume use, but
isn't ok for physical cpu hotplug to me.
Thanks,
Shaohua
Hallo Natalie,
On Wednesday 31 August 2005 14:13, [email protected] wrote:
> Current IA32 CPU hotplug code doesn't allow bringing up processors that
> were not present in the boot configuration. To make existing hot plug
> facility more practical for physical hot plug, possible processors should
> be encountered during boot for potentual hot add/replace/remove. On ES7000,
> ACPI marks all the sockets that are empty or not assigned to the
> partitionas as "disabled".
Good idea. In fact I always hated the behaviour of the existing
hotplug code that assumes all possible CPUs can be hotplugged.
It would be much nicer to be told be the firmware what CPUs
are hotpluggable. It would be great if all ia32/x86-64 hotplug capable
BIOS behaved like your.
> struct warm_boot_cpu_info info;
> struct work_struct task;
> int apicid, ret;
> + extern u8 bios_cpu_apicid[NR_CPUS];
This should be in some header.
>
> lock_cpu_hotplug();
> - apicid = x86_cpu_to_apicid[cpu];
> + apicid = bios_cpu_apicid[cpu];
Why this change? It seems unrelated.
> if (apicid == BAD_APICID) {
> ret = -ENODEV;
> goto exit;
> diff -puN arch/i386/mach-default/topology.c~hotcpu-i386
> arch/i386/mach-default/topology.c ---
> linux-2.6.13-rc6-mm2/arch/i386/mach-default/topology.c~hotcpu-i386 2005-08-
>31 04:17:20.957019600 -0700 +++
> linux-2.6.13-rc6-mm2-root/arch/i386/mach-default/topology.c 2005-08-31
> 04:22:13.020619184 -0700 @@ -76,7 +76,7 @@ static int __init
> topology_init(void)
> for_each_online_node(i)
> arch_register_node(i);
>
> - for_each_present_cpu(i)
> + for_each_cpu(i)
This looks wrong. The CPUs should be in the present mask
if it's present. Followup code similar.
BTW shouldn't there be some attribute in sysfs that says
"CPU disabled"?
-Andi
We should probably also not to try to boot disabled cpus in smp_boot_cpus()...
--Mika
> Hallo Natalie,
>
> On Wednesday 31 August 2005 14:13,
> [email protected] wrote:
> > Current IA32 CPU hotplug code doesn't allow bringing up processors
> > that were not present in the boot configuration. To make
> existing hot
> > plug facility more practical for physical hot plug, possible
> > processors should be encountered during boot for potentual hot
> > add/replace/remove. On ES7000, ACPI marks all the sockets that are
> > empty or not assigned to the partitionas as "disabled".
>
> Good idea. In fact I always hated the behaviour of the
> existing hotplug code that assumes all possible CPUs can be
> hotplugged.
> It would be much nicer to be told be the firmware what CPUs
> are hotpluggable. It would be great if all ia32/x86-64
> hotplug capable BIOS behaved like your.
>
>
> > struct warm_boot_cpu_info info;
> > struct work_struct task;
> > int apicid, ret;
> > + extern u8 bios_cpu_apicid[NR_CPUS];
>
> This should be in some header.
>
> >
> > lock_cpu_hotplug();
> > - apicid = x86_cpu_to_apicid[cpu];
> > + apicid = bios_cpu_apicid[cpu];
>
> Why this change? It seems unrelated.
The problem is that x86_cpu_to_apicid[] only gets set for processors
that successfully came online in do_boot_cpu():
...
if (boot_error) {
/* Try to put things back the way they were before ...
*/
unmap_cpu_to_logical_apicid(cpu);
cpu_clear(cpu, cpu_callout_map); /* was set here
(do_boot_cpu()) */
cpu_clear(cpu, cpu_initialized); /* was set by
cpu_init() */
cpucount--;
} else {
x86_cpu_to_apicid[cpu] = apicid;
cpu_set(cpu, cpu_present_map);
}
...
And in case of the "new" CPU the "inactive" APIC ID is needed, which was
recorded in bios_cpu_apicid[].
> > if (apicid == BAD_APICID) {
> > ret = -ENODEV;
> > goto exit;
> > diff -puN arch/i386/mach-default/topology.c~hotcpu-i386
> > arch/i386/mach-default/topology.c ---
> >
> linux-2.6.13-rc6-mm2/arch/i386/mach-default/topology.c~hotc
> pu-i386 2005-08-
> >31 04:17:20.957019600 -0700 +++
> > linux-2.6.13-rc6-mm2-root/arch/i386/mach-default/topology.c
> 2005-08-31
> > 04:22:13.020619184 -0700 @@ -76,7 +76,7 @@ static int __init
> > topology_init(void)
> > for_each_online_node(i)
> > arch_register_node(i);
> >
> > - for_each_present_cpu(i)
> > + for_each_cpu(i)
>
> This looks wrong. The CPUs should be in the present mask
> if it's present. Followup code similar.
I changed it from present to possible CPUS to create nodes for both
present and absent CPUs, the latter have online=0. This way we can bring
them up if they become physically available.
> BTW shouldn't there be some attribute in sysfs that says
> "CPU disabled"?
I think that would be great. And maybe __cpu_up() should log a message.
> -Andi
>
> We should probably also not to try to boot disabled cpus in
> smp_boot_cpus()...
>
> --Mika
>
Good point, probably by not setting phys_cpu_present_map for those in
MP_processor_info()...
Thanks,
--Natalie
>
[email protected] wrote:
> +#ifdef CONFIG_HOTPLUG_CPU
> + if (cpu_online(cpu)) {
> +#else
> if (cpu_online(cpu) || !cpu_present(cpu)) {
> +#endif
> ret = -EINVAL;
> goto out;
> }
Why this change? I think the cpu_present check is needed for ppc64
since it has non-present cpus in sysfs.
On Wed, Aug 31, 2005 at 05:13:10AM -0700, [email protected] wrote:
Hi Natalie,
sorry for the late catchup...
Just to make sure we use the cpu maps properly here is the current definition
iam not sure if you are breaking any of these assumptions, cause if you do
you will break existing implementations... so pardon for the verbosity.
cpu_possible_map - must include all possible cpus that can ever exist in this
instance of boot. This map is static, i.e you can grow
or remove bits without causing confution to the rest of the
subsystem. We attempted to change, but there was not much
in return for the savings, for the complexity we would
add to make this map dynamic.
cpu_present_map - indicates that these are present and operational. OS
can decide if they should be brought up or not.
when CONFIG_HOTPLUG_CPU is *not* defined, then cpu_possible_map and
cpu_present_map are the same. When CONFIG_HOTPLUG_CPU=y, then we extend
cpu_possible_map to cover the range of NR_CPUS, but cpu_present_map still shows
what is real.
When a physical hotplug happens, cpu_present_map is updated by who ever manages
the physical hotplug.
>
> Current IA32 CPU hotplug code doesn't allow bringing up processors that were not present in the boot configuration.
> To make existing hot plug facility more practical for physical hot plug, possible processors should be encountered
> during boot for potentual hot add/replace/remove. On ES7000, ACPI marks all the sockets that are empty or not assigned
> to the partitionas as "disabled". The patch allows arrays/masks with APIC info for disabled processors to be
This sounds like a cluge to me. The correct implementation would be you would
need some sysmgmt deamon or something that works with the kernel to notify of
new cpus and populate apicid and grow cpu_present_map. Making an assumption that
disabled APICID are valid for ES7000 sake is not a safe assumption.
> initialized. Then the OS can bring up a processor that was inserted in the socked and brought into configuration
> during runtime.
> To test the code, one can boot the system with maxcpu=1 and then bring the rest of the processors up, which was not
> possible so far (only maxcpus number of nodes were created).
> The patch also makes proc entry for interrupts dynamically change to only show current onlined processors.
>
> Signed-off-by: Natalie Protasevich <[email protected]>
> ---
>
> arch/i386/kernel/acpi/boot.c | 2 ++
> arch/i386/kernel/irq.c | 8 ++++----
> arch/i386/kernel/mpparse.c | 4 ++++
> arch/i386/kernel/smpboot.c | 3 ++-
> arch/i386/mach-default/topology.c | 4 ++--
> kernel/cpu.c | 4 ++++
> 6 files changed, 18 insertions(+), 7 deletions(-)
>
> diff -puN arch/i386/kernel/acpi/boot.c~hotcpu-i386 arch/i386/kernel/acpi/boot.c
> --- linux-2.6.13-rc6-mm2/arch/i386/kernel/acpi/boot.c~hotcpu-i386 2005-08-31 04:17:20.832038600 -0700
> +++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/acpi/boot.c 2005-08-31 04:19:35.259602504 -0700
> @@ -255,9 +255,11 @@ acpi_parse_lapic(acpi_table_entry_header
>
> acpi_table_print_madt_entry(header);
>
> +#ifndef CONFIG_HOTPLUG_CPU
> /* no utility in registering a disabled processor */
> if (processor->flags.enabled == 0)
> return 0;
> +#endif
On same lines, the above config code seems not appropriate. When your platform
needs to hot-add a cpu, find a way to set these flags, you cant break
current behaviour as for a platform quirk!
>
> x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
>
> diff -puN arch/i386/kernel/mpparse.c~hotcpu-i386 arch/i386/kernel/mpparse.c
> --- linux-2.6.13-rc6-mm2/arch/i386/kernel/mpparse.c~hotcpu-i386 2005-08-31 04:17:20.877031760 -0700
> +++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/mpparse.c 2005-08-31 04:20:49.297347056 -0700
> @@ -125,8 +125,10 @@ static void __init MP_processor_info (st
> int ver, apicid, cpu, found_bsp = 0;
> physid_mask_t tmp;
>
> +#ifndef CONFIG_HOTPLUG_CPU
> if (!(m->mpc_cpuflag & CPU_ENABLED))
> return;
> +#endif
Same here... platform quirk!
>
> apicid = mpc_apic_id(m, translation_table[mpc_record]);
>
> @@ -190,11 +192,13 @@ static void __init MP_processor_info (st
> return;
> }
>
> +#ifndef CONFIG_HOTPLUG_CPU
> if (num_processors >= maxcpus) {
> printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
> " Processor ignored.\n", maxcpus);
> return;
> }
> +#endif
> num_processors++;
> ver = m->mpc_apicver;
>
> diff -puN arch/i386/kernel/smpboot.c~hotcpu-i386 arch/i386/kernel/smpboot.c
> --- linux-2.6.13-rc6-mm2/arch/i386/kernel/smpboot.c~hotcpu-i386 2005-08-31 04:17:20.924024616 -0700
> +++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/smpboot.c 2005-08-31 04:21:49.474198784 -0700
> @@ -1003,9 +1003,10 @@ int __devinit smp_prepare_cpu(int cpu)
> struct warm_boot_cpu_info info;
> struct work_struct task;
> int apicid, ret;
> + extern u8 bios_cpu_apicid[NR_CPUS];
>
> lock_cpu_hotplug();
> - apicid = x86_cpu_to_apicid[cpu];
> + apicid = bios_cpu_apicid[cpu];
> if (apicid == BAD_APICID) {
> ret = -ENODEV;
> goto exit;
> diff -puN arch/i386/mach-default/topology.c~hotcpu-i386 arch/i386/mach-default/topology.c
> --- linux-2.6.13-rc6-mm2/arch/i386/mach-default/topology.c~hotcpu-i386 2005-08-31 04:17:20.957019600 -0700
> +++ linux-2.6.13-rc6-mm2-root/arch/i386/mach-default/topology.c 2005-08-31 04:22:13.020619184 -0700
> @@ -76,7 +76,7 @@ static int __init topology_init(void)
> for_each_online_node(i)
> arch_register_node(i);
>
> - for_each_present_cpu(i)
> + for_each_cpu(i)
Nope. Should still be for present_cpus. with NR_CPUS large we would see
way too many files in sysfs than what is really available.
> arch_register_cpu(i);
> return 0;
> }
> @@ -87,7 +87,7 @@ static int __init topology_init(void)
> {
> int i;
>
> - for_each_present_cpu(i)
> + for_each_cpu(i)
> arch_register_cpu(i);
> return 0;
> }
> diff -puN kernel/cpu.c~hotcpu-i386 kernel/cpu.c
> --- linux-2.6.13-rc6-mm2/kernel/cpu.c~hotcpu-i386 2005-08-31 04:17:21.002012760 -0700
> +++ linux-2.6.13-rc6-mm2-root/kernel/cpu.c 2005-08-31 04:23:34.378250944 -0700
> @@ -158,7 +158,11 @@ int __devinit cpu_up(unsigned int cpu)
> if ((ret = down_interruptible(&cpucontrol)) != 0)
> return ret;
>
> +#ifdef CONFIG_HOTPLUG_CPU
> + if (cpu_online(cpu)) {
> +#else
> if (cpu_online(cpu) || !cpu_present(cpu)) {
> +#endif
ditto.....
> ret = -EINVAL;
> goto out;
> }
> diff -puN arch/i386/kernel/irq.c~hotcpu-i386 arch/i386/kernel/irq.c
> --- linux-2.6.13-rc6-mm2/arch/i386/kernel/irq.c~hotcpu-i386 2005-08-31 04:17:21.047005920 -0700
> +++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/irq.c 2005-08-31 04:25:21.761926144 -0700
> @@ -248,7 +248,7 @@ int show_interrupts(struct seq_file *p,
>
> if (i == 0) {
> seq_printf(p, " ");
> - for_each_cpu(j)
> + for_each_online_cpu(j)
> seq_printf(p, "CPU%d ",j);
> seq_putc(p, '\n');
> }
> @@ -262,7 +262,7 @@ int show_interrupts(struct seq_file *p,
> #ifndef CONFIG_SMP
> seq_printf(p, "%10u ", kstat_irqs(i));
> #else
> - for_each_cpu(j)
> + for_each_online_cpu(j)
> seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
> #endif
> seq_printf(p, " %14s", irq_desc[i].handler->typename);
> @@ -276,12 +276,12 @@ skip:
> spin_unlock_irqrestore(&irq_desc[i].lock, flags);
> } else if (i == NR_IRQS) {
> seq_printf(p, "NMI: ");
> - for_each_cpu(j)
> + for_each_online_cpu(j)
> seq_printf(p, "%10u ", nmi_count(j));
> seq_putc(p, '\n');
> #ifdef CONFIG_X86_LOCAL_APIC
> seq_printf(p, "LOC: ");
> - for_each_cpu(j)
> + for_each_online_cpu(j)
> seq_printf(p, "%10u ",
> per_cpu(irq_stat,j).apic_timer_irqs);
> seq_putc(p, '\n');
> _
--
Cheers,
Ashok Raj
- Open Source Technology Center
On Thu, Sep 01, 2005 at 10:45:10AM +0200, Andi Kleen wrote:
> Hallo Natalie,
>
> On Wednesday 31 August 2005 14:13, [email protected] wrote:
> > Current IA32 CPU hotplug code doesn't allow bringing up processors that
> > were not present in the boot configuration. To make existing hot plug
> > facility more practical for physical hot plug, possible processors should
> > be encountered during boot for potentual hot add/replace/remove. On ES7000,
> > ACPI marks all the sockets that are empty or not assigned to the
> > partitionas as "disabled".
>
> Good idea. In fact I always hated the behaviour of the existing
> hotplug code that assumes all possible CPUs can be hotplugged.
> It would be much nicer to be told be the firmware what CPUs
> are hotpluggable. It would be great if all ia32/x86-64 hotplug capable
> BIOS behaved like your.
>
Andi, you are getting mixed up with software only ability to offline with
hardware eject capability. ACPI indicates ability to hotplug by the presence
of _EJD in the appropriate scope of the object. So ACPI does have ability to
do what you mention above precicely, but the entire namespace is not known
upfront since some could be dynamically loaded. Which is why we need to show
the entire NR_CPUS as hotpluggable.
Possibly we can keep cpu_possible_map as NR_CPUS only when support for
PHYSICAL_CPU_HOTPLUG is present, otherwise we can keep it
cloned as cpu_present_map. (we dont have a generic PHYSICAL hotplug CONFIG
option today)
What CONFIG_HOTPLUG_CPU=y indicates is ability to offline a processor from the
kernel. It DOES NOT indicate physical hotpluggablity. So we dont need any
hardware support (apart arch/kernel support) for this to work. Support
for physical hotplug is indicated via CONFIG_ACPI_HOTPLUG_CPU.
Be aware that suspend/resume folks using CPU hotplug to offline CPUS except
BSP need just the kernel support to offline. BIOS has nothing to do with
being able to offline a CPU (preferably called as soft-removal).
Cheers,
ashok
> Hi Natalie,
>
> sorry for the late catchup...
>
> Just to make sure we use the cpu maps properly here is the
> current definition iam not sure if you are breaking any of
> these assumptions, cause if you do you will break existing
> implementations... so pardon for the verbosity.
>
> cpu_possible_map - must include all possible cpus that can
> ever exist in this
> instance of boot. This map is static, i.e
> you can grow
> or remove bits without causing confution
> to the rest of the
> subsystem. We attempted to change, but
> there was not much
> in return for the savings, for the
> complexity we would
> add to make this map dynamic.
>
> cpu_present_map - indicates that these are present and operational. OS
> can decide if they should be brought up or not.
>
> when CONFIG_HOTPLUG_CPU is *not* defined, then
> cpu_possible_map and cpu_present_map are the same. When
> CONFIG_HOTPLUG_CPU=y, then we extend cpu_possible_map to
> cover the range of NR_CPUS, but cpu_present_map still shows
> what is real.
>
> When a physical hotplug happens, cpu_present_map is updated
> by who ever manages the physical hotplug.
> >
> > Current IA32 CPU hotplug code doesn't allow bringing up
> processors that were not present in the boot configuration.
> > To make existing hot plug facility more practical for physical hot
> > plug, possible processors should be encountered during boot for
> > potentual hot add/replace/remove. On ES7000, ACPI marks all the
> > sockets that are empty or not assigned to the partitionas as
> > "disabled". The patch allows arrays/masks with APIC info
> for disabled
> > processors to be
>
> This sounds like a cluge to me. The correct implementation
> would be you would need some sysmgmt deamon or something that
> works with the kernel to notify of new cpus and populate
> apicid and grow cpu_present_map. Making an assumption that
> disabled APICID are valid for ES7000 sake is not a safe assumption.
Yes, this is a kludge, I realize that. The AML code was not there so far
(it will be in the next one). I have a point here though that if the
processor is there, but is unusable (what "disabled" means as the ACPI
spec says), meaning bad maybe, then with physical hot plug it can
certainly be made usable and I think it should be taken into
consideration (and into configuration). It should be counted as possible
at least, with hot plug, because it represent existing socket.
> > initialized. Then the OS can bring up a processor that was
> inserted in
> > the socked and brought into configuration during runtime.
> > To test the code, one can boot the system with maxcpu=1 and
> then bring
> > the rest of the processors up, which was not possible so
> far (only maxcpus number of nodes were created).
> > The patch also makes proc entry for interrupts dynamically
> change to only show current onlined processors.
> >
> > Signed-off-by: Natalie Protasevich <[email protected]>
> > ---
> >
> > arch/i386/kernel/acpi/boot.c | 2 ++
> > arch/i386/kernel/irq.c | 8 ++++----
> > arch/i386/kernel/mpparse.c | 4 ++++
> > arch/i386/kernel/smpboot.c | 3 ++-
> > arch/i386/mach-default/topology.c | 4 ++--
> > kernel/cpu.c | 4 ++++
> > 6 files changed, 18 insertions(+), 7 deletions(-)
> >
> > diff -puN arch/i386/kernel/acpi/boot.c~hotcpu-i386
> arch/i386/kernel/acpi/boot.c
> > ---
> linux-2.6.13-rc6-mm2/arch/i386/kernel/acpi/boot.c~hotcpu-i386
> 2005-08-31 04:17:20.832038600 -0700
> > +++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/acpi/boot.c
> 2005-08-31 04:19:35.259602504 -0700
> > @@ -255,9 +255,11 @@ acpi_parse_lapic(acpi_table_entry_header
> >
> > acpi_table_print_madt_entry(header);
> >
> > +#ifndef CONFIG_HOTPLUG_CPU
> > /* no utility in registering a disabled processor */
> > if (processor->flags.enabled == 0)
> > return 0;
> > +#endif
>
> On same lines, the above config code seems not appropriate.
> When your platform needs to hot-add a cpu, find a way to set
> these flags, you cant break current behaviour as for a platform quirk!
> >
> > x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
> >
> > diff -puN arch/i386/kernel/mpparse.c~hotcpu-i386
> arch/i386/kernel/mpparse.c
> > ---
> linux-2.6.13-rc6-mm2/arch/i386/kernel/mpparse.c~hotcpu-i386
> 2005-08-31 04:17:20.877031760 -0700
> > +++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/mpparse.c
> 2005-08-31 04:20:49.297347056 -0700
> > @@ -125,8 +125,10 @@ static void __init MP_processor_info (st
> > int ver, apicid, cpu, found_bsp = 0;
> > physid_mask_t tmp;
> >
> > +#ifndef CONFIG_HOTPLUG_CPU
> > if (!(m->mpc_cpuflag & CPU_ENABLED))
> > return;
> > +#endif
>
> Same here... platform quirk!
Yes it is indeed :) because it is masking absent processors as disabled.
And again I believe (correct me if I'm wrong) the disabled processors
should be counted as possible with hot plug (physical) in any case. This
patch is not actually covering physical hot plug, all it does is making
room for say hot swap of failed processors. How about making platform
hooks here for non-conforming (but not prohibited!) hot plug methods. In
fact the one used now on ES7000 is very intuitive and based on
assumption that "disabled" case is handled properly.
> >
> > apicid = mpc_apic_id(m, translation_table[mpc_record]);
> >
> > @@ -190,11 +192,13 @@ static void __init MP_processor_info (st
> > return;
> > }
> >
> > +#ifndef CONFIG_HOTPLUG_CPU
> > if (num_processors >= maxcpus) {
> > printk(KERN_WARNING "WARNING: maxcpus limit of
> %i reached."
> > " Processor ignored.\n", maxcpus);
> > return;
> > }
> > +#endif
> > num_processors++;
> > ver = m->mpc_apicver;
> >
> > diff -puN arch/i386/kernel/smpboot.c~hotcpu-i386
> arch/i386/kernel/smpboot.c
> > ---
> linux-2.6.13-rc6-mm2/arch/i386/kernel/smpboot.c~hotcpu-i386
> 2005-08-31 04:17:20.924024616 -0700
> > +++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/smpboot.c
> 2005-08-31 04:21:49.474198784 -0700
> > @@ -1003,9 +1003,10 @@ int __devinit smp_prepare_cpu(int cpu)
> > struct warm_boot_cpu_info info;
> > struct work_struct task;
> > int apicid, ret;
> > + extern u8 bios_cpu_apicid[NR_CPUS];
> >
> > lock_cpu_hotplug();
> > - apicid = x86_cpu_to_apicid[cpu];
> > + apicid = bios_cpu_apicid[cpu];
> > if (apicid == BAD_APICID) {
> > ret = -ENODEV;
> > goto exit;
> > diff -puN arch/i386/mach-default/topology.c~hotcpu-i386
> arch/i386/mach-default/topology.c
> > ---
> linux-2.6.13-rc6-mm2/arch/i386/mach-default/topology.c~hotc
> pu-i386 2005-08-31 04:17:20.957019600 -0700
> > +++
> linux-2.6.13-rc6-mm2-root/arch/i386/mach-default/topology.c
> 2005-08-31 04:22:13.020619184 -0700
> > @@ -76,7 +76,7 @@ static int __init topology_init(void)
> > for_each_online_node(i)
> > arch_register_node(i);
> >
> > - for_each_present_cpu(i)
> > + for_each_cpu(i)
>
> Nope. Should still be for present_cpus. with NR_CPUS large we
> would see way too many files in sysfs than what is really available.
Actually, it will only create as many nodes as listed in ACPI (enabled
or disabled). If you run it on regular system with maxcpus=1, you'll
only see the actual processor nodes (offlined beyond maxcpus).
> > arch_register_cpu(i);
> > return 0;
> > }
> > @@ -87,7 +87,7 @@ static int __init topology_init(void) {
> > int i;
> >
> > - for_each_present_cpu(i)
> > + for_each_cpu(i)
> > arch_register_cpu(i);
> > return 0;
> > }
> > diff -puN kernel/cpu.c~hotcpu-i386 kernel/cpu.c
> > --- linux-2.6.13-rc6-mm2/kernel/cpu.c~hotcpu-i386
> 2005-08-31 04:17:21.002012760 -0700
> > +++ linux-2.6.13-rc6-mm2-root/kernel/cpu.c 2005-08-31
> 04:23:34.378250944 -0700
> > @@ -158,7 +158,11 @@ int __devinit cpu_up(unsigned int cpu)
> > if ((ret = down_interruptible(&cpucontrol)) != 0)
> > return ret;
> >
> > +#ifdef CONFIG_HOTPLUG_CPU
> > + if (cpu_online(cpu)) {
> > +#else
> > if (cpu_online(cpu) || !cpu_present(cpu)) {
> > +#endif
>
> ditto.....
I'm quite agree with you on proper implementation of physical hot plug,
but "disabled" case has to be handled correctly, and it should be some
flexibility built in. I don't think there is enough right now even with
proper ACPI based implementation.
Thanks,
--Natalie
> > ret = -EINVAL;
> > goto out;
> > }
> > diff -puN arch/i386/kernel/irq.c~hotcpu-i386 arch/i386/kernel/irq.c
> > --- linux-2.6.13-rc6-mm2/arch/i386/kernel/irq.c~hotcpu-i386
> 2005-08-31 04:17:21.047005920 -0700
> > +++ linux-2.6.13-rc6-mm2-root/arch/i386/kernel/irq.c
> 2005-08-31 04:25:21.761926144 -0700
> > @@ -248,7 +248,7 @@ int show_interrupts(struct seq_file *p,
> >
> > if (i == 0) {
> > seq_printf(p, " ");
> > - for_each_cpu(j)
> > + for_each_online_cpu(j)
> > seq_printf(p, "CPU%d ",j);
> > seq_putc(p, '\n');
> > }
> > @@ -262,7 +262,7 @@ int show_interrupts(struct seq_file *p,
> #ifndef
> > CONFIG_SMP
> > seq_printf(p, "%10u ", kstat_irqs(i)); #else
> > - for_each_cpu(j)
> > + for_each_online_cpu(j)
> > seq_printf(p, "%10u ",
> kstat_cpu(j).irqs[i]); #endif
> > seq_printf(p, " %14s",
> irq_desc[i].handler->typename); @@ -276,12
> > +276,12 @@ skip:
> > spin_unlock_irqrestore(&irq_desc[i].lock, flags);
> > } else if (i == NR_IRQS) {
> > seq_printf(p, "NMI: ");
> > - for_each_cpu(j)
> > + for_each_online_cpu(j)
> > seq_printf(p, "%10u ", nmi_count(j));
> > seq_putc(p, '\n');
> > #ifdef CONFIG_X86_LOCAL_APIC
> > seq_printf(p, "LOC: ");
> > - for_each_cpu(j)
> > + for_each_online_cpu(j)
> > seq_printf(p, "%10u ",
> > per_cpu(irq_stat,j).apic_timer_irqs);
> > seq_putc(p, '\n');
> > _
>
> --
> Cheers,
> Ashok Raj
> - Open Source Technology Center
>
On Thu, Sep 01, 2005 at 04:09:09PM -0500, Protasevich, Natalie wrote:
> >
> > > Current IA32 CPU hotplug code doesn't allow bringing up
> > processors that were not present in the boot configuration.
> > > To make existing hot plug facility more practical for physical hot
> > > plug, possible processors should be encountered during boot for
> > > potentual hot add/replace/remove. On ES7000, ACPI marks all the
> > > sockets that are empty or not assigned to the partitionas as
> > > "disabled". The patch allows arrays/masks with APIC info
> > for disabled
> > > processors to be
> >
> > This sounds like a cluge to me. The correct implementation
> > would be you would need some sysmgmt deamon or something that
> > works with the kernel to notify of new cpus and populate
> > apicid and grow cpu_present_map. Making an assumption that
> > disabled APICID are valid for ES7000 sake is not a safe assumption.
>
> Yes, this is a kludge, I realize that. The AML code was not there so far
> (it will be in the next one). I have a point here though that if the
> processor is there, but is unusable (what "disabled" means as the ACPI
> spec says), meaning bad maybe, then with physical hot plug it can
> certainly be made usable and I think it should be taken into
> consideration (and into configuration). It should be counted as possible
> at least, with hot plug, because it represent existing socket.
I think marking it as present, and considering in cpu_possible_map is perfectly
ok. But we would need more glue logic, that is if firmware marked it as
disabled, then one would expect you then run _STA and find that the CPU
is now present and functional as reported by _STA, then the CPU is onlinable.
So if _STA can work favorably in your case you can use it to override the
disabled setting at boot time which would be prefectly fine.
>
--
Cheers,
Ashok Raj
- Open Source Technology Center
> -----Original Message-----
> From: Ashok Raj [mailto:[email protected]]
> Sent: Thursday, September 01, 2005 3:27 PM
> To: Protasevich, Natalie
> Cc: Ashok Raj; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> Subject: Re: [patch 1/1] Hot plug CPU to support physical add
> of new processors (i386)
>
> On Thu, Sep 01, 2005 at 04:09:09PM -0500, Protasevich, Natalie wrote:
> > >
> > > > Current IA32 CPU hotplug code doesn't allow bringing up
> > > processors that were not present in the boot configuration.
> > > > To make existing hot plug facility more practical for
> physical hot
> > > > plug, possible processors should be encountered during boot for
> > > > potentual hot add/replace/remove. On ES7000, ACPI marks all the
> > > > sockets that are empty or not assigned to the partitionas as
> > > > "disabled". The patch allows arrays/masks with APIC info
> > > for disabled
> > > > processors to be
> > >
> > > This sounds like a cluge to me. The correct
> implementation would be
> > > you would need some sysmgmt deamon or something that
> works with the
> > > kernel to notify of new cpus and populate apicid and grow
> > > cpu_present_map. Making an assumption that disabled
> APICID are valid
> > > for ES7000 sake is not a safe assumption.
> >
> > Yes, this is a kludge, I realize that. The AML code was not
> there so
> > far (it will be in the next one). I have a point here
> though that if
> > the processor is there, but is unusable (what "disabled"
> means as the
> > ACPI spec says), meaning bad maybe, then with physical hot
> plug it can
> > certainly be made usable and I think it should be taken into
> > consideration (and into configuration). It should be counted as
> > possible at least, with hot plug, because it represent
> existing socket.
>
>
> I think marking it as present, and considering in
> cpu_possible_map is perfectly ok. But we would need more glue
> logic, that is if firmware marked it as disabled, then one
> would expect you then run _STA and find that the CPU is now
> present and functional as reported by _STA, then the CPU is onlinable.
>
> So if _STA can work favorably in your case you can use it to
> override the disabled setting at boot time which would be
> prefectly fine.
> >
We have a server control mechanism that sends appropriate messages and
notifications to the OS. The BIOS handles all the hardware level stuff,
including bringing the processor into appropriate state. The user level
driver handles those messages and uses OS facilities to bring up
processors and make them operational.
--Natalie
> --
> Cheers,
> Ashok Raj
> - Open Source Technology Center
>