2010-12-07 02:30:50

by Zheng, Shaohui

[permalink] [raw]
Subject: [5/7,v8] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

From: Shaohui Zheng <[email protected]>

CPU physical hot-add/hot-remove is supported on some hardware, and it
is already supported by the current Linux kernel. The NUMA Hotplug Emulator
provides a mechanism to emulate the process in software. It can be used for
testing or debugging purposes.

CPU physical hotplug is different from logical CPU online/offline. Logical
online/offline is controlled by the interface
/sys/devices/system/cpu/cpuX/online. The CPU hotplug emulator uses the
probe/release interface. This makes it possible to do CPU hotplug automation
and stress testing.

Add the CPU probe/release interface under sysfs for x86_64. Users can use this
interface to emulate the CPU hot-add and hot-remove process.

Directive:
*) Reserve CPUs through a grub (kernel command line) parameter like:
maxcpus=4

The remaining CPUs will not be initialized.

*) Probe CPU
we can use the probe interface to hot-add new CPUs:
echo nid > /sys/devices/system/cpu/probe

*) Release a CPU
echo cpu > /sys/devices/system/cpu/release

A reserved CPU will be hot-added to the specified node.
1) nid == 0: the CPU will be added to the real node which the CPU
should be in.
2) nid != 0: the CPU will be added to node nid even though it is a fake node.

CC: Ingo Molnar <[email protected]>
CC: Len Brown <[email protected]>
CC: Yinghai Lu <[email protected]>
Signed-off-by: Shaohui Zheng <[email protected]>
Signed-off-by: Haicheng Li <[email protected]>
---
Index: linux-hpe4/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/acpi/boot.c 2010-11-26 09:24:40.287725018 +0800
+++ linux-hpe4/arch/x86/kernel/acpi/boot.c 2010-11-26 09:24:53.277724996 +0800
@@ -647,8 +647,44 @@
}
EXPORT_SYMBOL(acpi_map_lsapic);

+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static void acpi_map_cpu2node_emu(int cpu, int physid, int nid)
+{
+#ifdef CONFIG_ACPI_NUMA
+#ifdef CONFIG_X86_64
+ apicid_to_node[physid] = nid;
+ numa_set_node(cpu, nid);
+#else /* CONFIG_X86_32 */
+ apicid_2_node[physid] = nid;
+ cpu_to_node_map[cpu] = nid;
+#endif
+#endif
+}
+
+static u16 cpu_to_apicid_saved[CONFIG_NR_CPUS];
+int __ref acpi_map_lsapic_emu(int pcpu, int nid)
+{
+ /* backup cpu apicid to array cpu_to_apicid_saved */
+ if (cpu_to_apicid_saved[pcpu] == 0 &&
+ per_cpu(x86_cpu_to_apicid, pcpu) != BAD_APICID)
+ cpu_to_apicid_saved[pcpu] = per_cpu(x86_cpu_to_apicid, pcpu);
+
+ per_cpu(x86_cpu_to_apicid, pcpu) = cpu_to_apicid_saved[pcpu];
+ acpi_map_cpu2node_emu(pcpu, per_cpu(x86_cpu_to_apicid, pcpu), nid);
+
+ return pcpu;
+}
+EXPORT_SYMBOL(acpi_map_lsapic_emu);
+#endif
+
int acpi_unmap_lsapic(int cpu)
{
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ /* backup cpu apicid to array cpu_to_apicid_saved */
+ if (cpu_to_apicid_saved[cpu] == 0 &&
+ per_cpu(x86_cpu_to_apicid, cpu) != BAD_APICID)
+ cpu_to_apicid_saved[cpu] = per_cpu(x86_cpu_to_apicid, cpu);
+#endif
per_cpu(x86_cpu_to_apicid, cpu) = -1;
set_cpu_present(cpu, false);
num_processors--;
Index: linux-hpe4/arch/x86/kernel/smpboot.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/smpboot.c 2010-11-26 09:24:40.297724969 +0800
+++ linux-hpe4/arch/x86/kernel/smpboot.c 2010-11-26 12:48:58.977725001 +0800
@@ -107,8 +107,6 @@
mutex_unlock(&x86_cpu_hotplug_driver_mutex);
}

-ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
-ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
#else
static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
Index: linux-hpe4/arch/x86/kernel/topology.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/topology.c 2010-11-26 09:24:52.477725000 +0800
+++ linux-hpe4/arch/x86/kernel/topology.c 2010-11-26 12:48:58.987725001 +0800
@@ -30,6 +30,9 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/cpu.h>
+#include <linux/cpu.h>
+#include <linux/topology.h>
+#include <linux/acpi.h>

static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);

@@ -66,6 +69,74 @@
unregister_cpu(&per_cpu(cpu_devices, num).cpu);
}
EXPORT_SYMBOL(arch_unregister_cpu);
+
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+ int nid = 0;
+ int num = 0, selected = 0;
+
+ /* check parameters */
+ if (!buf || count < 2)
+ return -EPERM;
+
+ nid = simple_strtoul(buf, NULL, 0);
+ printk(KERN_DEBUG "Add a cpu to node : %d\n", nid);
+
+ if (nid < 0 || nid > nr_node_ids - 1) {
+ printk(KERN_ERR "Invalid NUMA node id: %d (0 <= nid < %d).\n",
+ nid, nr_node_ids);
+ return -EPERM;
+ }
+
+ if (!node_online(nid)) {
+ printk(KERN_ERR "NUMA node %d is not online, give up.\n", nid);
+ return -EPERM;
+ }
+
+ /* find first uninitialized cpu */
+ for_each_present_cpu(num) {
+ if (per_cpu(cpu_sys_devices, num) == NULL) {
+ selected = num;
+ break;
+ }
+ }
+
+ if (selected >= num_possible_cpus()) {
+ printk(KERN_ERR "No free cpu, give up cpu probing.\n");
+ return -EPERM;
+ }
+
+ /* register cpu */
+ arch_register_cpu_node(selected, nid);
+ acpi_map_lsapic_emu(selected, nid);
+
+ return count;
+}
+EXPORT_SYMBOL(arch_cpu_probe);
+
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+ int cpu = 0;
+
+ cpu = simple_strtoul(buf, NULL, 0);
+ /* cpu 0 is not hotplugable */
+ if (cpu == 0) {
+ printk(KERN_ERR "can not release cpu 0.\n");
+ return -EPERM;
+ }
+
+ if (cpu_online(cpu)) {
+ printk(KERN_DEBUG "offline cpu %d.\n", cpu);
+ cpu_down(cpu);
+ }
+
+ arch_unregister_cpu(cpu);
+ acpi_unmap_lsapic(cpu);
+
+ return count;
+}
+EXPORT_SYMBOL(arch_cpu_release);
+
#else /* CONFIG_HOTPLUG_CPU */

static int __init arch_register_cpu(int num)
@@ -83,8 +154,14 @@
register_one_node(i);
#endif

- for_each_present_cpu(i)
- arch_register_cpu(i);
+ /*
+ * When cpu hotplug emulation is enabled, register only the online cpus;
+ * the rest are reserved for cpu probe.
+ */
+ for_each_present_cpu(i) {
+ if ((cpu_hpe_on && cpu_online(i)) || !cpu_hpe_on)
+ arch_register_cpu(i);
+ }

return 0;
}
Index: linux-hpe4/arch/x86/mm/numa_64.c
===================================================================
--- linux-hpe4.orig/arch/x86/mm/numa_64.c 2010-11-26 09:24:40.317724965 +0800
+++ linux-hpe4/arch/x86/mm/numa_64.c 2010-11-26 09:24:53.297725001 +0800
@@ -12,6 +12,7 @@
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
+#include <linux/cpu.h>

#include <asm/e820.h>
#include <asm/proto.h>
@@ -785,6 +786,19 @@
}
#endif

+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static __init int cpu_hpe_setup(char *opt)
+{
+ if (!opt)
+ return -EINVAL;
+
+ if (!strncmp(opt, "on", 2) || !strncmp(opt, "1", 1))
+ cpu_hpe_on = 1;
+
+ return 0;
+}
+early_param("cpu_hpe", cpu_hpe_setup);
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

void __cpuinit numa_set_node(int cpu, int node)
{
Index: linux-hpe4/drivers/acpi/processor_driver.c
===================================================================
--- linux-hpe4.orig/drivers/acpi/processor_driver.c 2010-11-26 09:24:40.327725004 +0800
+++ linux-hpe4/drivers/acpi/processor_driver.c 2010-11-26 09:24:53.297725001 +0800
@@ -530,6 +530,14 @@
goto err_free_cpumask;

sysdev = get_cpu_sysdev(pr->id);
+ /*
+ * Reserve cpu for hotplug emulation, the reserved cpu can be hot-added
+ * through the cpu probe interface. Return directly.
+ */
+ if (sysdev == NULL) {
+ goto out;
+ }
+
if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
result = -EFAULT;
goto err_remove_fs;
@@ -570,6 +578,7 @@
goto err_remove_sysfs;
}

+out:
return 0;

err_remove_sysfs:
Index: linux-hpe4/drivers/base/cpu.c
===================================================================
--- linux-hpe4.orig/drivers/base/cpu.c 2010-11-26 09:24:52.477725000 +0800
+++ linux-hpe4/drivers/base/cpu.c 2010-11-26 09:24:53.297725001 +0800
@@ -22,9 +22,15 @@
};
EXPORT_SYMBOL(cpu_sysdev_class);

-static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
+DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);

#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu_hpe_on is a switch to enable/disable cpu hotplug emulation. It is
+ * disabled by default; we can enable it through grub parameter cpu_hpe=on
+ */
+int cpu_hpe_on;
+
static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
char *buf)
{
Index: linux-hpe4/include/linux/acpi.h
===================================================================
--- linux-hpe4.orig/include/linux/acpi.h 2010-11-26 09:24:40.347725041 +0800
+++ linux-hpe4/include/linux/acpi.h 2010-11-26 09:24:53.297725001 +0800
@@ -102,6 +102,7 @@
#ifdef CONFIG_ACPI_HOTPLUG_CPU
/* Arch dependent functions for cpu hotplug support */
int acpi_map_lsapic(acpi_handle handle, int *pcpu);
+int acpi_map_lsapic_emu(int pcpu, int nid);
int acpi_unmap_lsapic(int cpu);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */

Index: linux-hpe4/include/linux/cpu.h
===================================================================
--- linux-hpe4.orig/include/linux/cpu.h 2010-11-26 09:24:52.477725000 +0800
+++ linux-hpe4/include/linux/cpu.h 2010-11-26 09:24:53.297725001 +0800
@@ -30,6 +30,8 @@
struct sys_device sysdev;
};

+DECLARE_PER_CPU(struct sys_device *, cpu_sys_devices);
+
extern int register_cpu_node(struct cpu *cpu, int num, int nid);

static inline int register_cpu(struct cpu *cpu, int num)
@@ -149,6 +151,7 @@
#define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
int cpu_down(unsigned int cpu);
+extern int cpu_hpe_on;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
extern void cpu_hotplug_driver_lock(void);
@@ -171,6 +174,7 @@
/* These aren't inline functions due to a GCC bug. */
#define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
#define unregister_hotcpu_notifier(nb) ({ (void)(nb); })
+static int cpu_hpe_on;
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_PM_SLEEP_SMP
Index: linux-hpe4/Documentation/x86/x86_64/boot-options.txt
===================================================================
--- linux-hpe4.orig/Documentation/x86/x86_64/boot-options.txt 2010-11-26 12:49:44.847725099 +0800
+++ linux-hpe4/Documentation/x86/x86_64/boot-options.txt 2010-11-26 12:55:50.527724999 +0800
@@ -316,3 +316,9 @@
Do not use GB pages for kernel direct mappings.
gbpages
Use GB pages for kernel direct mappings.
+ cpu_hpe=on/off
+ Enable/disable CPU hotplug emulation with software method. When cpu_hpe=on,
+ sysfs provides probe/release interface to hot add/remove CPUs dynamically.
+ We can use maxcpus=<N> to reserve CPUs.
+ This option is disabled by default.
+

--
Thanks & Regards,
Shaohui


2010-12-08 21:36:45

by David Rientjes

[permalink] [raw]
Subject: Re: [5/7,v8] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Tue, 7 Dec 2010, [email protected] wrote:

> From: Shaohui Zheng <[email protected]>
>
> CPU physical hot-add/hot-remove are supported on some hardwares, and it
> was already supported in current linux kernel. NUMA Hotplug Emulator provides
> a mechanism to emulate the process with software method. It can be used for
> testing or debuging purpose.
>
> CPU physical hotplug is different with logical CPU online/offline. Logical
> online/offline is controled by interface /sys/device/cpu/cpuX/online. CPU
> hotplug emulator uses probe/release interface. It becomes possible to do cpu
> hotplug automation and stress
>
> Add cpu interface probe/release under sysfs for x86_64. User can use this
> interface to emulate the cpu hot-add and hot-remove process.
>
> Directive:
> *) Reserve CPU thru grub parameter like:
> maxcpus=4
>
> the rest CPUs will not be initiliazed.
>
> *) Probe CPU
> we can use the probe interface to hot-add new CPUs:
> echo nid > /sys/devices/system/cpu/probe
>
> *) Release a CPU
> echo cpu > /sys/devices/system/cpu/release
>
> A reserved CPU will be hot-added to the specified node.
> 1) nid == 0, the CPU will be added to the real node which the CPU
> should be in
> 2) nid != 0, add the CPU to node nid even through it is a fake node.
>

This patch is undoubtedly going to conflict with Tejun's unification of
the 32 and 64 bit NUMA boot paths, specifically the patch at
http://marc.info/?l=linux-kernel&m=129087151912379.

Tejun, what's the status of that patchset posted on November 27? Any
comments about this change?

> CC: Ingo Molnar <[email protected]>
> CC: Len Brown <[email protected]>
> CC: Yinghai Lu <[email protected]>
> Signed-off-by: Shaohui Zheng <[email protected]>
> Signed-off-by: Haicheng Li <[email protected]>
> ---
> Index: linux-hpe4/arch/x86/kernel/acpi/boot.c
> ===================================================================
> --- linux-hpe4.orig/arch/x86/kernel/acpi/boot.c 2010-11-26 09:24:40.287725018 +0800
> +++ linux-hpe4/arch/x86/kernel/acpi/boot.c 2010-11-26 09:24:53.277724996 +0800
> @@ -647,8 +647,44 @@
> }
> EXPORT_SYMBOL(acpi_map_lsapic);
>
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> +static void acpi_map_cpu2node_emu(int cpu, int physid, int nid)
> +{
> +#ifdef CONFIG_ACPI_NUMA
> +#ifdef CONFIG_X86_64
> + apicid_to_node[physid] = nid;
> + numa_set_node(cpu, nid);
> +#else /* CONFIG_X86_32 */
> + apicid_2_node[physid] = nid;
> + cpu_to_node_map[cpu] = nid;
> +#endif
> +#endif
> +}
> +
> +static u16 cpu_to_apicid_saved[CONFIG_NR_CPUS];
> +int __ref acpi_map_lsapic_emu(int pcpu, int nid)
> +{
> + /* backup cpu apicid to array cpu_to_apicid_saved */
> + if (cpu_to_apicid_saved[pcpu] == 0 &&
> + per_cpu(x86_cpu_to_apicid, pcpu) != BAD_APICID)
> + cpu_to_apicid_saved[pcpu] = per_cpu(x86_cpu_to_apicid, pcpu);
> +
> + per_cpu(x86_cpu_to_apicid, pcpu) = cpu_to_apicid_saved[pcpu];
> + acpi_map_cpu2node_emu(pcpu, per_cpu(x86_cpu_to_apicid, pcpu), nid);
> +
> + return pcpu;
> +}
> +EXPORT_SYMBOL(acpi_map_lsapic_emu);
> +#endif
> +
> int acpi_unmap_lsapic(int cpu)
> {
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> + /* backup cpu apicid to array cpu_to_apicid_saved */
> + if (cpu_to_apicid_saved[cpu] == 0 &&
> + per_cpu(x86_cpu_to_apicid, cpu) != BAD_APICID)
> + cpu_to_apicid_saved[cpu] = per_cpu(x86_cpu_to_apicid, cpu);
> +#endif
> per_cpu(x86_cpu_to_apicid, cpu) = -1;
> set_cpu_present(cpu, false);
> num_processors--;
> Index: linux-hpe4/arch/x86/kernel/smpboot.c
> ===================================================================
> --- linux-hpe4.orig/arch/x86/kernel/smpboot.c 2010-11-26 09:24:40.297724969 +0800
> +++ linux-hpe4/arch/x86/kernel/smpboot.c 2010-11-26 12:48:58.977725001 +0800
> @@ -107,8 +107,6 @@
> mutex_unlock(&x86_cpu_hotplug_driver_mutex);
> }
>
> -ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
> -ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
> #else
> static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
> #define get_idle_for_cpu(x) (idle_thread_array[(x)])
> Index: linux-hpe4/arch/x86/kernel/topology.c
> ===================================================================
> --- linux-hpe4.orig/arch/x86/kernel/topology.c 2010-11-26 09:24:52.477725000 +0800
> +++ linux-hpe4/arch/x86/kernel/topology.c 2010-11-26 12:48:58.987725001 +0800
> @@ -30,6 +30,9 @@
> #include <linux/init.h>
> #include <linux/smp.h>
> #include <asm/cpu.h>
> +#include <linux/cpu.h>
> +#include <linux/topology.h>
> +#include <linux/acpi.h>
>
> static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
>
> @@ -66,6 +69,74 @@
> unregister_cpu(&per_cpu(cpu_devices, num).cpu);
> }
> EXPORT_SYMBOL(arch_unregister_cpu);
> +
> +ssize_t arch_cpu_probe(const char *buf, size_t count)
> +{
> + int nid = 0;
> + int num = 0, selected = 0;
> +
> + /* check parameters */
> + if (!buf || count < 2)
> + return -EPERM;
> +
> + nid = simple_strtoul(buf, NULL, 0);
> + printk(KERN_DEBUG "Add a cpu to node : %d\n", nid);
> +
> + if (nid < 0 || nid > nr_node_ids - 1) {
> + printk(KERN_ERR "Invalid NUMA node id: %d (0 <= nid < %d).\n",
> + nid, nr_node_ids);
> + return -EPERM;
> + }
> +
> + if (!node_online(nid)) {
> + printk(KERN_ERR "NUMA node %d is not online, give up.\n", nid);
> + return -EPERM;
> + }
> +
> + /* find first uninitialized cpu */
> + for_each_present_cpu(num) {
> + if (per_cpu(cpu_sys_devices, num) == NULL) {
> + selected = num;
> + break;
> + }
> + }
> +
> + if (selected >= num_possible_cpus()) {
> + printk(KERN_ERR "No free cpu, give up cpu probing.\n");
> + return -EPERM;
> + }
> +
> + /* register cpu */
> + arch_register_cpu_node(selected, nid);
> + acpi_map_lsapic_emu(selected, nid);
> +
> + return count;
> +}
> +EXPORT_SYMBOL(arch_cpu_probe);
> +
> +ssize_t arch_cpu_release(const char *buf, size_t count)
> +{
> + int cpu = 0;
> +
> + cpu = simple_strtoul(buf, NULL, 0);
> + /* cpu 0 is not hotplugable */
> + if (cpu == 0) {
> + printk(KERN_ERR "can not release cpu 0.\n");
> + return -EPERM;
> + }
> +
> + if (cpu_online(cpu)) {
> + printk(KERN_DEBUG "offline cpu %d.\n", cpu);
> + cpu_down(cpu);
> + }
> +
> + arch_unregister_cpu(cpu);
> + acpi_unmap_lsapic(cpu);
> +
> + return count;
> +}
> +EXPORT_SYMBOL(arch_cpu_release);
> +
> #else /* CONFIG_HOTPLUG_CPU */
>
> static int __init arch_register_cpu(int num)
> @@ -83,8 +154,14 @@
> register_one_node(i);
> #endif
>
> - for_each_present_cpu(i)
> - arch_register_cpu(i);
> + /*
> + * when cpu hotplug emulation enabled, register the online cpu only,
> + * the rests are reserved for cpu probe.
> + */
> + for_each_present_cpu(i) {
> + if ((cpu_hpe_on && cpu_online(i)) || !cpu_hpe_on)
> + arch_register_cpu(i);
> + }
>
> return 0;
> }
> Index: linux-hpe4/arch/x86/mm/numa_64.c
> ===================================================================
> --- linux-hpe4.orig/arch/x86/mm/numa_64.c 2010-11-26 09:24:40.317724965 +0800
> +++ linux-hpe4/arch/x86/mm/numa_64.c 2010-11-26 09:24:53.297725001 +0800
> @@ -12,6 +12,7 @@
> #include <linux/module.h>
> #include <linux/nodemask.h>
> #include <linux/sched.h>
> +#include <linux/cpu.h>
>
> #include <asm/e820.h>
> #include <asm/proto.h>
> @@ -785,6 +786,19 @@
> }
> #endif
>
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> +static __init int cpu_hpe_setup(char *opt)
> +{
> + if (!opt)
> + return -EINVAL;
> +
> + if (!strncmp(opt, "on", 2) || !strncmp(opt, "1", 1))
> + cpu_hpe_on = 1;
> +
> + return 0;
> +}
> +early_param("cpu_hpe", cpu_hpe_setup);
> +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
>
> void __cpuinit numa_set_node(int cpu, int node)
> {
> Index: linux-hpe4/drivers/acpi/processor_driver.c
> ===================================================================
> --- linux-hpe4.orig/drivers/acpi/processor_driver.c 2010-11-26 09:24:40.327725004 +0800
> +++ linux-hpe4/drivers/acpi/processor_driver.c 2010-11-26 09:24:53.297725001 +0800
> @@ -530,6 +530,14 @@
> goto err_free_cpumask;
>
> sysdev = get_cpu_sysdev(pr->id);
> + /*
> + * Reserve cpu for hotplug emulation, the reserved cpu can be hot-added
> + * throu the cpu probe interface. Return directly.
> + */
> + if (sysdev == NULL) {
> + goto out;
> + }
> +
> if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
> result = -EFAULT;
> goto err_remove_fs;
> @@ -570,6 +578,7 @@
> goto err_remove_sysfs;
> }
>
> +out:
> return 0;
>
> err_remove_sysfs:
> Index: linux-hpe4/drivers/base/cpu.c
> ===================================================================
> --- linux-hpe4.orig/drivers/base/cpu.c 2010-11-26 09:24:52.477725000 +0800
> +++ linux-hpe4/drivers/base/cpu.c 2010-11-26 09:24:53.297725001 +0800
> @@ -22,9 +22,15 @@
> };
> EXPORT_SYMBOL(cpu_sysdev_class);
>
> -static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
> +DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
>
> #ifdef CONFIG_HOTPLUG_CPU
> +/*
> + * cpu_hpe_on is a switch to enable/disable cpu hotplug emulation. it is
> + * disabled in default, we can enable it throu grub parameter cpu_hpe=on
> + */
> +int cpu_hpe_on;
> +
> static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
> char *buf)
> {
> Index: linux-hpe4/include/linux/acpi.h
> ===================================================================
> --- linux-hpe4.orig/include/linux/acpi.h 2010-11-26 09:24:40.347725041 +0800
> +++ linux-hpe4/include/linux/acpi.h 2010-11-26 09:24:53.297725001 +0800
> @@ -102,6 +102,7 @@
> #ifdef CONFIG_ACPI_HOTPLUG_CPU
> /* Arch dependent functions for cpu hotplug support */
> int acpi_map_lsapic(acpi_handle handle, int *pcpu);
> +int acpi_map_lsapic_emu(int pcpu, int nid);
> int acpi_unmap_lsapic(int cpu);
> #endif /* CONFIG_ACPI_HOTPLUG_CPU */
>
> Index: linux-hpe4/include/linux/cpu.h
> ===================================================================
> --- linux-hpe4.orig/include/linux/cpu.h 2010-11-26 09:24:52.477725000 +0800
> +++ linux-hpe4/include/linux/cpu.h 2010-11-26 09:24:53.297725001 +0800
> @@ -30,6 +30,8 @@
> struct sys_device sysdev;
> };
>
> +DECLARE_PER_CPU(struct sys_device *, cpu_sys_devices);
> +
> extern int register_cpu_node(struct cpu *cpu, int num, int nid);
>
> static inline int register_cpu(struct cpu *cpu, int num)
> @@ -149,6 +151,7 @@
> #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
> #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
> int cpu_down(unsigned int cpu);
> +extern int cpu_hpe_on;
>
> #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> extern void cpu_hotplug_driver_lock(void);
> @@ -171,6 +174,7 @@
> /* These aren't inline functions due to a GCC bug. */
> #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
> #define unregister_hotcpu_notifier(nb) ({ (void)(nb); })
> +static int cpu_hpe_on;
> #endif /* CONFIG_HOTPLUG_CPU */
>
> #ifdef CONFIG_PM_SLEEP_SMP
> Index: linux-hpe4/Documentation/x86/x86_64/boot-options.txt
> ===================================================================
> --- linux-hpe4.orig/Documentation/x86/x86_64/boot-options.txt 2010-11-26 12:49:44.847725099 +0800
> +++ linux-hpe4/Documentation/x86/x86_64/boot-options.txt 2010-11-26 12:55:50.527724999 +0800
> @@ -316,3 +316,9 @@
> Do not use GB pages for kernel direct mappings.
> gbpages
> Use GB pages for kernel direct mappings.
> + cpu_hpe=on/off
> + Enable/disable CPU hotplug emulation with software method. When cpu_hpe=on,
> + sysfs provides probe/release interface to hot add/remove CPUs dynamically.
> + We can use maxcpus=<N> to reserve CPUs.
> + This option is disabled by default.
> +
>
> --
> Thanks & Regards,
> Shaohui
>
>
>

2010-12-09 09:38:12

by Tejun Heo

[permalink] [raw]
Subject: Re: [5/7,v8] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

Hello,

On 12/08/2010 10:36 PM, David Rientjes wrote:
> On Tue, 7 Dec 2010, [email protected] wrote:
>
>> From: Shaohui Zheng <[email protected]>
>>
>> CPU physical hot-add/hot-remove are supported on some hardwares, and it
>> was already supported in current linux kernel. NUMA Hotplug Emulator provides
>> a mechanism to emulate the process with software method. It can be used for
>> testing or debuging purpose.
>>
>> CPU physical hotplug is different with logical CPU online/offline. Logical
>> online/offline is controled by interface /sys/device/cpu/cpuX/online. CPU
>> hotplug emulator uses probe/release interface. It becomes possible to do cpu
>> hotplug automation and stress
>>
>> Add cpu interface probe/release under sysfs for x86_64. User can use this
>> interface to emulate the cpu hot-add and hot-remove process.
>>
>> Directive:
>> *) Reserve CPU thru grub parameter like:
>> maxcpus=4
>>
>> the rest CPUs will not be initiliazed.
>>
>> *) Probe CPU
>> we can use the probe interface to hot-add new CPUs:
>> echo nid > /sys/devices/system/cpu/probe
>>
>> *) Release a CPU
>> echo cpu > /sys/devices/system/cpu/release
>>
>> A reserved CPU will be hot-added to the specified node.
>> 1) nid == 0, the CPU will be added to the real node which the CPU
>> should be in
>> 2) nid != 0, add the CPU to node nid even through it is a fake node.
>>
>
> This patch is undoubtedly going to conflict with Tejun's unification of
> the 32 and 64 bit NUMA boot paths, specifically the patch at
> http://marc.info/?l=linux-kernel&m=129087151912379.

Oh yeah, it definitely looks like it will collide with the unification
patch. The problem is more fundamental than the actual patch
collisions tho. During x86_32/64 merge, some parts were left unmerged
- some reflect actual differences between 32 and 64 but more were
probably because it was too much work.

These subtle diversions make the code unnecessarily complicated,
fragile and difficult to maintain, so, in general, I think we should
be heading toward unifying 32 and 64 unless the difference is caused
by actual hardware even when the feature or code might not be too
useful for 32bit.

So, the same thing holds for NUMA hotplug emulator. 32bit supports
NUMA and there already is 64bit only NUMA emulator. I think it would
be much better if we take this chance to unify 32 and 64bit code paths
on this area rather than going further toward the wrong direction.

> Tejun, what's the status of that patchset posted on November 27? Any
> comments about this change?

I don't know. I pinged Ingo yesterday. Ingo?

Thanks.

--
tejun

2010-12-10 01:35:26

by Zheng, Shaohui

[permalink] [raw]
Subject: RE: [5/7,v8] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

Both Tejun's patches and mine are under review; the hotplug emulator patchset was posted much earlier than Tejun's patch. Currently, I do not know how to handle this situation.

It seems that I have 3 options:
1) continue to send this patchset based on current upstream kernel
2) continue to send this patchset based on upstream kernel + Tejun's patch
3) Postpone the patchset until Tejun's patches are accepted.

Can someone provide some suggestions? Thanks so much.

Thanks & Regards,
Shaohui


-----Original Message-----
From: David Rientjes [mailto:[email protected]]
Sent: Thursday, December 09, 2010 5:37 AM
To: Zheng, Shaohui; Tejun Heo
Cc: Andrew Morton; [email protected]; [email protected]; [email protected]; [email protected]; Andi Kleen; [email protected]; Greg Kroah-Hartman; Ingo Molnar; Brown, Len; Yinghai Lu; Li, Haicheng
Subject: Re: [5/7,v8] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Tue, 7 Dec 2010, [email protected] wrote:

> From: Shaohui Zheng <[email protected]>
>

This patch is undoubtedly going to conflict with Tejun's unification of
the 32 and 64 bit NUMA boot paths, specifically the patch at
http://marc.info/?l=linux-kernel&m=129087151912379.

Tejun, what's the status of that patchset posted on November 27? Any
comments about this change?

2010-12-10 08:01:46

by Zheng, Shaohui

[permalink] [raw]
Subject: RE: [5/7,v8] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

The unification of the 32-bit and 64-bit NUMA code makes the code much simpler to maintain. It is a good direction.

I have already reworked this patch based on your NUMA unification code, and I added you to the CC list of my patch.

Thanks & Regards,
Shaohui


-----Original Message-----
From: Tejun Heo [mailto:[email protected]]
Sent: Thursday, December 09, 2010 5:37 PM
To: David Rientjes
Cc: Zheng, Shaohui; Andrew Morton; [email protected]; [email protected]; [email protected]; [email protected]; Andi Kleen; [email protected]; Greg Kroah-Hartman; Ingo Molnar; Brown, Len; Yinghai Lu; Li, Haicheng
Subject: Re: [5/7,v8] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

Hello,

On 12/08/2010 10:36 PM, David Rientjes wrote:
> On Tue, 7 Dec 2010, [email protected] wrote:
>
>> From: Shaohui Zheng <[email protected]>
>>
>> CPU physical hot-add/hot-remove are supported on some hardwares, and it
>> was already supported in current linux kernel. NUMA Hotplug Emulator provides
>> a mechanism to emulate the process with software method. It can be used for
>> testing or debuging purpose.
>>
>> CPU physical hotplug is different with logical CPU online/offline. Logical
>> online/offline is controled by interface /sys/device/cpu/cpuX/online. CPU
>> hotplug emulator uses probe/release interface. It becomes possible to do cpu
>> hotplug automation and stress
>>
>> Add cpu interface probe/release under sysfs for x86_64. User can use this
>> interface to emulate the cpu hot-add and hot-remove process.
>>
>> Directive:
>> *) Reserve CPU thru grub parameter like:
>> maxcpus=4
>>
>> the rest CPUs will not be initiliazed.
>>
>> *) Probe CPU
>> we can use the probe interface to hot-add new CPUs:
>> echo nid > /sys/devices/system/cpu/probe
>>
>> *) Release a CPU
>> echo cpu > /sys/devices/system/cpu/release
>>
>> A reserved CPU will be hot-added to the specified node.
>> 1) nid == 0, the CPU will be added to the real node which the CPU
>> should be in
>> 2) nid != 0, add the CPU to node nid even through it is a fake node.
>>
>
> This patch is undoubtedly going to conflict with Tejun's unification of
> the 32 and 64 bit NUMA boot paths, specifically the patch at
> http://marc.info/?l=linux-kernel&m=129087151912379.

Oh yeah, it definitely looks like it will collide with the unification
patch. The problem is more fundamental than the actual patch
collisions tho. During x86_32/64 merge, some parts were left unmerged
- some reflect actual differences between 32 and 64 but more were
probably because it was too much work.

These subtle diversions make the code unnecessarily complicated,
fragile and difficult to maintain, so, in general, I think we should
be heading toward unifying 32 and 64 unless the difference is caused
by actual hardware even when the feature or code might not be too
useful for 32bit.

So, the same thing holds for NUMA hotplug emulator. 32bit supports
NUMA and there already is 64bit only NUMA emulator. I think it would
be much better if we take this chance to unify 32 and 64bit code paths
on this area rather than going further toward the wrong direction.

> Tejun, what's the status of that patchset posted on November 27? Any
> comments about this change?

I don't know. I pinged Ingo yesterday. Ingo?

Thanks.

--
tejun