2010-12-10 09:02:27

by Zheng, Shaohui

[permalink] [raw]
Subject: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

From: Shaohui Zheng <[email protected]>

CPU physical hot-add/hot-remove is supported on some hardware, and it
is already supported by the current Linux kernel. The NUMA Hotplug Emulator
provides a mechanism to emulate the process in software. It can be used for
testing or debugging purposes.

CPU physical hotplug is different from logical CPU online/offline. Logical
online/offline is controlled by the interface
/sys/devices/system/cpu/cpuX/online. The CPU hotplug emulator uses the
probe/release interface, which makes it possible to do CPU hotplug automation
and stress testing.

Add cpu interface probe/release under sysfs for x86_64. User can use this
interface to emulate the cpu hot-add and hot-remove process.

Directive:
*) Reserve CPU thru grub parameter like:
maxcpus=4

The remaining CPUs will not be initialized.

*) Probe CPU
we can use the probe interface to hot-add new CPUs:
echo nid > /sys/devices/system/cpu/probe

*) Release a CPU
echo cpu > /sys/devices/system/cpu/release

A reserved CPU will be hot-added to the specified node.
1) nid == 0, the CPU will be added to the real node which the CPU
should be in
2) nid != 0, add the CPU to node nid even though it is a fake node.

CC: Ingo Molnar <[email protected]>
CC: Len Brown <[email protected]>
CC: Yinghai Lu <[email protected]>
CC: Tejun Heo <[email protected]>
Signed-off-by: Shaohui Zheng <[email protected]>
Signed-off-by: Haicheng Li <[email protected]>
---
This patch is based on Tejun's unification of the 32 and 64 bit NUMA boot paths,
specifically the patch at http://marc.info/?l=linux-kernel&m=129087151912379.
Index: linux-hpe4/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/acpi/boot.c 2010-12-10 13:42:34.553331000 +0800
+++ linux-hpe4/arch/x86/kernel/acpi/boot.c 2010-12-10 14:48:32.113331001 +0800
@@ -668,8 +668,39 @@
}
EXPORT_SYMBOL(acpi_map_lsapic);

+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static void acpi_map_cpu2node_emu(int cpu, int physid, int nid)
+{
+#ifdef CONFIG_ACPI_NUMA
+ set_apicid_to_node(physid, nid);
+ numa_set_node(cpu, nid);
+#endif
+}
+
+static u16 cpu_to_apicid_saved[CONFIG_NR_CPUS];
+int __ref acpi_map_lsapic_emu(int pcpu, int nid)
+{
+ /* backup cpu apicid to array cpu_to_apicid_saved */
+ if (cpu_to_apicid_saved[pcpu] == 0 &&
+ per_cpu(x86_cpu_to_apicid, pcpu) != BAD_APICID)
+ cpu_to_apicid_saved[pcpu] = per_cpu(x86_cpu_to_apicid, pcpu);
+
+ per_cpu(x86_cpu_to_apicid, pcpu) = cpu_to_apicid_saved[pcpu];
+ acpi_map_cpu2node_emu(pcpu, per_cpu(x86_cpu_to_apicid, pcpu), nid);
+
+ return pcpu;
+}
+EXPORT_SYMBOL(acpi_map_lsapic_emu);
+#endif
+
int acpi_unmap_lsapic(int cpu)
{
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ /* backup cpu apicid to array cpu_to_apicid_saved */
+ if (cpu_to_apicid_saved[cpu] == 0 &&
+ per_cpu(x86_cpu_to_apicid, cpu) != BAD_APICID)
+ cpu_to_apicid_saved[cpu] = per_cpu(x86_cpu_to_apicid, cpu);
+#endif
per_cpu(x86_cpu_to_apicid, cpu) = -1;
set_cpu_present(cpu, false);
num_processors--;
Index: linux-hpe4/arch/x86/kernel/smpboot.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/smpboot.c 2010-12-10 13:42:34.563331000 +0800
+++ linux-hpe4/arch/x86/kernel/smpboot.c 2010-12-10 14:48:32.113331001 +0800
@@ -103,8 +103,6 @@
mutex_unlock(&x86_cpu_hotplug_driver_mutex);
}

-ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
-ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
#else
static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
Index: linux-hpe4/arch/x86/kernel/topology.c
===================================================================
--- linux-hpe4.orig/arch/x86/kernel/topology.c 2010-12-10 14:39:43.333331000 +0800
+++ linux-hpe4/arch/x86/kernel/topology.c 2010-12-10 14:49:56.043331000 +0800
@@ -30,6 +30,9 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/cpu.h>
+#include <linux/cpu.h>
+#include <linux/topology.h>
+#include <linux/acpi.h>

static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);

@@ -66,6 +69,78 @@
unregister_cpu(&per_cpu(cpu_devices, num).cpu);
}
EXPORT_SYMBOL(arch_unregister_cpu);
+
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+ int nid = 0;
+ int num = 0, selected = 0;
+
+ /* check parameters */
+ if (!buf || count < 2)
+ return -EPERM;
+
+ nid = simple_strtoul(buf, NULL, 0);
+ printk(KERN_DEBUG "Add a cpu to node : %d\n", nid);
+
+ if (nid < 0 || nid > nr_node_ids - 1) {
+ printk(KERN_ERR "Invalid NUMA node id: %d (0 <= nid < %d).\n",
+ nid, nr_node_ids);
+ return -EPERM;
+ }
+
+ if (!node_online(nid)) {
+ printk(KERN_ERR "NUMA node %d is not online, give up.\n", nid);
+ return -EPERM;
+ }
+
+ /* find first uninitialized cpu */
+ for_each_present_cpu(num) {
+ if (per_cpu(cpu_sys_devices, num) == NULL) {
+ selected = num;
+ break;
+ }
+ }
+
+ if (selected >= num_possible_cpus()) {
+ printk(KERN_ERR "No free cpu, give up cpu probing.\n");
+ return -EPERM;
+ }
+
+ /* register cpu */
+ arch_register_cpu_node(selected, nid);
+ acpi_map_lsapic_emu(selected, nid);
+
+ return count;
+}
+EXPORT_SYMBOL(arch_cpu_probe);
+
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+ int cpu = 0;
+
+ cpu = simple_strtoul(buf, NULL, 0);
+ /* cpu 0 is not hotplugable */
+ if (cpu == 0) {
+ printk(KERN_ERR "can not release cpu 0.\n");
+ return -EPERM;
+ }
+
+ if (cpu_online(cpu)) {
+ printk(KERN_DEBUG "offline cpu %d.\n", cpu);
+ if (!cpu_down(cpu)) {
+ printk(KERN_ERR "fail to offline cpu %d, give up.\n", cpu);
+ return -EPERM;
+ }
+
+ }
+
+ arch_unregister_cpu(cpu);
+ acpi_unmap_lsapic(cpu);
+
+ return count;
+}
+EXPORT_SYMBOL(arch_cpu_release);
+
#else /* CONFIG_HOTPLUG_CPU */

static int __init arch_register_cpu(int num)
@@ -83,8 +158,14 @@
register_one_node(i);
#endif

- for_each_present_cpu(i)
- arch_register_cpu(i);
+ /*
+ * when cpu hotplug emulation enabled, register the online cpu only,
+ * the rests are reserved for cpu probe.
+ */
+ for_each_present_cpu(i) {
+ if ((cpu_hpe_on && cpu_online(i)) || !cpu_hpe_on)
+ arch_register_cpu(i);
+ }

return 0;
}
Index: linux-hpe4/arch/x86/mm/numa_64.c
===================================================================
--- linux-hpe4.orig/arch/x86/mm/numa_64.c 2010-12-10 14:39:37.153331000 +0800
+++ linux-hpe4/arch/x86/mm/numa_64.c 2010-12-10 14:48:32.123331001 +0800
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>
+#include <linux/cpu.h>

#include <asm/e820.h>
#include <asm/proto.h>
@@ -667,3 +668,17 @@
return __apicid_to_node[apicid];
return NUMA_NO_NODE;
}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static __init int cpu_hpe_setup(char *opt)
+{
+ if (!opt)
+ return -EINVAL;
+
+ if (!strncmp(opt, "on", 2) || !strncmp(opt, "1", 1))
+ cpu_hpe_on = 1;
+
+ return 0;
+}
+early_param("cpu_hpe", cpu_hpe_setup);
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
Index: linux-hpe4/drivers/acpi/processor_driver.c
===================================================================
--- linux-hpe4.orig/drivers/acpi/processor_driver.c 2010-12-10 13:42:34.593331000 +0800
+++ linux-hpe4/drivers/acpi/processor_driver.c 2010-12-10 14:48:32.143331001 +0800
@@ -542,6 +542,14 @@
goto err_free_cpumask;

sysdev = get_cpu_sysdev(pr->id);
+ /*
+ * Reserve cpu for hotplug emulation, the reserved cpu can be hot-added
+ * throu the cpu probe interface. Return directly.
+ */
+ if (sysdev == NULL) {
+ goto out;
+ }
+
if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
result = -EFAULT;
goto err_remove_fs;
@@ -582,6 +590,7 @@
goto err_remove_sysfs;
}

+out:
return 0;

err_remove_sysfs:
Index: linux-hpe4/drivers/base/cpu.c
===================================================================
--- linux-hpe4.orig/drivers/base/cpu.c 2010-12-10 14:39:43.333331000 +0800
+++ linux-hpe4/drivers/base/cpu.c 2010-12-10 14:48:32.143331001 +0800
@@ -22,9 +22,15 @@
};
EXPORT_SYMBOL(cpu_sysdev_class);

-static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
+DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);

#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu_hpe_on is a switch to enable/disable cpu hotplug emulation. it is
+ * disabled in default, we can enable it throu grub parameter cpu_hpe=on
+ */
+int cpu_hpe_on;
+
static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
char *buf)
{
Index: linux-hpe4/include/linux/acpi.h
===================================================================
--- linux-hpe4.orig/include/linux/acpi.h 2010-12-10 13:42:34.613331000 +0800
+++ linux-hpe4/include/linux/acpi.h 2010-12-10 14:48:32.153331001 +0800
@@ -102,6 +102,7 @@
#ifdef CONFIG_ACPI_HOTPLUG_CPU
/* Arch dependent functions for cpu hotplug support */
int acpi_map_lsapic(acpi_handle handle, int *pcpu);
+int acpi_map_lsapic_emu(int pcpu, int nid);
int acpi_unmap_lsapic(int cpu);
#endif /* CONFIG_ACPI_HOTPLUG_CPU */

Index: linux-hpe4/include/linux/cpu.h
===================================================================
--- linux-hpe4.orig/include/linux/cpu.h 2010-12-10 14:39:43.333331000 +0800
+++ linux-hpe4/include/linux/cpu.h 2010-12-10 14:48:32.153331001 +0800
@@ -25,6 +25,8 @@
struct sys_device sysdev;
};

+DECLARE_PER_CPU(struct sys_device *, cpu_sys_devices);
+
extern int register_cpu_node(struct cpu *cpu, int num, int nid);

static inline int register_cpu(struct cpu *cpu, int num)
@@ -144,6 +146,7 @@
#define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
int cpu_down(unsigned int cpu);
+extern int cpu_hpe_on;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
extern void cpu_hotplug_driver_lock(void);
@@ -166,6 +169,7 @@
/* These aren't inline functions due to a GCC bug. */
#define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
#define unregister_hotcpu_notifier(nb) ({ (void)(nb); })
+static int cpu_hpe_on;
#endif /* CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_PM_SLEEP_SMP
Index: linux-hpe4/Documentation/x86/x86_64/boot-options.txt
===================================================================
--- linux-hpe4.orig/Documentation/x86/x86_64/boot-options.txt 2010-12-10 14:39:37.153331000 +0800
+++ linux-hpe4/Documentation/x86/x86_64/boot-options.txt 2010-12-10 14:48:32.153331001 +0800
@@ -320,3 +320,8 @@
Do not use GB pages for kernel direct mappings.
gbpages
Use GB pages for kernel direct mappings.
+ cpu_hpe=on/off
+ Enable/disable CPU hotplug emulation with software method. When cpu_hpe=on,
+ sysfs provides probe/release interface to hot add/remove CPUs dynamically.
+ We can use maxcpus=<N> to reserve CPUs.
+ This option is disabled by default.

--
Thanks & Regards,
Shaohui


2010-12-16 16:26:22

by Eric B Munson

[permalink] [raw]
Subject: Re: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

Shaohui,

What kernel is this series based on? I cannot get it to build when applied
to mainline. I seem to be missing a definition for set_apicid_to_node.

Eric

On Fri, 10 Dec 2010, [email protected] wrote:

> From: Shaohui Zheng <[email protected]>
>
> CPU physical hot-add/hot-remove are supported on some hardwares, and it
> was already supported in current linux kernel. NUMA Hotplug Emulator provides
> a mechanism to emulate the process with software method. It can be used for
> testing or debuging purpose.
>
> CPU physical hotplug is different with logical CPU online/offline. Logical
> online/offline is controled by interface /sys/device/cpu/cpuX/online. CPU
> hotplug emulator uses probe/release interface. It becomes possible to do cpu
> hotplug automation and stress
>
> Add cpu interface probe/release under sysfs for x86_64. User can use this
> interface to emulate the cpu hot-add and hot-remove process.
>
> Directive:
> *) Reserve CPU thru grub parameter like:
> maxcpus=4
>
> the rest CPUs will not be initiliazed.
>
> *) Probe CPU
> we can use the probe interface to hot-add new CPUs:
> echo nid > /sys/devices/system/cpu/probe
>
> *) Release a CPU
> echo cpu > /sys/devices/system/cpu/release
>
> A reserved CPU will be hot-added to the specified node.
> 1) nid == 0, the CPU will be added to the real node which the CPU
> should be in
> 2) nid != 0, add the CPU to node nid even through it is a fake node.
>
> CC: Ingo Molnar <[email protected]>
> CC: Len Brown <[email protected]>
> CC: Yinghai Lu <[email protected]>
> CC: Tejun Heo <[email protected]>
> Signed-off-by: Shaohui Zheng <[email protected]>
> Signed-off-by: Haicheng Li <[email protected]>
> ---
> This patch is based on Tejun's unification of the 32 and 64 bit NUMA boot paths,
> specifically the patch at http://marc.info/?l=linux-kernel&m=129087151912379.
> Index: linux-hpe4/arch/x86/kernel/acpi/boot.c
> ===================================================================
> --- linux-hpe4.orig/arch/x86/kernel/acpi/boot.c 2010-12-10 13:42:34.553331000 +0800
> +++ linux-hpe4/arch/x86/kernel/acpi/boot.c 2010-12-10 14:48:32.113331001 +0800
> @@ -668,8 +668,39 @@
> }
> EXPORT_SYMBOL(acpi_map_lsapic);
>
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> +static void acpi_map_cpu2node_emu(int cpu, int physid, int nid)
> +{
> +#ifdef CONFIG_ACPI_NUMA
> + set_apicid_to_node(physid, nid);
> + numa_set_node(cpu, nid);
> +#endif
> +}
> +
> +static u16 cpu_to_apicid_saved[CONFIG_NR_CPUS];
> +int __ref acpi_map_lsapic_emu(int pcpu, int nid)
> +{
> + /* backup cpu apicid to array cpu_to_apicid_saved */
> + if (cpu_to_apicid_saved[pcpu] == 0 &&
> + per_cpu(x86_cpu_to_apicid, pcpu) != BAD_APICID)
> + cpu_to_apicid_saved[pcpu] = per_cpu(x86_cpu_to_apicid, pcpu);
> +
> + per_cpu(x86_cpu_to_apicid, pcpu) = cpu_to_apicid_saved[pcpu];
> + acpi_map_cpu2node_emu(pcpu, per_cpu(x86_cpu_to_apicid, pcpu), nid);
> +
> + return pcpu;
> +}
> +EXPORT_SYMBOL(acpi_map_lsapic_emu);
> +#endif
> +
> int acpi_unmap_lsapic(int cpu)
> {
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> + /* backup cpu apicid to array cpu_to_apicid_saved */
> + if (cpu_to_apicid_saved[cpu] == 0 &&
> + per_cpu(x86_cpu_to_apicid, cpu) != BAD_APICID)
> + cpu_to_apicid_saved[cpu] = per_cpu(x86_cpu_to_apicid, cpu);
> +#endif
> per_cpu(x86_cpu_to_apicid, cpu) = -1;
> set_cpu_present(cpu, false);
> num_processors--;
> Index: linux-hpe4/arch/x86/kernel/smpboot.c
> ===================================================================
> --- linux-hpe4.orig/arch/x86/kernel/smpboot.c 2010-12-10 13:42:34.563331000 +0800
> +++ linux-hpe4/arch/x86/kernel/smpboot.c 2010-12-10 14:48:32.113331001 +0800
> @@ -103,8 +103,6 @@
> mutex_unlock(&x86_cpu_hotplug_driver_mutex);
> }
>
> -ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
> -ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
> #else
> static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
> #define get_idle_for_cpu(x) (idle_thread_array[(x)])
> Index: linux-hpe4/arch/x86/kernel/topology.c
> ===================================================================
> --- linux-hpe4.orig/arch/x86/kernel/topology.c 2010-12-10 14:39:43.333331000 +0800
> +++ linux-hpe4/arch/x86/kernel/topology.c 2010-12-10 14:49:56.043331000 +0800
> @@ -30,6 +30,9 @@
> #include <linux/init.h>
> #include <linux/smp.h>
> #include <asm/cpu.h>
> +#include <linux/cpu.h>
> +#include <linux/topology.h>
> +#include <linux/acpi.h>
>
> static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
>
> @@ -66,6 +69,78 @@
> unregister_cpu(&per_cpu(cpu_devices, num).cpu);
> }
> EXPORT_SYMBOL(arch_unregister_cpu);
> +
> +ssize_t arch_cpu_probe(const char *buf, size_t count)
> +{
> + int nid = 0;
> + int num = 0, selected = 0;
> +
> + /* check parameters */
> + if (!buf || count < 2)
> + return -EPERM;
> +
> + nid = simple_strtoul(buf, NULL, 0);
> + printk(KERN_DEBUG "Add a cpu to node : %d\n", nid);
> +
> + if (nid < 0 || nid > nr_node_ids - 1) {
> + printk(KERN_ERR "Invalid NUMA node id: %d (0 <= nid < %d).\n",
> + nid, nr_node_ids);
> + return -EPERM;
> + }
> +
> + if (!node_online(nid)) {
> + printk(KERN_ERR "NUMA node %d is not online, give up.\n", nid);
> + return -EPERM;
> + }
> +
> + /* find first uninitialized cpu */
> + for_each_present_cpu(num) {
> + if (per_cpu(cpu_sys_devices, num) == NULL) {
> + selected = num;
> + break;
> + }
> + }
> +
> + if (selected >= num_possible_cpus()) {
> + printk(KERN_ERR "No free cpu, give up cpu probing.\n");
> + return -EPERM;
> + }
> +
> + /* register cpu */
> + arch_register_cpu_node(selected, nid);
> + acpi_map_lsapic_emu(selected, nid);
> +
> + return count;
> +}
> +EXPORT_SYMBOL(arch_cpu_probe);
> +
> +ssize_t arch_cpu_release(const char *buf, size_t count)
> +{
> + int cpu = 0;
> +
> + cpu = simple_strtoul(buf, NULL, 0);
> + /* cpu 0 is not hotplugable */
> + if (cpu == 0) {
> + printk(KERN_ERR "can not release cpu 0.\n");
> + return -EPERM;
> + }
> +
> + if (cpu_online(cpu)) {
> + printk(KERN_DEBUG "offline cpu %d.\n", cpu);
> + if (!cpu_down(cpu)) {
> + printk(KERN_ERR "fail to offline cpu %d, give up.\n", cpu);
> + return -EPERM;
> + }
> +
> + }
> +
> + arch_unregister_cpu(cpu);
> + acpi_unmap_lsapic(cpu);
> +
> + return count;
> +}
> +EXPORT_SYMBOL(arch_cpu_release);
> +
> #else /* CONFIG_HOTPLUG_CPU */
>
> static int __init arch_register_cpu(int num)
> @@ -83,8 +158,14 @@
> register_one_node(i);
> #endif
>
> - for_each_present_cpu(i)
> - arch_register_cpu(i);
> + /*
> + * when cpu hotplug emulation enabled, register the online cpu only,
> + * the rests are reserved for cpu probe.
> + */
> + for_each_present_cpu(i) {
> + if ((cpu_hpe_on && cpu_online(i)) || !cpu_hpe_on)
> + arch_register_cpu(i);
> + }
>
> return 0;
> }
> Index: linux-hpe4/arch/x86/mm/numa_64.c
> ===================================================================
> --- linux-hpe4.orig/arch/x86/mm/numa_64.c 2010-12-10 14:39:37.153331000 +0800
> +++ linux-hpe4/arch/x86/mm/numa_64.c 2010-12-10 14:48:32.123331001 +0800
> @@ -13,6 +13,7 @@
> #include <linux/module.h>
> #include <linux/nodemask.h>
> #include <linux/sched.h>
> +#include <linux/cpu.h>
>
> #include <asm/e820.h>
> #include <asm/proto.h>
> @@ -667,3 +668,17 @@
> return __apicid_to_node[apicid];
> return NUMA_NO_NODE;
> }
> +
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> +static __init int cpu_hpe_setup(char *opt)
> +{
> + if (!opt)
> + return -EINVAL;
> +
> + if (!strncmp(opt, "on", 2) || !strncmp(opt, "1", 1))
> + cpu_hpe_on = 1;
> +
> + return 0;
> +}
> +early_param("cpu_hpe", cpu_hpe_setup);
> +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
> Index: linux-hpe4/drivers/acpi/processor_driver.c
> ===================================================================
> --- linux-hpe4.orig/drivers/acpi/processor_driver.c 2010-12-10 13:42:34.593331000 +0800
> +++ linux-hpe4/drivers/acpi/processor_driver.c 2010-12-10 14:48:32.143331001 +0800
> @@ -542,6 +542,14 @@
> goto err_free_cpumask;
>
> sysdev = get_cpu_sysdev(pr->id);
> + /*
> + * Reserve cpu for hotplug emulation, the reserved cpu can be hot-added
> + * throu the cpu probe interface. Return directly.
> + */
> + if (sysdev == NULL) {
> + goto out;
> + }
> +
> if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
> result = -EFAULT;
> goto err_remove_fs;
> @@ -582,6 +590,7 @@
> goto err_remove_sysfs;
> }
>
> +out:
> return 0;
>
> err_remove_sysfs:
> Index: linux-hpe4/drivers/base/cpu.c
> ===================================================================
> --- linux-hpe4.orig/drivers/base/cpu.c 2010-12-10 14:39:43.333331000 +0800
> +++ linux-hpe4/drivers/base/cpu.c 2010-12-10 14:48:32.143331001 +0800
> @@ -22,9 +22,15 @@
> };
> EXPORT_SYMBOL(cpu_sysdev_class);
>
> -static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
> +DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
>
> #ifdef CONFIG_HOTPLUG_CPU
> +/*
> + * cpu_hpe_on is a switch to enable/disable cpu hotplug emulation. it is
> + * disabled in default, we can enable it throu grub parameter cpu_hpe=on
> + */
> +int cpu_hpe_on;
> +
> static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
> char *buf)
> {
> Index: linux-hpe4/include/linux/acpi.h
> ===================================================================
> --- linux-hpe4.orig/include/linux/acpi.h 2010-12-10 13:42:34.613331000 +0800
> +++ linux-hpe4/include/linux/acpi.h 2010-12-10 14:48:32.153331001 +0800
> @@ -102,6 +102,7 @@
> #ifdef CONFIG_ACPI_HOTPLUG_CPU
> /* Arch dependent functions for cpu hotplug support */
> int acpi_map_lsapic(acpi_handle handle, int *pcpu);
> +int acpi_map_lsapic_emu(int pcpu, int nid);
> int acpi_unmap_lsapic(int cpu);
> #endif /* CONFIG_ACPI_HOTPLUG_CPU */
>
> Index: linux-hpe4/include/linux/cpu.h
> ===================================================================
> --- linux-hpe4.orig/include/linux/cpu.h 2010-12-10 14:39:43.333331000 +0800
> +++ linux-hpe4/include/linux/cpu.h 2010-12-10 14:48:32.153331001 +0800
> @@ -25,6 +25,8 @@
> struct sys_device sysdev;
> };
>
> +DECLARE_PER_CPU(struct sys_device *, cpu_sys_devices);
> +
> extern int register_cpu_node(struct cpu *cpu, int num, int nid);
>
> static inline int register_cpu(struct cpu *cpu, int num)
> @@ -144,6 +146,7 @@
> #define register_hotcpu_notifier(nb) register_cpu_notifier(nb)
> #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
> int cpu_down(unsigned int cpu);
> +extern int cpu_hpe_on;
>
> #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> extern void cpu_hotplug_driver_lock(void);
> @@ -166,6 +169,7 @@
> /* These aren't inline functions due to a GCC bug. */
> #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; })
> #define unregister_hotcpu_notifier(nb) ({ (void)(nb); })
> +static int cpu_hpe_on;
> #endif /* CONFIG_HOTPLUG_CPU */
>
> #ifdef CONFIG_PM_SLEEP_SMP
> Index: linux-hpe4/Documentation/x86/x86_64/boot-options.txt
> ===================================================================
> --- linux-hpe4.orig/Documentation/x86/x86_64/boot-options.txt 2010-12-10 14:39:37.153331000 +0800
> +++ linux-hpe4/Documentation/x86/x86_64/boot-options.txt 2010-12-10 14:48:32.153331001 +0800
> @@ -320,3 +320,8 @@
> Do not use GB pages for kernel direct mappings.
> gbpages
> Use GB pages for kernel direct mappings.
> + cpu_hpe=on/off
> + Enable/disable CPU hotplug emulation with software method. When cpu_hpe=on,
> + sysfs provides probe/release interface to hot add/remove CPUs dynamically.
> + We can use maxcpus=<N> to reserve CPUs.
> + This option is disabled by default.
>
> --
> Thanks & Regards,
> Shaohui
>
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to [email protected]. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
> Don't email: <a href=mailto:"[email protected]"> [email protected] </a>
>


Attachments:
(No filename) (11.79 kB)
signature.asc (490.00 B)
Digital signature
Download all attachments

2010-12-17 00:58:32

by Shaohui Zheng

[permalink] [raw]
Subject: Re: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Thu, Dec 16, 2010 at 09:25:41AM -0700, Eric B Munson wrote:
> Shaohui,
>
> What kernel is this series based on? I cannot get it to build when applied
> to mainline. I seem to be missing a definition for set_apicid_to_node.
>
> Eric
>

Eric,
There is a code conflict with Tejun's NUMA unification code, and Tejun's code is still under
review. This patchset resolves the conflict: the v9 emulator is based on his patches, so we
need to wait until his patches are accepted.

Tejun's patch: http://marc.info/?l=linux-kernel&m=129087151912379.

If you are doing some testing, you can try the v8 emulator instead.

--
Thanks & Regards,
Shaohui

2010-12-23 00:30:26

by Andrew Morton

[permalink] [raw]
Subject: Re: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Fri, 10 Dec 2010 15:31:24 +0800
[email protected] wrote:

> From: Shaohui Zheng <[email protected]>
>
> CPU physical hot-add/hot-remove are supported on some hardwares, and it
> was already supported in current linux kernel. NUMA Hotplug Emulator provides
> a mechanism to emulate the process with software method. It can be used for
> testing or debuging purpose.
>
> CPU physical hotplug is different with logical CPU online/offline. Logical
> online/offline is controled by interface /sys/device/cpu/cpuX/online. CPU
> hotplug emulator uses probe/release interface. It becomes possible to do cpu
> hotplug automation and stress
>
> Add cpu interface probe/release under sysfs for x86_64. User can use this
> interface to emulate the cpu hot-add and hot-remove process.
>
> Directive:
> *) Reserve CPU thru grub parameter like:
> maxcpus=4
>
> the rest CPUs will not be initiliazed.
>
> *) Probe CPU
> we can use the probe interface to hot-add new CPUs:
> echo nid > /sys/devices/system/cpu/probe
>
> *) Release a CPU
> echo cpu > /sys/devices/system/cpu/release
>
> A reserved CPU will be hot-added to the specified node.
> 1) nid == 0, the CPU will be added to the real node which the CPU
> should be in
> 2) nid != 0, add the CPU to node nid even through it is a fake node.
>
>
> ...
>
> --- linux-hpe4.orig/arch/x86/kernel/topology.c 2010-12-10 14:39:43.333331000 +0800
> +++ linux-hpe4/arch/x86/kernel/topology.c 2010-12-10 14:49:56.043331000 +0800
> @@ -30,6 +30,9 @@
> #include <linux/init.h>
> #include <linux/smp.h>
> #include <asm/cpu.h>
> +#include <linux/cpu.h>
> +#include <linux/topology.h>
> +#include <linux/acpi.h>
>
> static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
>
> @@ -66,6 +69,78 @@
> unregister_cpu(&per_cpu(cpu_devices, num).cpu);
> }
> EXPORT_SYMBOL(arch_unregister_cpu);
> +
> +ssize_t arch_cpu_probe(const char *buf, size_t count)
> +{
> + int nid = 0;
> + int num = 0, selected = 0;

One definition per line makes for more maintainable code.

Two of these initialisations are unnecessary.

> + /* check parameters */
> + if (!buf || count < 2)
> + return -EPERM;
> +
> + nid = simple_strtoul(buf, NULL, 0);

checkpatch?

> + printk(KERN_DEBUG "Add a cpu to node : %d\n", nid);

"Add a CPU to node %d" would make more sense.

> + if (nid < 0 || nid > nr_node_ids - 1) {
> + printk(KERN_ERR "Invalid NUMA node id: %d (0 <= nid < %d).\n",
> + nid, nr_node_ids);
> + return -EPERM;
> + }
> +
> + if (!node_online(nid)) {
> + printk(KERN_ERR "NUMA node %d is not online, give up.\n", nid);

"giving"

> + return -EPERM;
> + }
> +
> + /* find first uninitialized cpu */
> + for_each_present_cpu(num) {

s/num/cpu/ would be conventional. "num" is a pretty poor identifier in
general - it fails to identify what it is counting.

> + if (per_cpu(cpu_sys_devices, num) == NULL) {
> + selected = num;

Similarly, I'd have used "selected_cpu".

> + break;
> + }
> + }
> +
> + if (selected >= num_possible_cpus()) {
> + printk(KERN_ERR "No free cpu, give up cpu probing.\n");
> + return -EPERM;
> + }
> +
> + /* register cpu */
> + arch_register_cpu_node(selected, nid);
> + acpi_map_lsapic_emu(selected, nid);
> +
> + return count;
> +}
> +EXPORT_SYMBOL(arch_cpu_probe);

arch_cpu_probe() is global and exported to modules, but is undocumented.

If it had been documented, I might have been able to work out why arg
`count' is checked, but never used.

> +ssize_t arch_cpu_release(const char *buf, size_t count)
> +{
> + int cpu = 0;
> +
> + cpu = simple_strtoul(buf, NULL, 0);

unneeded initialisation, spurious whitespace, checkpatch.

> + /* cpu 0 is not hotplugable */
> + if (cpu == 0) {
> + printk(KERN_ERR "can not release cpu 0.\n");

It's generally better to make kernel messages self-identifying.
Especially error messages. If someone comes along and sees "can not
release cpu 0" in their logs, they don't have a clue what caused it
unless they download the kernel sources and go grepping.

> + return -EPERM;
> + }
> +
> + if (cpu_online(cpu)) {
> + printk(KERN_DEBUG "offline cpu %d.\n", cpu);
> + if (!cpu_down(cpu)) {
> + printk(KERN_ERR "fail to offline cpu %d, give up.\n", cpu);

"failed", "giving".

> + return -EPERM;
> + }
> +
> + }
> +
> + arch_unregister_cpu(cpu);
> + acpi_unmap_lsapic(cpu);
> +
> + return count;
> +}
> +EXPORT_SYMBOL(arch_cpu_release);

No documentation.

> #else /* CONFIG_HOTPLUG_CPU */
>
> static int __init arch_register_cpu(int num)
> @@ -83,8 +158,14 @@
> register_one_node(i);
> #endif
>
> - for_each_present_cpu(i)
> - arch_register_cpu(i);
> + /*
> + * when cpu hotplug emulation enabled, register the online cpu only,
> + * the rests are reserved for cpu probe.
> + */

Something like "When cpu hotplug emulation is enabled, register only
the online cpu. The remainder are reserved for cpu probing.".


> + for_each_present_cpu(i) {
> + if ((cpu_hpe_on && cpu_online(i)) || !cpu_hpe_on)
> + arch_register_cpu(i);
> + }
>
> return 0;
> }
>
> ...
>
> --- linux-hpe4.orig/drivers/acpi/processor_driver.c 2010-12-10 13:42:34.593331000 +0800
> +++ linux-hpe4/drivers/acpi/processor_driver.c 2010-12-10 14:48:32.143331001 +0800
> @@ -542,6 +542,14 @@
> goto err_free_cpumask;
>
> sysdev = get_cpu_sysdev(pr->id);
> + /*
> + * Reserve cpu for hotplug emulation, the reserved cpu can be hot-added
> + * throu the cpu probe interface. Return directly.

s/emulation, the/emulation. The/
s/throu/through/

> + */
> + if (sysdev == NULL) {
> + goto out;
> + }

Unneeded braces.

> if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
> result = -EFAULT;
> goto err_remove_fs;
> @@ -582,6 +590,7 @@
> goto err_remove_sysfs;
> }
>
> +out:
> return 0;
>
>
> ...
>
> --- linux-hpe4.orig/drivers/base/cpu.c 2010-12-10 14:39:43.333331000 +0800
> +++ linux-hpe4/drivers/base/cpu.c 2010-12-10 14:48:32.143331001 +0800
> @@ -22,9 +22,15 @@
> };
> EXPORT_SYMBOL(cpu_sysdev_class);
>
> -static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
> +DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
>
> #ifdef CONFIG_HOTPLUG_CPU
> +/*
> + * cpu_hpe_on is a switch to enable/disable cpu hotplug emulation. it is

s/it/It/.

> + * disabled in default, we can enable it throu grub parameter cpu_hpe=on

"through".

> + */
> +int cpu_hpe_on;

__read_mostly, perhaps.

> static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
> char *buf)
> {
>
> ...
>

2010-12-23 02:58:41

by Shaohui Zheng

[permalink] [raw]
Subject: Re: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Wed, Dec 22, 2010 at 04:27:27PM -0800, Andrew Morton wrote:
> On Fri, 10 Dec 2010 15:31:24 +0800
> > +
> > +ssize_t arch_cpu_probe(const char *buf, size_t count)
> > +{
> > + int nid = 0;
> > + int num = 0, selected = 0;
>
> One definition per line make for more maintainable code.
>
> Two of these initialisations are unnecessary.
>
Agreed. I will put them on separate lines and remove the initialisations.
I always try to initialize variables when I define them; it seems that is a bad habit.

> > + /* check parameters */
> > + if (!buf || count < 2)
> > + return -EPERM;
> > +
> > + nid = simple_strtoul(buf, NULL, 0);
>
> checkpatch?

It is only a warning, so I ignored it.
I will fix it.

>
> > + printk(KERN_DEBUG "Add a cpu to node : %d\n", nid);
>
> "Add a CPU to node %d" would make more sense.
>

Get it.

> > + if (nid < 0 || nid > nr_node_ids - 1) {
> > + printk(KERN_ERR "Invalid NUMA node id: %d (0 <= nid < %d).\n",
> > + nid, nr_node_ids);
> > + return -EPERM;
> > + }
> > +
> > + if (!node_online(nid)) {
> > + printk(KERN_ERR "NUMA node %d is not online, give up.\n", nid);
>
> "giving"
>

Get it.

> > + return -EPERM;
> > + }
> > +
> > + /* find first uninitialized cpu */
> > + for_each_present_cpu(num) {
>
> s/num/cpu/ would be conventional. "num" is a pretty poor identifier in
> general - it fails to identify what it is counting.
>

I will replace the identifier 'num' with 'cpu'.

> > + if (per_cpu(cpu_sys_devices, num) == NULL) {
> > + selected = num;
>
> Similarly, I'd have used "selected_cpu".
>

Get it.

> > + break;
> > + }
> > + }
> > +
> > + if (selected >= num_possible_cpus()) {
> > + printk(KERN_ERR "No free cpu, give up cpu probing.\n");
> > + return -EPERM;
> > + }
> > +
> > + /* register cpu */
> > + arch_register_cpu_node(selected, nid);
> > + acpi_map_lsapic_emu(selected, nid);
> > +
> > + return count;
> > +}
> > +EXPORT_SYMBOL(arch_cpu_probe);
>
> arch_cpu_probe() is global and exported to modules, but is undocumented.
>
> If it had been documented, I might have been able to work out why arg
> `count' is checked, but never used.
>

Sorry, Andrew, I did not follow. Do you mean that I should add documentation
before the definition of the function arch_cpu_probe?

> > +ssize_t arch_cpu_release(const char *buf, size_t count)
> > +{
> > + int cpu = 0;
> > +
> > + cpu = simple_strtoul(buf, NULL, 0);
>
> unneeded initialisation, spurious whitespace, checkpatch.
>

Agreed.

> > + /* cpu 0 is not hotplugable */
> > + if (cpu == 0) {
> > + printk(KERN_ERR "can not release cpu 0.\n");
>
> It's generally better to make kernel messages self-identifying.
> Especially error messages. If someone comes along and sees "can not
> release cpu 0" in their logs, they don't have a clue what caused it
> unless they download the kernel sources and go grepping.
>

How about "arch_cpu_release: cannot release cpu 0.\n"?

> > + return -EPERM;
> > + }
> > +
> > + if (cpu_online(cpu)) {
> > + printk(KERN_DEBUG "offline cpu %d.\n", cpu);
> > + if (!cpu_down(cpu)) {
> > + printk(KERN_ERR "fail to offline cpu %d, give up.\n", cpu);
>
> "failed", "giving".
>

Got it.

> > + return -EPERM;
> > + }
> > +
> > + }
> > +
> > + arch_unregister_cpu(cpu);
> > + acpi_unmap_lsapic(cpu);
> > +
> > + return count;
> > +}
> > +EXPORT_SYMBOL(arch_cpu_release);
>
> No documentation.
>

Sorry, it is the same as with the function arch_cpu_probe: I did not understand
the problem. Should I add documentation before the definition or the declaration,
or add the documentation to the Documentation/ directory?

> > #else /* CONFIG_HOTPLUG_CPU */
> >
> > static int __init arch_register_cpu(int num)
> > @@ -83,8 +158,14 @@
> > register_one_node(i);
> > #endif
> >
> > - for_each_present_cpu(i)
> > - arch_register_cpu(i);
> > + /*
> > + * when cpu hotplug emulation enabled, register the online cpu only,
> > + * the rests are reserved for cpu probe.
> > + */
>
> Something like "When cpu hotplug emulation is enabled, register only
> the online cpu. The remainder are reserved for cpu probing.".
>
>

Got it.

> > + for_each_present_cpu(i) {
> > + if ((cpu_hpe_on && cpu_online(i)) || !cpu_hpe_on)
> > + arch_register_cpu(i);
> > + }
> >
> > return 0;
> > }
> >
> > ...
> >
> > --- linux-hpe4.orig/drivers/acpi/processor_driver.c 2010-12-10 13:42:34.593331000 +0800
> > +++ linux-hpe4/drivers/acpi/processor_driver.c 2010-12-10 14:48:32.143331001 +0800
> > @@ -542,6 +542,14 @@
> > goto err_free_cpumask;
> >
> > sysdev = get_cpu_sysdev(pr->id);
> > + /*
> > + * Reserve cpu for hotplug emulation, the reserved cpu can be hot-added
> > + * throu the cpu probe interface. Return directly.
>
> s/emulation, the/emulation. The/
> s/throu/through/
>
> > + */
> > + if (sysdev == NULL) {
> > + goto out;
> > + }
>
> Unneeded braces.
>
> > if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
> > result = -EFAULT;
> > goto err_remove_fs;
> > @@ -582,6 +590,7 @@
> > goto err_remove_sysfs;
> > }
> >
> > +out:
> > return 0;
> >
> >
> > ...
> >
> > --- linux-hpe4.orig/drivers/base/cpu.c 2010-12-10 14:39:43.333331000 +0800
> > +++ linux-hpe4/drivers/base/cpu.c 2010-12-10 14:48:32.143331001 +0800
> > @@ -22,9 +22,15 @@
> > };
> > EXPORT_SYMBOL(cpu_sysdev_class);
> >
> > -static DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
> > +DEFINE_PER_CPU(struct sys_device *, cpu_sys_devices);
> >
> > #ifdef CONFIG_HOTPLUG_CPU
> > +/*
> > + * cpu_hpe_on is a switch to enable/disable cpu hotplug emulation. it is
>
> s/it/It/.
>
> > + * disabled in default, we can enable it throu grub parameter cpu_hpe=on
>
> "through".
>
> > + */
> > +int cpu_hpe_on;
>
> __read_mostly, perhaps.
>

CPU hotplug emulation is for debugging purposes, so cpu_hpe_on is not used very frequently.

> > static ssize_t show_online(struct sys_device *dev, struct sysdev_attribute *attr,
> > char *buf)
> > {
> >
> > ...
> >

--
Thanks & Regards,
Shaohui

2010-12-23 03:25:40

by Andrew Morton

[permalink] [raw]
Subject: Re: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Thu, 23 Dec 2010 09:34:10 +0800 Shaohui Zheng <[email protected]> wrote:

> On Wed, Dec 22, 2010 at 04:27:27PM -0800, Andrew Morton wrote:
> > On Fri, 10 Dec 2010 15:31:24 +0800
> > > +
> > > +ssize_t arch_cpu_probe(const char *buf, size_t count)
> > > +{
> > > + int nid = 0;
> > > + int num = 0, selected = 0;
> >
> > One definition per line make for more maintainable code.
> >
> > Two of these initialisations are unnecessary.
> >
> Agree, I will put them into 2 lines, and remove the initialisations.
> I always try to initialize them when we define it, it seems that it is a bad habit.
>
> > > + /* check parameters */
> > > + if (!buf || count < 2)
> > > + return -EPERM;
> > > +
> > > + nid = simple_strtoul(buf, NULL, 0);
> >
> > checkpatch?
>
> it is a warning, so I ignore it.

Don't ignore warnings! At least, not until you've understood the
reason for them and have a *reason* to ignore them.

simple_strtoul() will silently accept input of the form "42foo",
treating it as "42". That's a userspace bug and the kernel should
report it. This means that the code should be changed to handle error
returns from strict_strtoul(). And those error paths should be tested.

> > > + break;
> > > + }
> > > + }
> > > +
> > > + if (selected >= num_possible_cpus()) {
> > > + printk(KERN_ERR "No free cpu, give up cpu probing.\n");
> > > + return -EPERM;
> > > + }
> > > +
> > > + /* register cpu */
> > > + arch_register_cpu_node(selected, nid);
> > > + acpi_map_lsapic_emu(selected, nid);
> > > +
> > > + return count;
> > > +}
> > > +EXPORT_SYMBOL(arch_cpu_probe);
> >
> > arch_cpu_probe() is global and exported to modules, but is undocumented.
> >
> > If it had been documented, I might have been able to work out why arg
> > `count' is checked, but never used.
> >
>
> Sorry, Andrew, I did not catch it. Do you mean to add the document before
> the definition of the function arch_cpu_probe?

Sure, add a comment documenting the function.

Why *does* it check `count' and then not use it?

>
> > > + /* cpu 0 is not hotplugable */
> > > + if (cpu == 0) {
> > > + printk(KERN_ERR "can not release cpu 0.\n");
> >
> > It's generally better to make kernel messages self-identifying.
> > Especially error messages. If someone comes along and sees "can not
> > release cpu 0" in their logs, they don't have a clue what caused it
> > unless they download the kernel sources and go grepping.
> >
>
> How about "arch_cpu_release: can not release cpu 0.\n"?

Better, although "arch_cpu_release" isn't very meaningful to an
administrator. "NUMA hotplug remove" or something like that would be
more useful.

All these messages should be looked at from the point of view of the
people who they are to serve. Although in this special case, that's
most likely to be a kernel developer so I guess such clarity isn't
needed.

2010-12-23 03:50:56

by Shaohui Zheng

[permalink] [raw]
Subject: Re: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Wed, Dec 22, 2010 at 07:21:18PM -0800, Andrew Morton wrote:
> > >
> > > checkpatch?
> >
> > it is a warning, so I ignore it.
>
> Don't ignore warnings! At least, not until you've understood the
> reason for them and have a *reason* to ignore them.
>
> simple_strtoul() will silently accept input of the form "42foo",
> treating it as "42". That's a userspace bug and the kernel should
> report it. This means that the code should be changed to handle error
> returns from strict_strtoul(). And those error paths should be tested.
>

> > > > + break;
> > > > + }
> > > > + }
> > > > +
> > > > + if (selected >= num_possible_cpus()) {
> > > > + printk(KERN_ERR "No free cpu, give up cpu probing.\n");
> > > > + return -EPERM;
> > > > + }
> > > > +
> > > > + /* register cpu */
> > > > + arch_register_cpu_node(selected, nid);
> > > > + acpi_map_lsapic_emu(selected, nid);
> > > > +
> > > > + return count;
> > > > +}
> > > > +EXPORT_SYMBOL(arch_cpu_probe);
> > >
> > > arch_cpu_probe() is global and exported to modules, but is undocumented.
> > >
> > > If it had been documented, I might have been able to work out why arg
> > > `count' is checked, but never used.
> > >
> >
> > Sorry, Andrew, I did not catch it. Do you mean to add the document before
> > the definition of the function arch_cpu_probe?
>
> Sure, add a comment documenting the function.

I understand, I will add comments for both arch_cpu_probe/arch_cpu_release.

>
> Why *does* it check `count' and then not use it?
>

It is a tricky thing. When I debugged it under a virtual machine, if I did a cpu
probe via the sysfs cpu/probe interface, the function arch_cpu_probe was called
__three__ times, but only one call was valid, so I added a check on `count` to
ignore the invalid calls.

> >
> > > > + /* cpu 0 is not hotplugable */
> > > > + if (cpu == 0) {
> > > > + printk(KERN_ERR "can not release cpu 0.\n");
> > >
> > > It's generally better to make kernel messages self-identifying.
> > > Especially error messages. If someone comes along and sees "can not
> > > release cpu 0" in their logs, they don't have a clue what caused it
> > > unless they download the kernel sources and go grepping.
> > >
> >
> > How about "arch_cpu_release: can not release cpu 0.\n"?
>
> Better, although "arch_cpu_release" isn't very meaningful to an
> administrator. "NUMA hotplug remove" or something like that would be
> more useful.

>
> All these messages should be looked at from the point of view of the
> people who they are to serve. Although in this special case, that's
> most likely to be a kernel developer so I guess such clarity isn't
> needed.
>

It is a good lesson for me. When I meet a similar problem next time, I should
consider it more from the user's point of view.

--
Thanks & Regards,
Shaohui

2010-12-23 05:35:14

by Andrew Morton

[permalink] [raw]
Subject: Re: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Thu, 23 Dec 2010 10:24:28 +0800 Shaohui Zheng <[email protected]> wrote:

> >
> > Why *does* it check `count' and then not use it?
> >
>
> it is a tricky thing. When I debug it under a Virtual Machine, If I do a cpu
> probe via sysfs cpu/probe interface, The function arch_cpu_probe will be called
> __three__ times, but only one call is valid, so I add a check on `count` to
> ignore the invalid calls.

hm, why does it get called three times? Is that something which
can/should be fixed in callers rather than in the callee?

2010-12-23 05:54:41

by Shaohui Zheng

[permalink] [raw]
Subject: Re: [5/7, v9] NUMA Hotplug Emulator: Support cpu probe/release in x86_64

On Wed, Dec 22, 2010 at 09:28:04PM -0800, Andrew Morton wrote:
> On Thu, 23 Dec 2010 10:24:28 +0800 Shaohui Zheng <[email protected]> wrote:
>
> > >
> > > Why *does* it check `count' and then not use it?
> > >
> >
> > it is a tricky thing. When I debug it under a Virtual Machine, If I do a cpu
> > probe via sysfs cpu/probe interface, The function arch_cpu_probe will be called
> > __three__ times, but only one call is valid, so I add a check on `count` to
> > ignore the invalid calls.
>
> hm, why does it get called three times? Is that something which
> can/should be fixed in callers rather than in the callee?

It might be a bug in the caller, but that is just a guess at the moment. I will investigate it.

--
Thanks & Regards,
Shaohui