Hi,
This is version 6 of patch series that provides a framework to choose the state
a pseries CPU must be put to when it is offlined.
Previous versions can be found here:
Version 5: http://lkml.org/lkml/2009/10/30/6
Version 4: http://lkml.org/lkml/2009/10/9/59
Version 3: http://lkml.org/lkml/2009/9/15/164
Version 2: http://lkml.org/lkml/2009/8/28/102
Version 1: http://lkml.org/lkml/2009/8/6/236
Changes from the previous version include:
- Built on Nathan Fontenot's v3 of "Kernel handling of Dynamic
Logical Partitioning" http://lkml.org/lkml/2009/11/25/21
- Rebased to powerpc.git tree and hence dropped 1st and 2nd patch in
the stack since they are already in the powerpc.git tree:
With reference to previous version,
Dropped:
1/4 pSeries: extended_cede_processor() helper function
2/4 pSeries: Add hooks to put the CPU into an appropriate offline state
Posting only:
3/4 pseries: Add code to online/offline CPUs of a DLPAR node
4/4 pseries: Serialize cpu hotplug operations during deactivate Vs deallocate
Minor changes in the above patchs due to changes in Nathan's routines.
Also,
- This approach addresses Peter Z's objections regarding layering
violations. The user simply offlines the cpu and doesn't worry about what
state the CPU should be put into. That part is automatically handled by the
kernel.
Acked-by: Peter Zijlstra <[email protected]>
http://lkml.org/lkml/2009/11/11/328
- It does not add any additional sysfs interface instead uses the existing
sysfs interface to offline CPUs.
- On platforms which do not have support for ceding the vcpu with a
latency specifier value, the offlining mechanism defaults to the current
method of calling rtas_stop_self().
This patchset is based on powerpc.git + Nathan's patches and has been built and
tested on pseries platforms. This series can be applied to powerpc.git after
Nathan's patches.
Thanks,
Vaidy
---
Gautham R Shenoy (2):
pseries: Serialize cpu hotplug operations during deactivate Vs deallocate
pseries: Add code to online/offline CPUs of a DLPAR node
arch/powerpc/platforms/pseries/dlpar.c | 144 ++++++++++++++++++++++++++++++--
drivers/base/cpu.c | 2
include/linux/cpu.h | 13 +++
3 files changed, 150 insertions(+), 9 deletions(-)
--
From: Gautham R Shenoy <[email protected]>
Currently the cpu-allocation/deallocation on pSeries is a
two step process from the Userspace.
- Set the indicators and update the device tree by writing to the sysfs
tunable "probe" during allocation and "release" during deallocation.
- Online / Offline the CPUs of the allocated/would_be_deallocated node by
writing to the sysfs tunable "online".
This patch adds kernel code to online/offline the CPUs soon_after/just_before
they have been allocated/would_be_deallocated. This way, the userspace tool
that performs DLPAR operations would only have to deal with one set of sysfs
tunables namely "probe" and release".
Signed-off-by: Gautham R Shenoy <[email protected]>
Signed-off-by: Nathan Fontenot <[email protected]>
Acked-by: Vaidyanathan Srinivasan <[email protected]>
---
arch/powerpc/platforms/pseries/dlpar.c | 101 ++++++++++++++++++++++++++++++++
1 files changed, 101 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index fe8d4b3..642e1b2 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -16,6 +16,7 @@
#include <linux/proc_fs.h>
#include <linux/spinlock.h>
#include <linux/cpu.h>
+#include "offline_states.h"
#include <asm/prom.h>
#include <asm/machdep.h>
@@ -287,6 +288,98 @@ int dlpar_detach_node(struct device_node *dn)
return 0;
}
+int online_node_cpus(struct device_node *dn)
+{
+ int rc = 0;
+ unsigned int cpu;
+ int len, nthreads, i;
+ const u32 *intserv;
+
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+ if (!intserv)
+ return -EINVAL;
+
+ nthreads = len / sizeof(u32);
+
+ cpu_maps_update_begin();
+ for (i = 0; i < nthreads; i++) {
+ for_each_present_cpu(cpu) {
+ if (get_hard_smp_processor_id(cpu) != intserv[i])
+ continue;
+ BUG_ON(get_cpu_current_state(cpu)
+ != CPU_STATE_OFFLINE);
+ cpu_maps_update_done();
+ rc = cpu_up(cpu);
+ if (rc)
+ goto out;
+ cpu_maps_update_begin();
+
+ break;
+ }
+ if (cpu == num_possible_cpus())
+ printk(KERN_WARNING "Could not find cpu to online "
+ "with physical id 0x%x\n", intserv[i]);
+ }
+ cpu_maps_update_done();
+
+out:
+ return rc;
+
+}
+
+int offline_node_cpus(struct device_node *dn)
+{
+ int rc = 0;
+ unsigned int cpu;
+ int len, nthreads, i;
+ const u32 *intserv;
+
+ intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+ if (!intserv)
+ return -EINVAL;
+
+ nthreads = len / sizeof(u32);
+
+ cpu_maps_update_begin();
+ for (i = 0; i < nthreads; i++) {
+ for_each_present_cpu(cpu) {
+ if (get_hard_smp_processor_id(cpu) != intserv[i])
+ continue;
+
+ if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+ break;
+
+ if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+ cpu_maps_update_done();
+ rc = cpu_down(cpu);
+ if (rc)
+ goto out;
+ cpu_maps_update_begin();
+ break;
+
+ }
+
+ /*
+ * The cpu is in CPU_STATE_INACTIVE.
+ * Upgrade it's state to CPU_STATE_OFFLINE.
+ */
+ set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+ BUG_ON(plpar_hcall_norets(H_PROD, intserv[i])
+ != H_SUCCESS);
+ __cpu_die(cpu);
+ break;
+ }
+ if (cpu == num_possible_cpus())
+ printk(KERN_WARNING "Could not find cpu to offline "
+ "with physical id 0x%x\n", intserv[i]);
+ }
+ cpu_maps_update_done();
+
+out:
+ return rc;
+
+}
+
#define DR_ENTITY_SENSE 9003
#define DR_ENTITY_PRESENT 1
#define DR_ENTITY_UNUSABLE 2
@@ -385,6 +478,8 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
dlpar_free_cc_nodes(dn);
}
+ rc = online_node_cpus(dn);
+
return rc ? rc : count;
}
@@ -404,6 +499,12 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
return -EINVAL;
}
+ rc = offline_node_cpus(dn);
+ if (rc) {
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
rc = dlpar_release_drc(*drc_index);
if (rc) {
of_node_put(dn);
From: Gautham R Shenoy <[email protected]>
Currently the cpu-allocation/deallocation process comprises of two steps:
- Set the indicators and to update the device tree with DLPAR node
information.
- Online/offline the allocated/deallocated CPU.
This is achieved by writing to the sysfs tunables "probe" during allocation
and "release" during deallocation.
At the sametime, the userspace can independently online/offline the CPUs of
the system using the sysfs tunable "online".
It is quite possible that when a userspace tool offlines a CPU
for the purpose of deallocation and is in the process of updating the device
tree, some other userspace tool could bring the CPU back online by writing to
the "online" sysfs tunable thereby causing the deallocate process to fail.
The solution to this is to serialize writes to the "probe/release" sysfs
tunable with the writes to the "online" sysfs tunable.
This patch employs a mutex to provide this serialization, which is a no-op on
all architectures except PPC_PSERIES
Signed-off-by: Gautham R Shenoy <[email protected]>
Acked-by: Vaidyanathan Srinivasan <[email protected]>
---
arch/powerpc/platforms/pseries/dlpar.c | 45 +++++++++++++++++++++++++-------
drivers/base/cpu.c | 2 +
include/linux/cpu.h | 13 +++++++++
3 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 642e1b2..fd2f0af 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -436,6 +436,18 @@ int dlpar_release_drc(u32 drc_index)
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static DEFINE_MUTEX(pseries_cpu_hotplug_mutex);
+
+void cpu_hotplug_driver_lock()
+{
+ mutex_lock(&pseries_cpu_hotplug_mutex);
+}
+
+void cpu_hotplug_driver_unlock()
+{
+ mutex_unlock(&pseries_cpu_hotplug_mutex);
+}
+
static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
struct device_node *dn;
@@ -443,13 +455,18 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
char *cpu_name;
int rc;
+ cpu_hotplug_driver_lock();
rc = strict_strtoul(buf, 0, &drc_index);
- if (rc)
- return -EINVAL;
+ if (rc) {
+ rc = -EINVAL;
+ goto out;
+ }
dn = dlpar_configure_connector(drc_index);
- if (!dn)
- return -EINVAL;
+ if (!dn) {
+ rc = -EINVAL;
+ goto out;
+ }
/* configure-connector reports cpus as living in the base
* directory of the device tree. CPUs actually live in the
@@ -459,7 +476,8 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
GFP_KERNEL);
if (!cpu_name) {
dlpar_free_cc_nodes(dn);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out;
}
sprintf(cpu_name, "/cpus%s", dn->full_name);
@@ -469,7 +487,8 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
rc = dlpar_acquire_drc(drc_index);
if (rc) {
dlpar_free_cc_nodes(dn);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
rc = dlpar_attach_node(dn);
@@ -479,6 +498,8 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
}
rc = online_node_cpus(dn);
+out:
+ cpu_hotplug_driver_unlock();
return rc ? rc : count;
}
@@ -499,26 +520,30 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
return -EINVAL;
}
+ cpu_hotplug_driver_lock();
rc = offline_node_cpus(dn);
if (rc) {
of_node_put(dn);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
rc = dlpar_release_drc(*drc_index);
if (rc) {
of_node_put(dn);
- return -EINVAL;
+ goto out;
}
rc = dlpar_detach_node(dn);
if (rc) {
dlpar_acquire_drc(*drc_index);
- return rc;
+ goto out;
}
of_node_put(dn);
- return count;
+out:
+ cpu_hotplug_driver_unlock();
+ return rc ? rc : count;
}
static int __init pseries_dlpar_init(void)
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 7c03af7..27fd775 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -35,6 +35,7 @@ static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribut
struct cpu *cpu = container_of(dev, struct cpu, sysdev);
ssize_t ret;
+ cpu_hotplug_driver_lock();
switch (buf[0]) {
case '0':
ret = cpu_down(cpu->sysdev.id);
@@ -49,6 +50,7 @@ static ssize_t __ref store_online(struct sys_device *dev, struct sysdev_attribut
default:
ret = -EINVAL;
}
+ cpu_hotplug_driver_unlock();
if (ret >= 0)
ret = count;
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index c972f7c..e287863 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -117,6 +117,19 @@ extern void put_online_cpus(void);
#define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb)
int cpu_down(unsigned int cpu);
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+extern void cpu_hotplug_driver_lock(void);
+extern void cpu_hotplug_driver_unlock(void);
+#else
+static inline void cpu_hotplug_driver_lock(void)
+{
+}
+
+static inline void cpu_hotplug_driver_unlock(void)
+{
+}
+#endif
+
#else /* CONFIG_HOTPLUG_CPU */
#define get_online_cpus() do { } while (0)
Some of these patches (Gautham or Nathan, I haven't bisected) break
pseries build without CONFIG_SMP. I'll apply them anyways for now
to next and push them out today since nobody ever builds pseries without
CONFIG_SMP but I would appreciate if you could send a fix regardless.
Cheers,
Ben.