2009-10-28 20:47:25

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 0/6 v5] Kernel handling of Dynamic Logical Partitioning

This is a re-send of the entire patch set with updates made from recent
comments received.

The Dynamic Logical Partitioning (DLPAR) capabilities of the powerpc pseries
platform allows for the addition and removal of resources (i.e. cpus,
memory, pci devices) from a partition. The removal of a resource involves
removing the resource's node from the device tree and then returning the
resource to firmware via the rtas set-indicator call. To add a resource, it
is first obtained from firmware via the rtas set-indicator call and then a
new device tree node is created using the ibm,configure-coinnector rtas call
and added to the device tree.

The following set of patches implements the needed infrastructure to have the
kernel handle the DLPAR addition and removal of memory and cpus (other
DLPAR'able items to follow in future patches). The framework for this is
to create a set of probe/release sysfs files that will facilitate
arch-specific call-outs to handle addition and removal of cpus and memory to
the system.

Patches include in this set:
1/6 - DLPAR infracstructure for powerpc/pseries platform.
2/6 - Move the of_drconf_cell struct to prom.h
3/6 - Create memory probe/release files and the powerpc handlers for them
4/6 - Memory DLPAR handling
5/6 - Create sysfs cpu probe/release files and the powerpc handlers for them
6/6 - CPU DLPAR handling

-Nathan Fontenot


2009-10-28 20:53:41

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 1/6 v5] Kernel DLPAR Infrastructure

This patch provides the kernel DLPAR infrastructure in a new filed named
dlpar.c. The functionality provided is for acquiring and releasing a resource
from firmware and the parsing of information returned from the
ibm,configure-connector rtas call. Additionally this exports the pSeries
reconfiguration notifier chain so that it can be invoked when device tree
updates are made.

Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
---

Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-10-28 15:21:38.000000000 -0500
@@ -0,0 +1,414 @@
+/*
+ * dlpar.c - support for dynamic reconfiguration (including PCI
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/pSeries_reconfig.h>
+
+#define CFG_CONN_WORK_SIZE 4096
+static char workarea[CFG_CONN_WORK_SIZE];
+static DEFINE_SPINLOCK(workarea_lock);
+
+struct cc_workarea {
+ u32 drc_index;
+ u32 zero;
+ u32 name_offset;
+ u32 prop_length;
+ u32 prop_offset;
+};
+
+static struct property *parse_cc_property(char *workarea)
+{
+ struct property *prop;
+ struct cc_workarea *ccwa;
+ char *name;
+ char *value;
+
+ prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+ if (!prop)
+ return NULL;
+
+ ccwa = (struct cc_workarea *)workarea;
+ name = workarea + ccwa->name_offset;
+ prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!prop->name) {
+ kfree(prop);
+ return NULL;
+ }
+
+ strcpy(prop->name, name);
+
+ prop->length = ccwa->prop_length;
+ value = workarea + ccwa->prop_offset;
+ prop->value = kzalloc(prop->length, GFP_KERNEL);
+ if (!prop->value) {
+ kfree(prop->name);
+ kfree(prop);
+ return NULL;
+ }
+
+ memcpy(prop->value, value, prop->length);
+ return prop;
+}
+
+static void free_property(struct property *prop)
+{
+ kfree(prop->name);
+ kfree(prop->value);
+ kfree(prop);
+}
+
+static struct device_node *parse_cc_node(char *work_area)
+{
+ struct device_node *dn;
+ struct cc_workarea *ccwa;
+ char *name;
+
+ dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+ if (!dn)
+ return NULL;
+
+ ccwa = (struct cc_workarea *)work_area;
+ name = work_area + ccwa->name_offset;
+ dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!dn->full_name) {
+ kfree(dn);
+ return NULL;
+ }
+
+ strcpy(dn->full_name, name);
+ return dn;
+}
+
+static void free_one_cc_node(struct device_node *dn)
+{
+ struct property *prop;
+
+ while (dn->properties) {
+ prop = dn->properties;
+ dn->properties = prop->next;
+ free_property(prop);
+ }
+
+ kfree(dn->full_name);
+ kfree(dn);
+}
+
+static void free_cc_nodes(struct device_node *dn)
+{
+ if (dn->child)
+ free_cc_nodes(dn->child);
+
+ if (dn->sibling)
+ free_cc_nodes(dn->sibling);
+
+ free_one_cc_node(dn);
+}
+
+#define NEXT_SIBLING 1
+#define NEXT_CHILD 2
+#define NEXT_PROPERTY 3
+#define PREV_PARENT 4
+#define MORE_MEMORY 5
+#define CALL_AGAIN -2
+#define ERR_CFG_USE -9003
+
+struct device_node *configure_connector(u32 drc_index)
+{
+ struct device_node *dn;
+ struct device_node *first_dn = NULL;
+ struct device_node *last_dn = NULL;
+ struct property *property;
+ struct property *last_property = NULL;
+ struct cc_workarea *ccwa;
+ int cc_token;
+ int rc;
+
+ cc_token = rtas_token("ibm,configure-connector");
+ if (cc_token == RTAS_UNKNOWN_SERVICE)
+ return NULL;
+
+ spin_lock(&workarea_lock);
+
+ ccwa = (struct cc_workarea *)&workarea[0];
+ ccwa->drc_index = drc_index;
+ ccwa->zero = 0;
+
+ rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+ while (rc) {
+ switch (rc) {
+ case NEXT_SIBLING:
+ dn = parse_cc_node(workarea);
+ if (!dn)
+ goto cc_error;
+
+ dn->parent = last_dn->parent;
+ last_dn->sibling = dn;
+ last_dn = dn;
+ break;
+
+ case NEXT_CHILD:
+ dn = parse_cc_node(workarea);
+ if (!dn)
+ goto cc_error;
+
+ if (!first_dn)
+ first_dn = dn;
+ else {
+ dn->parent = last_dn;
+ if (last_dn)
+ last_dn->child = dn;
+ }
+
+ last_dn = dn;
+ break;
+
+ case NEXT_PROPERTY:
+ property = parse_cc_property(workarea);
+ if (!property)
+ goto cc_error;
+
+ if (!last_dn->properties)
+ last_dn->properties = property;
+ else
+ last_property->next = property;
+
+ last_property = property;
+ break;
+
+ case PREV_PARENT:
+ last_dn = last_dn->parent;
+ break;
+
+ case CALL_AGAIN:
+ break;
+
+ case MORE_MEMORY:
+ case ERR_CFG_USE:
+ default:
+ printk(KERN_ERR "Unexpected Error (%d) "
+ "returned from configure-connector\n", rc);
+ goto cc_error;
+ }
+
+ rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+ }
+
+ spin_unlock(&workarea_lock);
+ return first_dn;
+
+cc_error:
+ spin_unlock(&workarea_lock);
+
+ if (first_dn)
+ free_cc_nodes(first_dn);
+
+ return NULL;
+}
+
+static struct device_node *derive_parent(const char *path)
+{
+ struct device_node *parent;
+ char parent_path[128];
+ int parent_path_len;
+
+ parent_path_len = strrchr(path, '/') - path + 1;
+ strlcpy(parent_path, path, parent_path_len);
+
+ parent = of_find_node_by_path(parent_path);
+
+ return parent;
+}
+
+static int add_one_node(struct device_node *dn)
+{
+ struct proc_dir_entry *ent;
+ int rc;
+
+ of_node_set_flag(dn, OF_DYNAMIC);
+ kref_init(&dn->kref);
+ dn->parent = derive_parent(dn->full_name);
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_RECONFIG_ADD, dn);
+ if (rc == NOTIFY_BAD) {
+ printk(KERN_ERR "Failed to add device node %s\n",
+ dn->full_name);
+ return -ENOMEM; /* For now, safe to assume kmalloc failure */
+ }
+
+ of_attach_node(dn);
+
+#ifdef CONFIG_PROC_DEVICETREE
+ ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
+ if (ent)
+ proc_device_tree_add_node(dn, ent);
+#endif
+
+ of_node_put(dn->parent);
+ return 0;
+}
+
+int add_device_tree_nodes(struct device_node *dn)
+{
+ struct device_node *child = dn->child;
+ struct device_node *sibling = dn->sibling;
+ int rc;
+
+ dn->child = NULL;
+ dn->sibling = NULL;
+ dn->parent = NULL;
+
+ rc = add_one_node(dn);
+ if (rc)
+ return rc;
+
+ if (child) {
+ rc = add_device_tree_nodes(child);
+ if (rc)
+ return rc;
+ }
+
+ if (sibling)
+ rc = add_device_tree_nodes(sibling);
+
+ return rc;
+}
+
+static int remove_one_node(struct device_node *dn)
+{
+ struct device_node *parent = dn->parent;
+ struct property *prop = dn->properties;
+
+#ifdef CONFIG_PROC_DEVICETREE
+ while (prop) {
+ remove_proc_entry(prop->name, dn->pde);
+ prop = prop->next;
+ }
+
+ if (dn->pde)
+ remove_proc_entry(dn->pde->name, parent->pde);
+#endif
+
+ blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_RECONFIG_REMOVE, dn);
+ of_detach_node(dn);
+ of_node_put(dn); /* Must decrement the refcount */
+
+ return 0;
+}
+
+static int _remove_device_tree_nodes(struct device_node *dn)
+{
+ int rc;
+
+ if (dn->child) {
+ rc = _remove_device_tree_nodes(dn->child);
+ if (rc)
+ return rc;
+ }
+
+ if (dn->sibling) {
+ rc = _remove_device_tree_nodes(dn->sibling);
+ if (rc)
+ return rc;
+ }
+
+ rc = remove_one_node(dn);
+ return rc;
+}
+
+int remove_device_tree_nodes(struct device_node *dn)
+{
+ int rc;
+
+ if (dn->child) {
+ rc = _remove_device_tree_nodes(dn->child);
+ if (rc)
+ return rc;
+ }
+
+ rc = remove_one_node(dn);
+ return rc;
+}
+
+#define DR_ENTITY_SENSE 9003
+#define DR_ENTITY_PRESENT 1
+#define DR_ENTITY_UNUSABLE 2
+#define ALLOCATION_STATE 9003
+#define ALLOC_UNUSABLE 0
+#define ALLOC_USABLE 1
+#define ISOLATION_STATE 9001
+#define ISOLATE 0
+#define UNISOLATE 1
+
+int acquire_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+ DR_ENTITY_SENSE, drc_index);
+ if (rc || dr_status != DR_ENTITY_UNUSABLE)
+ return -1;
+
+ rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+ if (rc)
+ return rc;
+
+ rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+ if (rc) {
+ rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+ return rc;
+ }
+
+ return 0;
+}
+
+int release_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+ DR_ENTITY_SENSE, drc_index);
+ if (rc || dr_status != DR_ENTITY_PRESENT)
+ return -1;
+
+ rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+ if (rc)
+ return rc;
+
+ rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+ if (rc) {
+ rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int pseries_dlpar_init(void)
+{
+ if (!machine_is(pseries))
+ return 0;
+
+ return 0;
+}
+device_initcall(pseries_dlpar_init);
Index: powerpc/arch/powerpc/platforms/pseries/Makefile
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/Makefile 2009-10-28 15:20:38.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/Makefile 2009-10-28 15:21:38.000000000 -0500
@@ -8,7 +8,7 @@

obj-y := lpar.o hvCall.o nvram.o reconfig.o \
setup.o iommu.o ras.o rtasd.o \
- firmware.o power.o
+ firmware.o power.o dlpar.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_XICS) += xics.o
obj-$(CONFIG_SCANLOG) += scanlog.o
Index: powerpc/arch/powerpc/include/asm/pSeries_reconfig.h
===================================================================
--- powerpc.orig/arch/powerpc/include/asm/pSeries_reconfig.h 2009-10-28 15:20:38.000000000 -0500
+++ powerpc/arch/powerpc/include/asm/pSeries_reconfig.h 2009-10-28 15:21:38.000000000 -0500
@@ -17,6 +17,7 @@
#ifdef CONFIG_PPC_PSERIES
extern int pSeries_reconfig_notifier_register(struct notifier_block *);
extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
+extern struct blocking_notifier_head pSeries_reconfig_chain;
#else /* !CONFIG_PPC_PSERIES */
static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{
Index: powerpc/arch/powerpc/platforms/pseries/reconfig.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/reconfig.c 2009-10-28 15:20:38.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/reconfig.c 2009-10-28 15:21:38.000000000 -0500
@@ -96,7 +96,7 @@
return parent;
}

-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
+BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);

int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{

2009-10-28 20:54:54

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 2/6 v5] Move of_drconf_cell to prom.h

Move the definition of the of_drconf_cell struct from numa.c to prom.h. This
is needed so that we can parse the ibm,dynamic-memory device-tree property
when DLPAR adding and removing memory.

Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
---

Index: powerpc/arch/powerpc/include/asm/prom.h
===================================================================
--- powerpc.orig/arch/powerpc/include/asm/prom.h 2009-10-28 15:20:37.000000000 -0500
+++ powerpc/arch/powerpc/include/asm/prom.h 2009-10-28 15:21:44.000000000 -0500
@@ -349,6 +349,18 @@
*/
extern void __iomem *of_iomap(struct device_node *device, int index);

+struct of_drconf_cell {
+ u64 base_addr;
+ u32 drc_index;
+ u32 reserved;
+ u32 aa_index;
+ u32 flags;
+};
+
+#define DRCONF_MEM_ASSIGNED 0x00000008
+#define DRCONF_MEM_AI_INVALID 0x00000040
+#define DRCONF_MEM_RESERVED 0x00000080
+
/*
* NB: This is here while we transition from using asm/prom.h
* to linux/of.h
Index: powerpc/arch/powerpc/mm/numa.c
===================================================================
--- powerpc.orig/arch/powerpc/mm/numa.c 2009-10-28 15:20:37.000000000 -0500
+++ powerpc/arch/powerpc/mm/numa.c 2009-10-28 15:21:44.000000000 -0500
@@ -296,18 +296,6 @@
return result;
}

-struct of_drconf_cell {
- u64 base_addr;
- u32 drc_index;
- u32 reserved;
- u32 aa_index;
- u32 flags;
-};
-
-#define DRCONF_MEM_ASSIGNED 0x00000008
-#define DRCONF_MEM_AI_INVALID 0x00000040
-#define DRCONF_MEM_RESERVED 0x00000080
-
/*
* Read the next lmb list entry from the ibm,dynamic-memory property
* and return the information in the provided of_drconf_cell structure.

2009-10-28 20:55:59

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 3/6 v5] Memory probe/release files

This patch creates the release sysfs file for memory and updates the
exisiting probe file so both make arch-specific callouts to handle removing
and adding memory to the system. This also creates the powerpc specific stubs
for handling the arch callouts.

The creation and use of these files are governed by the exisitng
CONFIG_ARCH_MEMORY_PROBE and new CONFIG_ARCH_MEMORY_RELEASE config options.

Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
---

Index: powerpc/arch/powerpc/mm/mem.c
===================================================================
--- powerpc.orig/arch/powerpc/mm/mem.c 2009-10-28 15:20:36.000000000 -0500
+++ powerpc/arch/powerpc/mm/mem.c 2009-10-28 15:21:47.000000000 -0500
@@ -110,6 +110,26 @@

#ifdef CONFIG_MEMORY_HOTPLUG

+#ifdef CONFIG_ARCH_MEMORY_RELEASE
+int arch_memory_release(const char *buf, size_t count)
+{
+ if (ppc_md.memory_release)
+ return ppc_md.memory_release(buf, count);
+
+ return -EINVAL;
+}
+#endif
+
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+int arch_memory_probe(u64 phys_addr)
+{
+ if (ppc_md.memory_probe)
+ return ppc_md.memory_probe(phys_addr);
+
+ return -EINVAL;
+}
+#endif
+
#ifdef CONFIG_NUMA
int memory_add_physaddr_to_nid(u64 start)
{
Index: powerpc/drivers/base/memory.c
===================================================================
--- powerpc.orig/drivers/base/memory.c 2009-10-28 15:20:36.000000000 -0500
+++ powerpc/drivers/base/memory.c 2009-10-28 15:21:47.000000000 -0500
@@ -319,6 +319,10 @@

phys_addr = simple_strtoull(buf, NULL, 0);

+ ret = arch_memory_probe(phys_addr);
+ if (ret)
+ return ret;
+
nid = memory_add_physaddr_to_nid(phys_addr);
ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT);

@@ -328,19 +332,35 @@
return count;
}
static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
+#endif

-static int memory_probe_init(void)
+#ifdef CONFIG_ARCH_MEMORY_RELEASE
+static ssize_t
+memory_release_store(struct class *class, const char *buf, size_t count)
{
- return sysfs_create_file(&memory_sysdev_class.kset.kobj,
- &class_attr_probe.attr);
+ return arch_memory_release(buf, count);
}
-#else
-static inline int memory_probe_init(void)
+static CLASS_ATTR(release, S_IWUSR, NULL, memory_release_store);
+#endif
+
+static int memory_probe_release_init(void)
{
- return 0;
-}
+ int rc = 0;
+
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+ rc = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_probe.attr);
+#endif
+
+#ifdef CONFIG_ARCH_MEMORY_RELEASE
+ if (!rc)
+ rc = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_release.attr);
#endif

+ return rc;
+}
+
/*
* Note that phys_device is optional. It is here to allow for
* differentiation between which *physical* devices each
@@ -470,7 +490,7 @@
ret = err;
}

- err = memory_probe_init();
+ err = memory_probe_release_init();
if (!ret)
ret = err;
err = block_size_init();
Index: powerpc/arch/powerpc/include/asm/machdep.h
===================================================================
--- powerpc.orig/arch/powerpc/include/asm/machdep.h 2009-10-28 15:20:36.000000000 -0500
+++ powerpc/arch/powerpc/include/asm/machdep.h 2009-10-28 15:21:47.000000000 -0500
@@ -266,6 +266,14 @@
void (*suspend_disable_irqs)(void);
void (*suspend_enable_irqs)(void);
#endif
+
+#ifdef CONFIG_ARCH_MEMORY_RELEASE
+ int (*memory_release)(const char *, size_t);
+#endif
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+ int (*memory_probe)(u64);
+#endif
+
};

extern void e500_idle(void);
Index: powerpc/arch/powerpc/Kconfig
===================================================================
--- powerpc.orig/arch/powerpc/Kconfig 2009-10-28 15:20:36.000000000 -0500
+++ powerpc/arch/powerpc/Kconfig 2009-10-28 15:21:47.000000000 -0500
@@ -414,6 +414,10 @@
def_bool y
depends on MEMORY_HOTPLUG

+config ARCH_MEMORY_RELEASE
+ def_bool y
+ depends on MEMORY_HOTPLUG
+
# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
# between a node's start and end pfns, it may not
Index: powerpc/include/linux/memory_hotplug.h
===================================================================
--- powerpc.orig/include/linux/memory_hotplug.h 2009-10-28 15:20:36.000000000 -0500
+++ powerpc/include/linux/memory_hotplug.h 2009-10-28 15:21:47.000000000 -0500
@@ -86,6 +86,14 @@
}
#endif

+#ifdef CONFIG_ARCH_MEMORY_RELEASE
+extern int arch_memory_release(const char *, size_t);
+#endif
+
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+extern int arch_memory_probe(u64);
+#endif
+
#ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION
/*
* For supporting node-hotadd, we have to allocate a new pgdat.

2009-10-28 20:57:20

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 4/6 v5] Memory DLPAR Handling

This adds the capability to DLPAR add and remove memory from the kernel. The
patch registers handlers for the arch-specific probe and release memory
callouts to handle addition/removal of memory to the system and the associated
device tree updates.

Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
---

Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/dlpar.c 2009-10-28 15:21:38.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-10-28 15:21:49.000000000 -0500
@@ -16,6 +16,10 @@
#include <linux/notifier.h>
#include <linux/proc_fs.h>
#include <linux/spinlock.h>
+#include <linux/memory_hotplug.h>
+#include <linux/sysdev.h>
+#include <linux/sysfs.h>
+

#include <asm/prom.h>
#include <asm/machdep.h>
@@ -404,11 +408,189 @@
return 0;
}

+#ifdef CONFIG_MEMORY_HOTPLUG
+
+static struct property *clone_property(struct property *old_prop)
+{
+ struct property *new_prop;
+
+ new_prop = kzalloc((sizeof *new_prop), GFP_KERNEL);
+ if (!new_prop)
+ return NULL;
+
+ new_prop->name = kstrdup(old_prop->name, GFP_KERNEL);
+ new_prop->value = kzalloc(old_prop->length + 1, GFP_KERNEL);
+ if (!new_prop->name || !new_prop->value) {
+ free_property(new_prop);
+ return NULL;
+ }
+
+ memcpy(new_prop->value, old_prop->value, old_prop->length);
+ new_prop->length = old_prop->length;
+
+ return new_prop;
+}
+
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+
+int memory_probe(u64 phys_addr)
+{
+ struct device_node *dn = NULL;
+ struct property *new_prop;
+ struct property *old_prop;
+ struct of_drconf_cell *drmem;
+ const u64 *lmb_size;
+ int num_entries, i;
+ int rc = -EINVAL;
+
+ if (!phys_addr)
+ goto memory_probe_exit;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ goto memory_probe_exit;
+
+ lmb_size = of_get_property(dn, "ibm,lmb-size", NULL);
+ if (!lmb_size)
+ goto memory_probe_exit;
+
+ old_prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+ if (!old_prop)
+ goto memory_probe_exit;
+
+ num_entries = *(u32 *)old_prop->value;
+ drmem = (struct of_drconf_cell *)
+ ((char *)old_prop->value + sizeof(u32));
+
+ for (i = 0; i < num_entries; i++) {
+ u64 lmb_end_addr = drmem[i].base_addr + *lmb_size;
+ if (phys_addr >= drmem[i].base_addr
+ && phys_addr < lmb_end_addr)
+ break;
+ }
+
+ if (i >= num_entries)
+ goto memory_probe_exit;
+
+ if (drmem[i].flags & DRCONF_MEM_ASSIGNED) {
+ /* This lmb is already adssigned to the system, nothing to do */
+ rc = 0;
+ goto memory_probe_exit;
+ }
+
+ rc = acquire_drc(drmem[i].drc_index);
+ if (rc) {
+ rc = -EINVAL;
+ goto memory_probe_exit;
+ }
+
+ new_prop = clone_property(old_prop);
+ drmem = (struct of_drconf_cell *)
+ ((char *)new_prop->value + sizeof(u32));
+
+ drmem[i].flags |= DRCONF_MEM_ASSIGNED;
+ rc = prom_update_property(dn, new_prop, old_prop);
+ if (rc) {
+ free_property(new_prop);
+ rc = -EINVAL;
+ goto memory_probe_exit;
+ }
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_DRCONF_MEM_ADD,
+ &drmem[i].base_addr);
+ if (rc == NOTIFY_BAD) {
+ prom_update_property(dn, old_prop, new_prop);
+ release_drc(drmem[i].drc_index);
+ rc = -EINVAL;
+ } else
+ rc = 0;
+
+memory_probe_exit:
+ of_node_put(dn);
+ return rc;
+}
+
+#endif /* CONFIG_ARCH_MEMORY_PROBE */
+
+#ifdef CONFIG_ARCH_MEMORY_RELEASE
+
+static int memory_release(const char *buf, size_t count)
+{
+ unsigned long drc_index;
+ struct device_node *dn;
+ struct property *new_prop, *old_prop;
+ struct of_drconf_cell *drmem;
+ int num_entries;
+ int i;
+ int rc = -EINVAL;
+
+ rc = strict_strtoul(buf, 0, &drc_index);
+ if (rc)
+ return rc;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return rc;
+
+ old_prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+ if (!old_prop)
+ goto memory_release_exit;
+
+ num_entries = *(u32 *)old_prop->value;
+ drmem = (struct of_drconf_cell *)
+ ((char *)old_prop->value + sizeof(u32));
+
+ for (i = 0; i < num_entries; i++) {
+ if (drmem[i].drc_index == drc_index)
+ break;
+ }
+
+ if (i >= num_entries)
+ goto memory_release_exit;
+
+ new_prop = clone_property(old_prop);
+ drmem = (struct of_drconf_cell *)
+ ((char *)new_prop->value + sizeof(u32));
+
+ drmem[i].flags &= ~DRCONF_MEM_ASSIGNED;
+ rc = prom_update_property(dn, new_prop, old_prop);
+ if (rc) {
+ free_property(new_prop);
+ rc = -EINVAL;
+ goto memory_release_exit;
+ }
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_DRCONF_MEM_REMOVE,
+ &drmem[i].base_addr);
+ if (rc != NOTIFY_BAD)
+ rc = release_drc(drc_index);
+
+ if (rc) {
+ prom_update_property(dn, old_prop, new_prop);
+ rc = -EINVAL;
+ }
+
+memory_release_exit:
+ of_node_put(dn);
+ return rc ? rc : count;
+}
+#endif /* CONFIG_ARCH_MEMORY_RELEASE */
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
static int pseries_dlpar_init(void)
{
if (!machine_is(pseries))
return 0;

+#ifdef CONFIG_ARCH_MEMORY_RELEASE
+ ppc_md.memory_release = memory_release;
+#endif
+#ifdef CONFIG_ARCH_MEMORY_PROBE
+ ppc_md.memory_probe = memory_probe;
+#endif
+
return 0;
}
device_initcall(pseries_dlpar_init);

2009-10-28 20:58:38

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 5/6 v5] CPU probe/release files

Create new probe and release sysfs files to facilitate adding and removing
cpus from the system. This also creates the powerpc specific stubs to handle
the arch callouts from writes to the sysfs files.

The creation and use of these files is regulated by the
CONFIG_ARCH_CPU_PROBE_RELEASE option so that only architectures that need the
capability will have the files created.

Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
---

Index: powerpc/drivers/base/cpu.c
===================================================================
--- powerpc.orig/drivers/base/cpu.c 2009-10-28 15:20:34.000000000 -0500
+++ powerpc/drivers/base/cpu.c 2009-10-28 15:21:53.000000000 -0500
@@ -54,6 +54,7 @@
ret = count;
return ret;
}
+
static SYSDEV_ATTR(online, 0644, show_online, store_online);

static void __cpuinit register_cpu_control(struct cpu *cpu)
@@ -72,6 +73,38 @@
per_cpu(cpu_sys_devices, logical_cpu) = NULL;
return;
}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static ssize_t cpu_probe_store(struct class *class, const char *buf,
+ size_t count)
+{
+ return arch_cpu_probe(buf, count);
+}
+
+static ssize_t cpu_release_store(struct class *class, const char *buf,
+ size_t count)
+{
+ return arch_cpu_release(buf, count);
+}
+
+static CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+static CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store);
+
+int __init cpu_probe_release_init(void)
+{
+ int rc;
+
+ rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &class_attr_probe.attr);
+ if (!rc)
+ rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &class_attr_release.attr);
+
+ return rc;
+}
+device_initcall(cpu_probe_release_init);
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
#else /* ... !CONFIG_HOTPLUG_CPU */
static inline void register_cpu_control(struct cpu *cpu)
{
Index: powerpc/arch/powerpc/include/asm/machdep.h
===================================================================
--- powerpc.orig/arch/powerpc/include/asm/machdep.h 2009-10-28 15:21:47.000000000 -0500
+++ powerpc/arch/powerpc/include/asm/machdep.h 2009-10-28 15:21:53.000000000 -0500
@@ -274,6 +274,11 @@
int (*memory_probe)(u64);
#endif

+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ ssize_t (*cpu_probe)(const char *, size_t);
+ ssize_t (*cpu_release)(const char *, size_t);
+#endif
+
};

extern void e500_idle(void);
Index: powerpc/arch/powerpc/kernel/sysfs.c
===================================================================
--- powerpc.orig/arch/powerpc/kernel/sysfs.c 2009-10-28 15:20:34.000000000 -0500
+++ powerpc/arch/powerpc/kernel/sysfs.c 2009-10-28 15:21:53.000000000 -0500
@@ -461,6 +461,25 @@

cacheinfo_cpu_offline(cpu);
}
+
+#ifdef CONFIG_ARCH_PROBE_RELEASE
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+ if (ppc_md.cpu_probe)
+ return ppc_md.cpu_probe(buf, count);
+
+ return -EINVAL;
+}
+
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+ if (ppc_md.cpu_release)
+ return ppc_md.cpu_release(buf, count);
+
+ return -EINVAL;
+}
+#endif /* CONFIG_ARCH_PROBE_RELEASE */
+
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
Index: powerpc/arch/powerpc/Kconfig
===================================================================
--- powerpc.orig/arch/powerpc/Kconfig 2009-10-28 15:21:47.000000000 -0500
+++ powerpc/arch/powerpc/Kconfig 2009-10-28 15:21:53.000000000 -0500
@@ -320,6 +320,10 @@

Say N if you are unsure.

+config ARCH_CPU_PROBE_RELEASE
+ def_bool y
+ depends on HOTPLUG_CPU
+
config ARCH_ENABLE_MEMORY_HOTPLUG
def_bool y

Index: powerpc/include/linux/cpu.h
===================================================================
--- powerpc.orig/include/linux/cpu.h 2009-10-28 15:20:34.000000000 -0500
+++ powerpc/include/linux/cpu.h 2009-10-28 15:21:53.000000000 -0500
@@ -43,6 +43,10 @@

#ifdef CONFIG_HOTPLUG_CPU
extern void unregister_cpu(struct cpu *cpu);
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+extern ssize_t arch_cpu_probe(const char *, size_t);
+extern ssize_t arch_cpu_release(const char *, size_t);
+#endif
#endif

struct notifier_block;
Index: powerpc/arch/powerpc/kernel/smp.c
===================================================================
--- powerpc.orig/arch/powerpc/kernel/smp.c 2009-10-28 15:20:34.000000000 -0500
+++ powerpc/arch/powerpc/kernel/smp.c 2009-10-28 15:21:53.000000000 -0500
@@ -364,7 +364,24 @@
set_cpu_online(cpu, true);
local_irq_enable();
}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ssize_t arch_cpu_probe(const char *buf, size_t count)
+{
+ if (ppc_md.cpu_probe)
+ return ppc_md.cpu_probe(buf, count);
+
+ return -EINVAL;
+}
+ssize_t arch_cpu_release(const char *buf, size_t count)
+{
+ if (ppc_md.cpu_release)
+ return ppc_md.cpu_release(buf, count);
+
+ return -EINVAL;
+}
#endif
+#endif /* CONFIG_HOTPLUG_CPU */

static int __devinit cpu_enable(unsigned int cpu)
{

2009-10-28 20:59:45

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 6/6 v5] CPU DLPAR Handling

Register the pseries specific handler for the powerpc architecture handlers
for the cpu probe and release files. This also implements the cpu DLPAR
addition and removal of CPUS from the system.

Signed-off-by: Nathan Fontenot <nfont at asutin.ibm.com>
---

Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/dlpar.c 2009-10-28 15:21:49.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-10-28 15:21:56.000000000 -0500
@@ -1,11 +1,10 @@
/*
- * dlpar.c - support for dynamic reconfiguration (including PCI
- * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ * Support for dynamic reconfiguration (including PCI, Memory, and CPU
+ * Hotplug and Dynamic Logical Partitioning on PAPR platforms).
*
* Copyright (C) 2009 Nathan Fontenot
* Copyright (C) 2009 IBM Corporation
*
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
@@ -19,7 +18,7 @@
#include <linux/memory_hotplug.h>
#include <linux/sysdev.h>
#include <linux/sysfs.h>
-
+#include <linux/cpu.h>

#include <asm/prom.h>
#include <asm/machdep.h>
@@ -408,6 +407,80 @@
return 0;
}

+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+static ssize_t cpu_probe(const char *buf, size_t count)
+{
+ struct device_node *dn;
+ unsigned long drc_index;
+ char *cpu_name;
+ int rc;
+
+ rc = strict_strtoul(buf, 0, &drc_index);
+ if (rc)
+ return -EINVAL;
+
+ rc = acquire_drc(drc_index);
+ if (rc)
+ return -EINVAL;
+
+ dn = configure_connector(drc_index);
+ if (!dn) {
+ release_drc(drc_index);
+ return -EINVAL;
+ }
+
+ /* fixup dn name */
+ cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus/") + 1,
+ GFP_KERNEL);
+ if (!cpu_name) {
+ free_cc_nodes(dn);
+ release_drc(drc_index);
+ return -ENOMEM;
+ }
+
+ sprintf(cpu_name, "/cpus/%s", dn->full_name);
+ kfree(dn->full_name);
+ dn->full_name = cpu_name;
+
+ rc = add_device_tree_nodes(dn);
+ if (rc)
+ release_drc(drc_index);
+
+ return rc ? -EINVAL : count;
+}
+
+static ssize_t cpu_release(const char *buf, size_t count)
+{
+ struct device_node *dn;
+ const u32 *drc_index;
+ int rc;
+
+ dn = of_find_node_by_path(buf);
+ if (!dn)
+ return -EINVAL;
+
+ drc_index = of_get_property(dn, "ibm,my-drc-index", NULL);
+ if (!drc_index) {
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ rc = release_drc(*drc_index);
+ if (rc) {
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ rc = remove_device_tree_nodes(dn);
+ if (rc)
+ acquire_drc(*drc_index);
+
+ of_node_put(dn);
+ return rc ? -EINVAL : count;
+}
+
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
#ifdef CONFIG_MEMORY_HOTPLUG

static struct property *clone_property(struct property *old_prop)
@@ -591,6 +664,11 @@
ppc_md.memory_probe = memory_probe;
#endif

+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+ ppc_md.cpu_release = cpu_release;
+ ppc_md.cpu_probe = cpu_probe;
+#endif
+
return 0;
}
device_initcall(pseries_dlpar_init);

2009-10-29 03:10:59

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 1/6 v5] Kernel DLPAR Infrastructure

On Wed, 2009-10-28 at 15:53 -0500, Nathan Fontenot wrote:
> This patch provides the kernel DLPAR infrastructure in a new filed named
> dlpar.c. The functionality provided is for acquiring and releasing a resource
> from firmware and the parsing of information returned from the
> ibm,configure-connector rtas call. Additionally this exports the pSeries
> reconfiguration notifier chain so that it can be invoked when device tree
> updates are made.
>
> Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
> ---

Hi Nathan !

Finally I get to review this stuff :-)

> +#define CFG_CONN_WORK_SIZE 4096
> +static char workarea[CFG_CONN_WORK_SIZE];
> +static DEFINE_SPINLOCK(workarea_lock);

So I'm not a huge fan of this workarea static. First a static is in
effect a global name (as far as System.map etc... are concerned) so it
would warrant a better name. Then, do we really want that 4K of BSS
taken even on platforms that don't do dlpar ? Any reason why you don't
just pop a free page with __get_free_page() inside of
configure_connector() ?

> +struct cc_workarea {
> + u32 drc_index;
> + u32 zero;
> + u32 name_offset;
> + u32 prop_length;
> + u32 prop_offset;
> +};
> +
> +static struct property *parse_cc_property(char *workarea)
> +{
> + struct property *prop;
> + struct cc_workarea *ccwa;
> + char *name;
> + char *value;
> +
> + prop = kzalloc(sizeof(*prop), GFP_KERNEL);
> + if (!prop)
> + return NULL;
> +
> + ccwa = (struct cc_workarea *)workarea;
> + name = workarea + ccwa->name_offset;
> + prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
> + if (!prop->name) {
> + kfree(prop);
> + return NULL;
> + }
> +
> + strcpy(prop->name, name);
> +
> + prop->length = ccwa->prop_length;
> + value = workarea + ccwa->prop_offset;
> + prop->value = kzalloc(prop->length, GFP_KERNEL);
> + if (!prop->value) {
> + kfree(prop->name);
> + kfree(prop);
> + return NULL;
> + }
> +
> + memcpy(prop->value, value, prop->length);
> + return prop;
> +}
> +
> +static void free_property(struct property *prop)
> +{
> + kfree(prop->name);
> + kfree(prop->value);
> + kfree(prop);
> +}
> +
> +static struct device_node *parse_cc_node(char *work_area)
> +{

const char* maybe ?

> + struct device_node *dn;
> + struct cc_workarea *ccwa;
> + char *name;
> +
> + dn = kzalloc(sizeof(*dn), GFP_KERNEL);
> + if (!dn)
> + return NULL;
> +
> + ccwa = (struct cc_workarea *)work_area;
> + name = work_area + ccwa->name_offset;

I'm wondering whether work_area should be a struct cc_workarea * in the
first place with a char data[] at the end, but that would mean probably
tweaking the offsets... no big deal, up to you.

> + dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
> + if (!dn->full_name) {
> + kfree(dn);
> + return NULL;
> + }
> +
> + strcpy(dn->full_name, name);

kstrdup ?

.../...

> +#define NEXT_SIBLING 1
> +#define NEXT_CHILD 2
> +#define NEXT_PROPERTY 3
> +#define PREV_PARENT 4
> +#define MORE_MEMORY 5
> +#define CALL_AGAIN -2
> +#define ERR_CFG_USE -9003
> +
> +struct device_node *configure_connector(u32 drc_index)
> +{

It's a global exported function, I'd rather you call it
dlpar_configure_connector()

> + struct device_node *dn;
> + struct device_node *first_dn = NULL;
> + struct device_node *last_dn = NULL;
> + struct property *property;
> + struct property *last_property = NULL;
> + struct cc_workarea *ccwa;
> + int cc_token;
> + int rc;
> +
> + cc_token = rtas_token("ibm,configure-connector");
> + if (cc_token == RTAS_UNKNOWN_SERVICE)
> + return NULL;
> +
> + spin_lock(&workarea_lock);
> +
> + ccwa = (struct cc_workarea *)&workarea[0];
> + ccwa->drc_index = drc_index;
> + ccwa->zero = 0;

Popping a free page with gfp (or just kmalloc'ing 4K) would avoid the
need for the lock too.

> + rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
> + while (rc) {
> + switch (rc) {
> + case NEXT_SIBLING:
> + dn = parse_cc_node(workarea);
> + if (!dn)
> + goto cc_error;
> +
> + dn->parent = last_dn->parent;
> + last_dn->sibling = dn;
> + last_dn = dn;
> + break;
> +
> + case NEXT_CHILD:
> + dn = parse_cc_node(workarea);
> + if (!dn)
> + goto cc_error;
> +
> + if (!first_dn)
> + first_dn = dn;
> + else {
> + dn->parent = last_dn;
> + if (last_dn)
> + last_dn->child = dn;
> + }
> +
> + last_dn = dn;
> + break;
> +
> + case NEXT_PROPERTY:
> + property = parse_cc_property(workarea);
> + if (!property)
> + goto cc_error;
> +
> + if (!last_dn->properties)
> + last_dn->properties = property;
> + else
> + last_property->next = property;
> +
> + last_property = property;
> + break;
> +
> + case PREV_PARENT:
> + last_dn = last_dn->parent;
> + break;
> +
> + case CALL_AGAIN:
> + break;
> +
> + case MORE_MEMORY:
> + case ERR_CFG_USE:
> + default:
> + printk(KERN_ERR "Unexpected Error (%d) "
> + "returned from configure-connector\n", rc);
> + goto cc_error;
> + }
> +
> + rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
> + }
> +
> + spin_unlock(&workarea_lock);
> + return first_dn;
> +
> +cc_error:
> + spin_unlock(&workarea_lock);
> +
> + if (first_dn)
> + free_cc_nodes(first_dn);
> +
> + return NULL;
> +}
> +
> +static struct device_node *derive_parent(const char *path)
> +{
> + struct device_node *parent;
> + char parent_path[128];
> + int parent_path_len;
> +
> + parent_path_len = strrchr(path, '/') - path + 1;
> + strlcpy(parent_path, path, parent_path_len);
> +
> + parent = of_find_node_by_path(parent_path);
> +
> + return parent;
> +}

This ...

> +static int add_one_node(struct device_node *dn)
> +{
> + struct proc_dir_entry *ent;
> + int rc;
> +
> + of_node_set_flag(dn, OF_DYNAMIC);
> + kref_init(&dn->kref);
> + dn->parent = derive_parent(dn->full_name);
> +
> + rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
> + PSERIES_RECONFIG_ADD, dn);
> + if (rc == NOTIFY_BAD) {
> + printk(KERN_ERR "Failed to add device node %s\n",
> + dn->full_name);
> + return -ENOMEM; /* For now, safe to assume kmalloc failure */
> + }
> +
> + of_attach_node(dn);
> +
> +#ifdef CONFIG_PROC_DEVICETREE
> + ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
> + if (ent)
> + proc_device_tree_add_node(dn, ent);
> +#endif
> +
> + of_node_put(dn->parent);
> + return 0;
> +}

... and this ...

> +int add_device_tree_nodes(struct device_node *dn)
> +{
> + struct device_node *child = dn->child;
> + struct device_node *sibling = dn->sibling;
> + int rc;
> +
> + dn->child = NULL;
> + dn->sibling = NULL;
> + dn->parent = NULL;
> +
> + rc = add_one_node(dn);
> + if (rc)
> + return rc;
> +
> + if (child) {
> + rc = add_device_tree_nodes(child);
> + if (rc)
> + return rc;
> + }
> +
> + if (sibling)
> + rc = add_device_tree_nodes(sibling);
> +
> + return rc;
> +}

... and this ...

> +static int remove_one_node(struct device_node *dn)
> +{
> + struct device_node *parent = dn->parent;
> + struct property *prop = dn->properties;
> +
> +#ifdef CONFIG_PROC_DEVICETREE
> + while (prop) {
> + remove_proc_entry(prop->name, dn->pde);
> + prop = prop->next;
> + }
> +
> + if (dn->pde)
> + remove_proc_entry(dn->pde->name, parent->pde);
> +#endif
> +
> + blocking_notifier_call_chain(&pSeries_reconfig_chain,
> + PSERIES_RECONFIG_REMOVE, dn);
> + of_detach_node(dn);
> + of_node_put(dn); /* Must decrement the refcount */
> +
> + return 0;
> +}

... and this ...

> +static int _remove_device_tree_nodes(struct device_node *dn)
> +{
> + int rc;
> +
> + if (dn->child) {
> + rc = _remove_device_tree_nodes(dn->child);
> + if (rc)
> + return rc;
> + }
> +
> + if (dn->sibling) {
> + rc = _remove_device_tree_nodes(dn->sibling);
> + if (rc)
> + return rc;
> + }
> +
> + rc = remove_one_node(dn);
> + return rc;
> +}

... repeat myself ...

> +int remove_device_tree_nodes(struct device_node *dn)
> +{
> + int rc;
> +
> + if (dn->child) {
> + rc = _remove_device_tree_nodes(dn->child);
> + if (rc)
> + return rc;
> + }
> +
> + rc = remove_one_node(dn);
> + return rc;
> +}

... should probably all go to something like drivers/of/dynamic.c or at
least for now arch/powerpc/kernel/of_dynamic.c along with everything
related to dynamically adding and removing nodes. I see that potentially
useful for more than just DLPAR (though DLPAR is the only user right
now) and should also all be prefixed with of_*

> +#define DR_ENTITY_SENSE 9003
> +#define DR_ENTITY_PRESENT 1
> +#define DR_ENTITY_UNUSABLE 2
> +#define ALLOCATION_STATE 9003
> +#define ALLOC_UNUSABLE 0
> +#define ALLOC_USABLE 1
> +#define ISOLATION_STATE 9001
> +#define ISOLATE 0
> +#define UNISOLATE 1
> +
> +int acquire_drc(u32 drc_index)
> +{
> + int dr_status, rc;
> +
> + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
> + DR_ENTITY_SENSE, drc_index);
> + if (rc || dr_status != DR_ENTITY_UNUSABLE)
> + return -1;
> +
> + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
> + if (rc)
> + return rc;
> +
> + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
> + if (rc) {
> + rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
> + return rc;
> + }
> +
> + return 0;
> +}
> +
> +int release_drc(u32 drc_index)
> +{
> + int dr_status, rc;
> +
> + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
> + DR_ENTITY_SENSE, drc_index);
> + if (rc || dr_status != DR_ENTITY_PRESENT)
> + return -1;
> +
> + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
> + if (rc)
> + return rc;
> +
> + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
> + if (rc) {
> + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
> + return rc;
> + }
> +
> + return 0;
> +}

Both above should have a dlpar_* prefix

> +static int pseries_dlpar_init(void)
> +{
> + if (!machine_is(pseries))
> + return 0;
> +
> + return 0;
> +}
> +device_initcall(pseries_dlpar_init);

What the point ? :-)

Cheers
Ben.

2009-10-29 03:15:37

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 3/6 v5] Memory probe/release files

On Wed, 2009-10-28 at 15:55 -0500, Nathan Fontenot wrote:
> This patch creates the release sysfs file for memory and updates the
> exisiting probe file so both make arch-specific callouts to handle removing
> and adding memory to the system. This also creates the powerpc specific stubs
> for handling the arch callouts.
>
> The creation and use of these files are governed by the exisitng
> CONFIG_ARCH_MEMORY_PROBE and new CONFIG_ARCH_MEMORY_RELEASE config options.
>
> Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
> ---

Is there anybody on linux-mm who needs to Ack this patche ?

Cheers,
Ben.


2009-10-29 03:27:14

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 5/6 v5] CPU probe/release files

On Wed, 2009-10-28 at 15:58 -0500, Nathan Fontenot wrote:
> Create new probe and release sysfs files to facilitate adding and removing
> cpus from the system. This also creates the powerpc specific stubs to handle
> the arch callouts from writes to the sysfs files.
>
> The creation and use of these files is regulated by the
> CONFIG_ARCH_CPU_PROBE_RELEASE option so that only architectures that need the
> capability will have the files created.
>
> Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
> ---

Same question as before here... need some external acks from others
doing cpu hotplug.

Cheers,
Ben.

> Index: powerpc/drivers/base/cpu.c
> ===================================================================
> --- powerpc.orig/drivers/base/cpu.c 2009-10-28 15:20:34.000000000 -0500
> +++ powerpc/drivers/base/cpu.c 2009-10-28 15:21:53.000000000 -0500
> @@ -54,6 +54,7 @@
> ret = count;
> return ret;
> }
> +
> static SYSDEV_ATTR(online, 0644, show_online, store_online);
>
> static void __cpuinit register_cpu_control(struct cpu *cpu)
> @@ -72,6 +73,38 @@
> per_cpu(cpu_sys_devices, logical_cpu) = NULL;
> return;
> }
> +
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> +static ssize_t cpu_probe_store(struct class *class, const char *buf,
> + size_t count)
> +{
> + return arch_cpu_probe(buf, count);
> +}
> +
> +static ssize_t cpu_release_store(struct class *class, const char *buf,
> + size_t count)
> +{
> + return arch_cpu_release(buf, count);
> +}
> +
> +static CLASS_ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
> +static CLASS_ATTR(release, S_IWUSR, NULL, cpu_release_store);
> +
> +int __init cpu_probe_release_init(void)
> +{
> + int rc;
> +
> + rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> + &class_attr_probe.attr);
> + if (!rc)
> + rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> + &class_attr_release.attr);
> +
> + return rc;
> +}
> +device_initcall(cpu_probe_release_init);
> +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
> +
> #else /* ... !CONFIG_HOTPLUG_CPU */
> static inline void register_cpu_control(struct cpu *cpu)
> {
> Index: powerpc/arch/powerpc/include/asm/machdep.h
> ===================================================================
> --- powerpc.orig/arch/powerpc/include/asm/machdep.h 2009-10-28 15:21:47.000000000 -0500
> +++ powerpc/arch/powerpc/include/asm/machdep.h 2009-10-28 15:21:53.000000000 -0500
> @@ -274,6 +274,11 @@
> int (*memory_probe)(u64);
> #endif
>
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> + ssize_t (*cpu_probe)(const char *, size_t);
> + ssize_t (*cpu_release)(const char *, size_t);
> +#endif
> +
> };
>
> extern void e500_idle(void);
> Index: powerpc/arch/powerpc/kernel/sysfs.c
> ===================================================================
> --- powerpc.orig/arch/powerpc/kernel/sysfs.c 2009-10-28 15:20:34.000000000 -0500
> +++ powerpc/arch/powerpc/kernel/sysfs.c 2009-10-28 15:21:53.000000000 -0500
> @@ -461,6 +461,25 @@
>
> cacheinfo_cpu_offline(cpu);
> }
> +
> +#ifdef CONFIG_ARCH_PROBE_RELEASE
> +ssize_t arch_cpu_probe(const char *buf, size_t count)
> +{
> + if (ppc_md.cpu_probe)
> + return ppc_md.cpu_probe(buf, count);
> +
> + return -EINVAL;
> +}
> +
> +ssize_t arch_cpu_release(const char *buf, size_t count)
> +{
> + if (ppc_md.cpu_release)
> + return ppc_md.cpu_release(buf, count);
> +
> + return -EINVAL;
> +}
> +#endif /* CONFIG_ARCH_PROBE_RELEASE */
> +
> #endif /* CONFIG_HOTPLUG_CPU */
>
> static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
> Index: powerpc/arch/powerpc/Kconfig
> ===================================================================
> --- powerpc.orig/arch/powerpc/Kconfig 2009-10-28 15:21:47.000000000 -0500
> +++ powerpc/arch/powerpc/Kconfig 2009-10-28 15:21:53.000000000 -0500
> @@ -320,6 +320,10 @@
>
> Say N if you are unsure.
>
> +config ARCH_CPU_PROBE_RELEASE
> + def_bool y
> + depends on HOTPLUG_CPU
> +
> config ARCH_ENABLE_MEMORY_HOTPLUG
> def_bool y
>
> Index: powerpc/include/linux/cpu.h
> ===================================================================
> --- powerpc.orig/include/linux/cpu.h 2009-10-28 15:20:34.000000000 -0500
> +++ powerpc/include/linux/cpu.h 2009-10-28 15:21:53.000000000 -0500
> @@ -43,6 +43,10 @@
>
> #ifdef CONFIG_HOTPLUG_CPU
> extern void unregister_cpu(struct cpu *cpu);
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> +extern ssize_t arch_cpu_probe(const char *, size_t);
> +extern ssize_t arch_cpu_release(const char *, size_t);
> +#endif
> #endif
>
> struct notifier_block;
> Index: powerpc/arch/powerpc/kernel/smp.c
> ===================================================================
> --- powerpc.orig/arch/powerpc/kernel/smp.c 2009-10-28 15:20:34.000000000 -0500
> +++ powerpc/arch/powerpc/kernel/smp.c 2009-10-28 15:21:53.000000000 -0500
> @@ -364,7 +364,24 @@
> set_cpu_online(cpu, true);
> local_irq_enable();
> }
> +
> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> +ssize_t arch_cpu_probe(const char *buf, size_t count)
> +{
> + if (ppc_md.cpu_probe)
> + return ppc_md.cpu_probe(buf, count);
> +
> + return -EINVAL;
> +}
> +ssize_t arch_cpu_release(const char *buf, size_t count)
> +{
> + if (ppc_md.cpu_release)
> + return ppc_md.cpu_release(buf, count);
> +
> + return -EINVAL;
> +}
> #endif
> +#endif /* CONFIG_HOTPLUG_CPU */
>
> static int __devinit cpu_enable(unsigned int cpu)
> {
> _______________________________________________
> Linuxppc-dev mailing list
> [email protected]
> https://lists.ozlabs.org/listinfo/linuxppc-dev

2009-10-29 03:28:09

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 6/6 v5] CPU DLPAR Handling

On Wed, 2009-10-28 at 15:59 -0500, Nathan Fontenot wrote:

> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
> +static ssize_t cpu_probe(const char *buf, size_t count)

dlpar_cpu_probe() pls

> +static ssize_t cpu_release(const char *buf, size_t count)
> +{

Ditto.

Or else in system.map, backtraces, etc... it's hard to figure out off
hand where it's dying :-)

Cheers,
Ben.

2009-10-29 03:59:21

by Nathan Lynch

[permalink] [raw]
Subject: Re: [PATCH 1/6 v5] Kernel DLPAR Infrastructure

On Thu, 2009-10-29 at 14:08 +1100, Benjamin Herrenschmidt wrote:
> On Wed, 2009-10-28 at 15:53 -0500, Nathan Fontenot wrote:
> > + struct device_node *dn;
> > + struct device_node *first_dn = NULL;
> > + struct device_node *last_dn = NULL;
> > + struct property *property;
> > + struct property *last_property = NULL;
> > + struct cc_workarea *ccwa;
> > + int cc_token;
> > + int rc;
> > +
> > + cc_token = rtas_token("ibm,configure-connector");
> > + if (cc_token == RTAS_UNKNOWN_SERVICE)
> > + return NULL;
> > +
> > + spin_lock(&workarea_lock);
> > +
> > + ccwa = (struct cc_workarea *)&workarea[0];
> > + ccwa->drc_index = drc_index;
> > + ccwa->zero = 0;
>
> Popping a free page with gfp (or just kmalloc'ing 4K) would avoid the
> need for the lock too.

Not kmalloc -- the alignment of the buffer isn't guaranteed when
slub/slab debug is on, and iirc the work area needs to be 4K-aligned.
__get_free_page should be fine, I think.

2009-11-02 16:14:28

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 3/6 v5] Memory probe/release files

Benjamin Herrenschmidt wrote:
> On Wed, 2009-10-28 at 15:55 -0500, Nathan Fontenot wrote:
>> This patch creates the release sysfs file for memory and updates the
>> exisiting probe file so both make arch-specific callouts to handle removing
>> and adding memory to the system. This also creates the powerpc specific stubs
>> for handling the arch callouts.
>>
>> The creation and use of these files are governed by the exisitng
>> CONFIG_ARCH_MEMORY_PROBE and new CONFIG_ARCH_MEMORY_RELEASE config options.
>>
>> Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
>> ---
>
> Is there anybody on linux-mm who needs to Ack this patche ?

Not sure. I will cc linux-mm on the next set of updated patches I send out.

-Nathan

>
> Cheers,
> Ben.
>
>
>

2009-11-02 16:15:17

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 6/6 v5] CPU DLPAR Handling

Benjamin Herrenschmidt wrote:
> On Wed, 2009-10-28 at 15:59 -0500, Nathan Fontenot wrote:
>
>> +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
>> +static ssize_t cpu_probe(const char *buf, size_t count)
>
> dlpar_cpu_probe() pls
>
>> +static ssize_t cpu_release(const char *buf, size_t count)
>> +{
>
> Ditto.
>
> Or else in system.map, backtraces, etc... it's hard to figure out off
> hand where it's dying :-)

Good point, I'll fix this.

-Nathan

>
> Cheers,
> Ben.
>
>

2009-11-02 16:27:29

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 1/6 v5] Kernel DLPAR Infrastructure


Benjamin Herrenschmidt wrote:
> On Wed, 2009-10-28 at 15:53 -0500, Nathan Fontenot wrote:
>> This patch provides the kernel DLPAR infrastructure in a new filed named
>> dlpar.c. The functionality provided is for acquiring and releasing a resource
>> from firmware and the parsing of information returned from the
>> ibm,configure-connector rtas call. Additionally this exports the pSeries
>> reconfiguration notifier chain so that it can be invoked when device tree
>> updates are made.
>>
>> Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
>> ---
>
> Hi Nathan !
>
> Finally I get to review this stuff :-)
>

Thanks!

>> +#define CFG_CONN_WORK_SIZE 4096
>> +static char workarea[CFG_CONN_WORK_SIZE];
>> +static DEFINE_SPINLOCK(workarea_lock);
>
> So I'm not a huge fan of this workarea static. First a static is in
> effect a global name (as far as System.map etc... are concerned) so it
> would warrant a better name. Then, do we really want that 4K of BSS
> taken even on platforms that don't do dlpar ? Any reason why you don't
> just pop a free page with __get_free_page() inside of
> configure_connector() ?
>

I'm not either, having a static buffer and a lock feels like overkill
for this. I tried kmalloc, but that didn't work. I'll try using
__get_free_page.

>> +struct cc_workarea {
>> + u32 drc_index;
>> + u32 zero;
>> + u32 name_offset;
>> + u32 prop_length;
>> + u32 prop_offset;
>> +};
>> +
>> +static struct property *parse_cc_property(char *workarea)
>> +{
>> + struct property *prop;
>> + struct cc_workarea *ccwa;
>> + char *name;
>> + char *value;
>> +
>> + prop = kzalloc(sizeof(*prop), GFP_KERNEL);
>> + if (!prop)
>> + return NULL;
>> +
>> + ccwa = (struct cc_workarea *)workarea;
>> + name = workarea + ccwa->name_offset;
>> + prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
>> + if (!prop->name) {
>> + kfree(prop);
>> + return NULL;
>> + }
>> +
>> + strcpy(prop->name, name);
>> +
>> + prop->length = ccwa->prop_length;
>> + value = workarea + ccwa->prop_offset;
>> + prop->value = kzalloc(prop->length, GFP_KERNEL);
>> + if (!prop->value) {
>> + kfree(prop->name);
>> + kfree(prop);
>> + return NULL;
>> + }
>> +
>> + memcpy(prop->value, value, prop->length);
>> + return prop;
>> +}
>> +
>> +static void free_property(struct property *prop)
>> +{
>> + kfree(prop->name);
>> + kfree(prop->value);
>> + kfree(prop);
>> +}
>> +
>> +static struct device_node *parse_cc_node(char *work_area)
>> +{
>
> const char* maybe ?

sure.

>
>> + struct device_node *dn;
>> + struct cc_workarea *ccwa;
>> + char *name;
>> +
>> + dn = kzalloc(sizeof(*dn), GFP_KERNEL);
>> + if (!dn)
>> + return NULL;
>> +
>> + ccwa = (struct cc_workarea *)work_area;
>> + name = work_area + ccwa->name_offset;
>
> I'm wondering whether work_area should be a struct cc_workarea * in the
> first place with a char data[] at the end, but that would mean probably
> tweaking the offsets... no big deal, up to you.
>

I'll look onto that. Anything that makes this easier to understand is good.


>> + dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
>> + if (!dn->full_name) {
>> + kfree(dn);
>> + return NULL;
>> + }
>> +
>> + strcpy(dn->full_name, name);
>
> kstrdup ?

yep, should have used kstrdup.

>
> .../...
>
>> +#define NEXT_SIBLING 1
>> +#define NEXT_CHILD 2
>> +#define NEXT_PROPERTY 3
>> +#define PREV_PARENT 4
>> +#define MORE_MEMORY 5
>> +#define CALL_AGAIN -2
>> +#define ERR_CFG_USE -9003
>> +
>> +struct device_node *configure_connector(u32 drc_index)
>> +{
>
> It's a global exported function, I'd rather you call it
> dlpar_configure_connector()
>

ok.

>> + struct device_node *dn;
>> + struct device_node *first_dn = NULL;
>> + struct device_node *last_dn = NULL;
>> + struct property *property;
>> + struct property *last_property = NULL;
>> + struct cc_workarea *ccwa;
>> + int cc_token;
>> + int rc;
>> +
>> + cc_token = rtas_token("ibm,configure-connector");
>> + if (cc_token == RTAS_UNKNOWN_SERVICE)
>> + return NULL;
>> +
>> + spin_lock(&workarea_lock);
>> +
>> + ccwa = (struct cc_workarea *)&workarea[0];
>> + ccwa->drc_index = drc_index;
>> + ccwa->zero = 0;
>
> Popping a free page with gfp (or just kmalloc'ing 4K) would avoid the
> need for the lock too.

yes, see comment at beginning.

>
>> + rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
>> + while (rc) {
>> + switch (rc) {
>> + case NEXT_SIBLING:
>> + dn = parse_cc_node(workarea);
>> + if (!dn)
>> + goto cc_error;
>> +
>> + dn->parent = last_dn->parent;
>> + last_dn->sibling = dn;
>> + last_dn = dn;
>> + break;
>> +
>> + case NEXT_CHILD:
>> + dn = parse_cc_node(workarea);
>> + if (!dn)
>> + goto cc_error;
>> +
>> + if (!first_dn)
>> + first_dn = dn;
>> + else {
>> + dn->parent = last_dn;
>> + if (last_dn)
>> + last_dn->child = dn;
>> + }
>> +
>> + last_dn = dn;
>> + break;
>> +
>> + case NEXT_PROPERTY:
>> + property = parse_cc_property(workarea);
>> + if (!property)
>> + goto cc_error;
>> +
>> + if (!last_dn->properties)
>> + last_dn->properties = property;
>> + else
>> + last_property->next = property;
>> +
>> + last_property = property;
>> + break;
>> +
>> + case PREV_PARENT:
>> + last_dn = last_dn->parent;
>> + break;
>> +
>> + case CALL_AGAIN:
>> + break;
>> +
>> + case MORE_MEMORY:
>> + case ERR_CFG_USE:
>> + default:
>> + printk(KERN_ERR "Unexpected Error (%d) "
>> + "returned from configure-connector\n", rc);
>> + goto cc_error;
>> + }
>> +
>> + rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
>> + }
>> +
>> + spin_unlock(&workarea_lock);
>> + return first_dn;
>> +
>> +cc_error:
>> + spin_unlock(&workarea_lock);
>> +
>> + if (first_dn)
>> + free_cc_nodes(first_dn);
>> +
>> + return NULL;
>> +}
>> +
>> +static struct device_node *derive_parent(const char *path)
>> +{
>> + struct device_node *parent;
>> + char parent_path[128];
>> + int parent_path_len;
>> +
>> + parent_path_len = strrchr(path, '/') - path + 1;
>> + strlcpy(parent_path, path, parent_path_len);
>> +
>> + parent = of_find_node_by_path(parent_path);
>> +
>> + return parent;
>> +}
>
> This ...
>
>> +static int add_one_node(struct device_node *dn)
>> +{
>> + struct proc_dir_entry *ent;
>> + int rc;
>> +
>> + of_node_set_flag(dn, OF_DYNAMIC);
>> + kref_init(&dn->kref);
>> + dn->parent = derive_parent(dn->full_name);
>> +
>> + rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
>> + PSERIES_RECONFIG_ADD, dn);
>> + if (rc == NOTIFY_BAD) {
>> + printk(KERN_ERR "Failed to add device node %s\n",
>> + dn->full_name);
>> + return -ENOMEM; /* For now, safe to assume kmalloc failure */
>> + }
>> +
>> + of_attach_node(dn);
>> +
>> +#ifdef CONFIG_PROC_DEVICETREE
>> + ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
>> + if (ent)
>> + proc_device_tree_add_node(dn, ent);
>> +#endif
>> +
>> + of_node_put(dn->parent);
>> + return 0;
>> +}
>
> ... and this ...
>
>> +int add_device_tree_nodes(struct device_node *dn)
>> +{
>> + struct device_node *child = dn->child;
>> + struct device_node *sibling = dn->sibling;
>> + int rc;
>> +
>> + dn->child = NULL;
>> + dn->sibling = NULL;
>> + dn->parent = NULL;
>> +
>> + rc = add_one_node(dn);
>> + if (rc)
>> + return rc;
>> +
>> + if (child) {
>> + rc = add_device_tree_nodes(child);
>> + if (rc)
>> + return rc;
>> + }
>> +
>> + if (sibling)
>> + rc = add_device_tree_nodes(sibling);
>> +
>> + return rc;
>> +}
>
> ... and this ...
>
>> +static int remove_one_node(struct device_node *dn)
>> +{
>> + struct device_node *parent = dn->parent;
>> + struct property *prop = dn->properties;
>> +
>> +#ifdef CONFIG_PROC_DEVICETREE
>> + while (prop) {
>> + remove_proc_entry(prop->name, dn->pde);
>> + prop = prop->next;
>> + }
>> +
>> + if (dn->pde)
>> + remove_proc_entry(dn->pde->name, parent->pde);
>> +#endif
>> +
>> + blocking_notifier_call_chain(&pSeries_reconfig_chain,
>> + PSERIES_RECONFIG_REMOVE, dn);
>> + of_detach_node(dn);
>> + of_node_put(dn); /* Must decrement the refcount */
>> +
>> + return 0;
>> +}
>
> ... and this ...
>
>> +static int _remove_device_tree_nodes(struct device_node *dn)
>> +{
>> + int rc;
>> +
>> + if (dn->child) {
>> + rc = _remove_device_tree_nodes(dn->child);
>> + if (rc)
>> + return rc;
>> + }
>> +
>> + if (dn->sibling) {
>> + rc = _remove_device_tree_nodes(dn->sibling);
>> + if (rc)
>> + return rc;
>> + }
>> +
>> + rc = remove_one_node(dn);
>> + return rc;
>> +}
>
> ... repeat myself ...
>
>> +int remove_device_tree_nodes(struct device_node *dn)
>> +{
>> + int rc;
>> +
>> + if (dn->child) {
>> + rc = _remove_device_tree_nodes(dn->child);
>> + if (rc)
>> + return rc;
>> + }
>> +
>> + rc = remove_one_node(dn);
>> + return rc;
>> +}
>
> ... should probably all go to something like drivers/of/dynamic.c or at
> least for now arch/powerpc/kernel/of_dynamic.c along with everything
> related to dynamically adding and removing nodes. I see that potentially
> useful for more than just DLPAR (though DLPAR is the only user right
> now) and should also all be prefixed with of_*

I agree, there should be at least a powerpc generic implementation of these
routines. The reason I put them here is that I am doing some oddities with
the next, child, and sibling pointers since they point to items not yet in
the device tree.

I saw that Grant Likely is doing updates to all of the of_* stuff right now,
would it be ok to have these routines here, renamed as dlpar_*, and look
to merge them in with Grant's updates when he finishes?

>
>> +#define DR_ENTITY_SENSE 9003
>> +#define DR_ENTITY_PRESENT 1
>> +#define DR_ENTITY_UNUSABLE 2
>> +#define ALLOCATION_STATE 9003
>> +#define ALLOC_UNUSABLE 0
>> +#define ALLOC_USABLE 1
>> +#define ISOLATION_STATE 9001
>> +#define ISOLATE 0
>> +#define UNISOLATE 1
>> +
>> +int acquire_drc(u32 drc_index)
>> +{
>> + int dr_status, rc;
>> +
>> + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
>> + DR_ENTITY_SENSE, drc_index);
>> + if (rc || dr_status != DR_ENTITY_UNUSABLE)
>> + return -1;
>> +
>> + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
>> + if (rc)
>> + return rc;
>> +
>> + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
>> + if (rc) {
>> + rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
>> + return rc;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +int release_drc(u32 drc_index)
>> +{
>> + int dr_status, rc;
>> +
>> + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
>> + DR_ENTITY_SENSE, drc_index);
>> + if (rc || dr_status != DR_ENTITY_PRESENT)
>> + return -1;
>> +
>> + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
>> + if (rc)
>> + return rc;
>> +
>> + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
>> + if (rc) {
>> + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
>> + return rc;
>> + }
>> +
>> + return 0;
>> +}
>
> Both above should have a dlpar_* prefix

will do.

>
>> +static int pseries_dlpar_init(void)
>> +{
>> + if (!machine_is(pseries))
>> + return 0;
>> +
>> + return 0;
>> +}
>> +device_initcall(pseries_dlpar_init);
>
> What the point ? :-)

Yeah, its a bit odd looking but later patches actually add code to the init routine
to set up memory probe/release and cpu probe/release handlers.

I'll look to add ifdef's around the initcall for cases where no work is to be done.

-Nathan Fontenot

>
> Cheers
> Ben.
>

2009-11-02 16:41:15

by Grant Likely

[permalink] [raw]
Subject: Re: [PATCH 1/6 v5] Kernel DLPAR Infrastructure

On Mon, Nov 2, 2009 at 9:27 AM, Nathan Fontenot <[email protected]> wrote:
> I saw that Grant Likely is doing updates to all of the of_* stuff right now,
> would it be ok to have these routines here, renamed as dlpar_*, and look
> to merge them in with Grant's updates when he finishes?

No because then we're stuck with renaming the API at a later date.
Name it what it is, and put it where it belongs. I'll deal with any
merge breakage as it occurs.

g.

2009-11-02 16:47:21

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 1/6 v5] Kernel DLPAR Infrastructure

Grant Likely wrote:
> On Mon, Nov 2, 2009 at 9:27 AM, Nathan Fontenot <[email protected]> wrote:
>> I saw that Grant Likely is doing updates to all of the of_* stuff right now,
>> would it be ok to have these routines here, renamed as dlpar_*, and look
>> to merge them in with Grant's updates when he finishes?
>
> No because then we're stuck with renaming the API at a later date.
> Name it what it is, and put it where it belongs. I'll deal with any
> merge breakage as it occurs.
>

ok. Would this be better off in powerpc code, or should I go ahead and put it
in something like drivers/of/dynamic.c?

-Nathan Fontenot

2009-11-02 16:56:40

by Grant Likely

[permalink] [raw]
Subject: Re: [PATCH 1/6 v5] Kernel DLPAR Infrastructure

On Mon, Nov 2, 2009 at 9:47 AM, Nathan Fontenot <[email protected]> wrote:
> Grant Likely wrote:
>>
>> On Mon, Nov 2, 2009 at 9:27 AM, Nathan Fontenot <[email protected]>
>> wrote:
>>>
>>> I saw that Grant Likely is doing updates to all of the of_* stuff right
>>> now,
>>> would it be ok to have these routines here, renamed as dlpar_*, and look
>>> to merge them in with Grant's updates when he finishes?
>>
>> No because then we're stuck with renaming the API at a later date.
>> Name it what it is, and put it where it belongs. ?I'll deal with any
>> merge breakage as it occurs.
>>
>
> ok. ?Would this be better off in powerpc code, or should I go ahead and put
> it
> in something like drivers/of/dynamic.c?

drivers/of/dynamic.c sounds fine to me. I can always move them if it
find a better place. Send the patch to me and cc: the
[email protected] mailing list.

g.


--
Grant Likely, B.Sc., P.Eng.
Secret Lab Technologies Ltd.

2009-11-05 18:51:28

by Roland Dreier

[permalink] [raw]
Subject: Re: [PATCH 4/6 v5] Memory DLPAR Handling

This isn't a review of this patch -- more a question out of curiousity
about how you actually can do memory remove in practice. Do you have
any coordination between the platform/hypervisor and the kernel to make
sure that a memory region you might want to remove later gets put into
zone_movable so that there's a chance for the kernel to vacate it? Or
do you have some other way to coordinate or at least expose which
regions of memory the kernel will have a chance at releasing?

Thanks,
Roland

2009-12-18 14:33:19

by Andreas Schwab

[permalink] [raw]
Subject: Re: [PATCH 5/6 v5] CPU probe/release files

Nathan Fontenot <[email protected]> writes:

> Index: powerpc/arch/powerpc/Kconfig
> ===================================================================
> --- powerpc.orig/arch/powerpc/Kconfig 2009-10-28 15:21:47.000000000 -0500
> +++ powerpc/arch/powerpc/Kconfig 2009-10-28 15:21:53.000000000 -0500
> @@ -320,6 +320,10 @@
>
> Say N if you are unsure.
>
> +config ARCH_CPU_PROBE_RELEASE
> + def_bool y
> + depends on HOTPLUG_CPU
> +

That does not work.

drivers/built-in.o: In function `.store_online':
cpu.c:(.ref.text+0xf5c): undefined reference to `.cpu_hotplug_driver_lock'
cpu.c:(.ref.text+0xfc8): undefined reference to `.cpu_hotplug_driver_unlock'
make: *** [.tmp_vmlinux1] Error 1

cpu_hotplug_driver_lock is only defined on pseries, but HOTPLUG_CPU is
also defined on pmac.

Andreas.

--
Andreas Schwab, [email protected]
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5
"And now for something completely different."

2009-12-18 16:24:57

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 5/6 v5] CPU probe/release files

Andreas Schwab wrote:
> Nathan Fontenot <[email protected]> writes:
>
>> Index: powerpc/arch/powerpc/Kconfig
>> ===================================================================
>> --- powerpc.orig/arch/powerpc/Kconfig 2009-10-28 15:21:47.000000000 -0500
>> +++ powerpc/arch/powerpc/Kconfig 2009-10-28 15:21:53.000000000 -0500
>> @@ -320,6 +320,10 @@
>>
>> Say N if you are unsure.
>>
>> +config ARCH_CPU_PROBE_RELEASE
>> + def_bool y
>> + depends on HOTPLUG_CPU
>> +
>
> That does not work.
>
> drivers/built-in.o: In function `.store_online':
> cpu.c:(.ref.text+0xf5c): undefined reference to `.cpu_hotplug_driver_lock'
> cpu.c:(.ref.text+0xfc8): undefined reference to `.cpu_hotplug_driver_unlock'
> make: *** [.tmp_vmlinux1] Error 1
>
> cpu_hotplug_driver_lock is only defined on pseries, but HOTPLUG_CPU is
> also defined on pmac.

These two routines should be defined as a no-op if CONFIG_ARCH_CPU_PROBE_RELEASE
is not defined in linux/cpu.h. The update below should be in the patch set
you are looking at.

from linux/cpu.h:

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
extern void cpu_hotplug_driver_lock(void);
extern void cpu_hotplug_driver_unlock(void);
#else
static inline void cpu_hotplug_driver_lock(void)
{
}

static inline void cpu_hotplug_driver_unlock(void)
{
}
#endif

-Nathan Fontenot

2009-12-18 17:29:40

by Andreas Schwab

[permalink] [raw]
Subject: Re: [PATCH 5/6 v5] CPU probe/release files

Nathan Fontenot <[email protected]> writes:

> Andreas Schwab wrote:
>> Nathan Fontenot <[email protected]> writes:
>>
>>> Index: powerpc/arch/powerpc/Kconfig
>>> ===================================================================
>>> --- powerpc.orig/arch/powerpc/Kconfig 2009-10-28 15:21:47.000000000 -0500
>>> +++ powerpc/arch/powerpc/Kconfig 2009-10-28 15:21:53.000000000 -0500
>>> @@ -320,6 +320,10 @@
>>> Say N if you are unsure.
>>> +config ARCH_CPU_PROBE_RELEASE
>>> + def_bool y
>>> + depends on HOTPLUG_CPU
>>> +
>>
>> That does not work.
>>
>> drivers/built-in.o: In function `.store_online':
>> cpu.c:(.ref.text+0xf5c): undefined reference to `.cpu_hotplug_driver_lock'
>> cpu.c:(.ref.text+0xfc8): undefined reference to `.cpu_hotplug_driver_unlock'
>> make: *** [.tmp_vmlinux1] Error 1
>>
>> cpu_hotplug_driver_lock is only defined on pseries, but HOTPLUG_CPU is
>> also defined on pmac.
>
> These two routines should be defined as a no-op if CONFIG_ARCH_CPU_PROBE_RELEASE
> is not defined in linux/cpu.h.

??? CONFIG_ARCH_CPU_PROBE_RELEASE *is* defined.

Andreas.

--
Andreas Schwab, [email protected]
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5
"And now for something completely different."

2009-12-19 08:49:20

by Benjamin Herrenschmidt

[permalink] [raw]
Subject: Re: [PATCH 5/6 v5] CPU probe/release files

On Fri, 2009-12-18 at 15:33 +0100, Andreas Schwab wrote:
> Nathan Fontenot <[email protected]> writes:
>
> > Index: powerpc/arch/powerpc/Kconfig
> > ===================================================================
> > --- powerpc.orig/arch/powerpc/Kconfig 2009-10-28 15:21:47.000000000 -0500
> > +++ powerpc/arch/powerpc/Kconfig 2009-10-28 15:21:53.000000000 -0500
> > @@ -320,6 +320,10 @@
> >
> > Say N if you are unsure.
> >
> > +config ARCH_CPU_PROBE_RELEASE
> > + def_bool y
> > + depends on HOTPLUG_CPU
> > +
>
> That does not work.
>
> drivers/built-in.o: In function `.store_online':
> cpu.c:(.ref.text+0xf5c): undefined reference to `.cpu_hotplug_driver_lock'
> cpu.c:(.ref.text+0xfc8): undefined reference to `.cpu_hotplug_driver_unlock'
> make: *** [.tmp_vmlinux1] Error 1
>
> cpu_hotplug_driver_lock is only defined on pseries, but HOTPLUG_CPU is
> also defined on pmac.

Well, the stuff was merged ... so we need to fix it.

Nathan, the problem is that the above wlil define ARCH_CPU_PROBE_RELEASE
for any powerpc platform that has HOTPLUG_CPU. So a kernel that doesn't
have pSeries support but has hotplug CPU will have a problem, though for
some reason none of my test configs triggered it (I'll have to check why
next week).

The right approach here is to have cpu_hotplug_driver_lock go through
ppc_md. I suppose, along with some of the other CPU hotplug platform
specific bits.

Cheers,
Ben.

2009-12-19 10:11:36

by Andreas Schwab

[permalink] [raw]
Subject: Re: [PATCH 5/6 v5] CPU probe/release files

Benjamin Herrenschmidt <[email protected]> writes:

> Nathan, the problem is that the above wlil define ARCH_CPU_PROBE_RELEASE
> for any powerpc platform that has HOTPLUG_CPU. So a kernel that doesn't
> have pSeries support but has hotplug CPU will have a problem, though for
> some reason none of my test configs triggered it (I'll have to check why
> next week).

Perhaps you didn't enable CONFIG_SUSPEND.

Andreas.

--
Andreas Schwab, [email protected]
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756 01D3 44D5 214B 8276 4ED5
"And now for something completely different."

2009-12-22 02:17:55

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 5/6 v5] CPU probe/release files

Benjamin Herrenschmidt wrote:
> On Fri, 2009-12-18 at 15:33 +0100, Andreas Schwab wrote:
>> Nathan Fontenot <[email protected]> writes:
>>
>>> Index: powerpc/arch/powerpc/Kconfig
>>> ===================================================================
>>> --- powerpc.orig/arch/powerpc/Kconfig 2009-10-28 15:21:47.000000000 -0500
>>> +++ powerpc/arch/powerpc/Kconfig 2009-10-28 15:21:53.000000000 -0500
>>> @@ -320,6 +320,10 @@
>>>
>>> Say N if you are unsure.
>>>
>>> +config ARCH_CPU_PROBE_RELEASE
>>> + def_bool y
>>> + depends on HOTPLUG_CPU
>>> +
>> That does not work.
>>
>> drivers/built-in.o: In function `.store_online':
>> cpu.c:(.ref.text+0xf5c): undefined reference to `.cpu_hotplug_driver_lock'
>> cpu.c:(.ref.text+0xfc8): undefined reference to `.cpu_hotplug_driver_unlock'
>> make: *** [.tmp_vmlinux1] Error 1
>>
>> cpu_hotplug_driver_lock is only defined on pseries, but HOTPLUG_CPU is
>> also defined on pmac.
>
> Well, the stuff was merged ... so we need to fix it.
>
> Nathan, the problem is that the above wlil define ARCH_CPU_PROBE_RELEASE
> for any powerpc platform that has HOTPLUG_CPU. So a kernel that doesn't
> have pSeries support but has hotplug CPU will have a problem, though for
> some reason none of my test configs triggered it (I'll have to check why
> next week).
>
> The right approach here is to have cpu_hotplug_driver_lock go through
> ppc_md. I suppose, along with some of the other CPU hotplug platform
> specific bits.
>

Sounds good to me, I'll get a patch together.

-Nathan Fontenot