2009-09-11 21:08:41

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 0/5] kernel handling of dynamic logical partitioning

The Dynamic Logical Partitioning (DLPAR) capabilities of the powerpc pseries
platform allows for the addition and removal of resources (i.e. cpus,
memory, pci devices) from a partition. The removal of a resource involves
removing the resource's node from the device tree and then returning the
resource to firmware via the rtas set-indicator call. To add a resource, it
is first obtained from firmware via the rtas set-indicator call and then a
new device tree node is created using the ibm,configure-connector rtas call
and added to the device tree.

The following set of patches implements the needed infrastructure to have the
kernel handle the DLPAR addition and removal of memory and cpus (other
DLPAR'able items to follow in future patches). The framework for this is
to create a set of probe/release sysfs files in pseries that will add or
remove the cpu or memory to the system.

The majority of the code is powerpc/pseries specific except for PATCH 3/5, so
I am cc'ing lkml.

Patches include in this set:
1/5 - DLPAR infrastructure for powerpc/pseries platform.
2/5 - Move the of_drconf_cell struct to prom.h
3/5 - Export the memory sysdev class
4/5 - Memory DLPAR handling
5/5 - CPU DLPAR handling

Please pardon the re-send, I borked the lkml address on cc the first time.

-Nathan Fontenot


2009-09-11 21:11:07

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 1/5] dynamic logical partitioning infrastructure

This patch provides the kernel DLPAR infrastructure in a new filed named
dlpar.c. The functionality provided is for acquiring and releasing a
resource from firmware and the parsing of information returned from the
ibm,configure-connector rtas call. Additionally this exports the
pSeries reconfiguration notifier chain so that it can be invoked when
device tree updates are made.

Signed-off-by: Nathan Fontenot <[email protected]>
---

Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-09-11 12:51:52.000000000 -0500
@@ -0,0 +1,416 @@
+/*
+ * dlpar.c - support for dynamic reconfiguration (including PCI
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/pSeries_reconfig.h>
+
+#define CFG_CONN_WORK_SIZE 4096
+static char workarea[CFG_CONN_WORK_SIZE];
+spinlock_t workarea_lock;
+
+static struct property *parse_cc_property(char *workarea)
+{
+ struct property *prop;
+ u32 *work_ints;
+ char *name;
+ char *value;
+
+ prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+ if (!prop)
+ return NULL;
+
+ work_ints = (u32 *)workarea;
+ name = workarea + work_ints[2];
+ prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!prop->name) {
+ kfree(prop);
+ return NULL;
+ }
+
+ strcpy(prop->name, name);
+
+ prop->length = work_ints[3];
+ value = workarea + work_ints[4];
+ prop->value = kzalloc(prop->length, GFP_KERNEL);
+ if (!prop->value) {
+ kfree(prop->name);
+ kfree(prop);
+ return NULL;
+ }
+
+ memcpy(prop->value, value, prop->length);
+ return prop;
+}
+
+static void free_cc_property(struct property *prop)
+{
+ if (prop->name)
+ kfree(prop->name);
+ if (prop->value)
+ kfree(prop->value);
+
+ kfree(prop);
+}
+
+static struct device_node *parse_cc_node(char *work_area)
+{
+ struct device_node *dn;
+ u32 *work_ints;
+ char *name;
+
+ dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+ if (!dn)
+ return NULL;
+
+ work_ints = (u32 *)work_area;
+ name = work_area + work_ints[2];
+ dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
+ if (!dn->full_name) {
+ kfree(dn);
+ return NULL;
+ }
+
+ strcpy(dn->full_name, name);
+ return dn;
+}
+
+static void free_one_cc_node(struct device_node *dn)
+{
+ struct property *prop;
+
+ while (dn->properties) {
+ prop = dn->properties;
+ dn->properties = prop->next;
+ free_cc_property(prop);
+ }
+
+ if (dn->full_name)
+ kfree(dn->full_name);
+
+ kfree(dn);
+}
+
+static void free_cc_nodes(struct device_node *dn)
+{
+ if (dn->child)
+ free_cc_nodes(dn->child);
+
+ if (dn->sibling)
+ free_cc_nodes(dn->sibling);
+
+ free_one_cc_node(dn);
+}
+
+#define NEXT_SIBLING 1
+#define NEXT_CHILD 2
+#define NEXT_PROPERTY 3
+#define PREV_PARENT 4
+#define MORE_MEMORY 5
+#define CALL_AGAIN -2
+#define ERR_CFG_USE -9003
+
+struct device_node *configure_connector(u32 drc_index)
+{
+ struct device_node *dn;
+ struct device_node *first_dn = NULL;
+ struct device_node *last_dn = NULL;
+ struct property *property;
+ struct property *last_property = NULL;
+ u32 *work_int;
+ int cc_token;
+ int rc;
+
+ cc_token = rtas_token("ibm,configure-connector");
+ if (cc_token == RTAS_UNKNOWN_SERVICE)
+ return NULL;
+
+ spin_lock(&workarea_lock);
+
+ work_int = (u32 *)&workarea[0];
+ work_int[0] = drc_index;
+ work_int[1] = 0;
+
+ rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+ while (rc) {
+ switch (rc) {
+ case NEXT_SIBLING:
+ dn = parse_cc_node(workarea);
+ if (!dn)
+ goto cc_error;
+
+ dn->parent = last_dn->parent;
+ last_dn->sibling = dn;
+ last_dn = dn;
+ break;
+
+ case NEXT_CHILD:
+ dn = parse_cc_node(workarea);
+ if (!dn)
+ goto cc_error;
+
+ if (!first_dn)
+ first_dn = dn;
+ else {
+ dn->parent = last_dn;
+ if (last_dn)
+ last_dn->child = dn;
+ }
+
+ last_dn = dn;
+ break;
+
+ case NEXT_PROPERTY:
+ property = parse_cc_property(workarea);
+ if (!property)
+ goto cc_error;
+
+ if (!last_dn->properties)
+ last_dn->properties = property;
+ else
+ last_property->next = property;
+
+ last_property = property;
+ break;
+
+ case PREV_PARENT:
+ last_dn = last_dn->parent;
+ break;
+
+ case CALL_AGAIN:
+ break;
+
+ case MORE_MEMORY:
+ case ERR_CFG_USE:
+ default:
+ printk(KERN_ERR "Unexpected Error (%d) "
+ "returned from configure-connector\n",
+ rc);
+ goto cc_error;
+ }
+
+ rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
+ }
+
+ spin_unlock(&workarea_lock);
+ return first_dn;
+
+cc_error:
+ spin_unlock(&workarea_lock);
+
+ if (first_dn)
+ free_cc_nodes(first_dn);
+
+ return NULL;
+}
+
+static struct device_node *derive_parent(const char *path)
+{
+ struct device_node *parent;
+ char parent_path[128];
+ int parent_path_len;
+
+ parent_path_len = strrchr(path, '/') - path + 1;
+ strlcpy(parent_path, path, parent_path_len);
+
+ parent = of_find_node_by_path(parent_path);
+
+ return parent;
+}
+
+static int add_one_node(struct device_node *dn)
+{
+ struct proc_dir_entry *ent;
+ int rc;
+
+ of_node_set_flag(dn, OF_DYNAMIC);
+ kref_init(&dn->kref);
+ dn->parent = derive_parent(dn->full_name);
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_RECONFIG_ADD, dn);
+ if (rc == NOTIFY_BAD) {
+ printk(KERN_ERR "Failed to add device node %s\n",
+ dn->full_name);
+ return -ENOMEM; /* For now, safe to assume kmalloc failure */
+ }
+
+ of_attach_node(dn);
+
+#ifdef CONFIG_PROC_DEVICETREE
+ ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
+ if (ent)
+ proc_device_tree_add_node(dn, ent);
+#endif
+
+ of_node_put(dn->parent);
+ return 0;
+}
+
+int add_device_tree_nodes(struct device_node *dn)
+{
+ struct device_node *child = dn->child;
+ struct device_node *sibling = dn->sibling;
+ int rc;
+
+ dn->child = NULL;
+ dn->sibling = NULL;
+ dn->parent = NULL;
+
+ rc = add_one_node(dn);
+ if (rc)
+ return rc;
+
+ if (child) {
+ rc = add_device_tree_nodes(child);
+ if (rc)
+ return rc;
+ }
+
+ if (sibling)
+ rc = add_device_tree_nodes(sibling);
+
+ return rc;
+}
+
+static int remove_one_node(struct device_node *dn)
+{
+ struct device_node *parent = dn->parent;
+ struct property *prop = dn->properties;
+
+#ifdef CONFIG_PROC_DEVICETREE
+ while (prop) {
+ remove_proc_entry(prop->name, dn->pde);
+ prop = prop->next;
+ }
+
+ if (dn->pde)
+ remove_proc_entry(dn->pde->name, parent->pde);
+#endif
+
+ blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_RECONFIG_REMOVE, dn);
+ of_detach_node(dn);
+
+ of_node_put(parent);
+ of_node_put(dn); /* Must decrement the refcount */
+
+ return 0;
+}
+
+static int _remove_device_tree_nodes(struct device_node *dn)
+{
+ int rc;
+
+ if (dn->child) {
+ rc = _remove_device_tree_nodes(dn->child);
+ if (rc)
+ return rc;
+ }
+
+ if (dn->sibling) {
+ rc = _remove_device_tree_nodes(dn->sibling);
+ if (rc)
+ return rc;
+ }
+
+ rc = remove_one_node(dn);
+ return rc;
+}
+
+int remove_device_tree_nodes(struct device_node *dn)
+{
+ int rc;
+
+ if (dn->child) {
+ rc = _remove_device_tree_nodes(dn->child);
+ if (rc)
+ return rc;
+ }
+
+ rc = remove_one_node(dn);
+ return rc;
+}
+
+#define DR_ENTITY_SENSE 9003
+#define DR_ENTITY_PRESENT 1
+#define DR_ENTITY_UNUSABLE 2
+#define ALLOCATION_STATE 9003
+#define ALLOC_UNUSABLE 0
+#define ALLOC_USABLE 1
+#define ISOLATION_STATE 9001
+#define ISOLATE 0
+#define UNISOLATE 1
+
+int acquire_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+ DR_ENTITY_SENSE, drc_index);
+ if (rc || dr_status != DR_ENTITY_UNUSABLE)
+ return -1;
+
+ rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+ if (rc)
+ return -1;
+
+ rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+ if (rc) {
+ rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+ return -1;
+ }
+
+ return 0;
+}
+
+int release_drc(u32 drc_index)
+{
+ int dr_status, rc;
+
+ rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+ DR_ENTITY_SENSE, drc_index);
+ if (rc || dr_status != DR_ENTITY_PRESENT)
+ return -1;
+
+ rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+ if (rc)
+ return -1;
+
+ rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+ if (rc) {
+ rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int pseries_dlpar_init(void)
+{
+ spin_lock_init(&workarea_lock);
+
+ if (!machine_is(pseries))
+ return 0;
+
+ return 0;
+}
+__initcall(pseries_dlpar_init);
Index: powerpc/arch/powerpc/platforms/pseries/Makefile
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/Makefile 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/Makefile 2009-09-11 12:51:52.000000000 -0500
@@ -8,7 +8,7 @@

obj-y := lpar.o hvCall.o nvram.o reconfig.o \
setup.o iommu.o ras.o rtasd.o \
- firmware.o power.o
+ firmware.o power.o dlpar.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_XICS) += xics.o
obj-$(CONFIG_SCANLOG) += scanlog.o
Index: powerpc/arch/powerpc/include/asm/pSeries_reconfig.h
===================================================================
--- powerpc.orig/arch/powerpc/include/asm/pSeries_reconfig.h 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/include/asm/pSeries_reconfig.h 2009-09-11 12:51:52.000000000 -0500
@@ -17,6 +17,7 @@
#ifdef CONFIG_PPC_PSERIES
extern int pSeries_reconfig_notifier_register(struct notifier_block *);
extern void pSeries_reconfig_notifier_unregister(struct notifier_block *);
+extern struct blocking_notifier_head pSeries_reconfig_chain;
#else /* !CONFIG_PPC_PSERIES */
static inline int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{
Index: powerpc/arch/powerpc/platforms/pseries/reconfig.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/reconfig.c 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/reconfig.c 2009-09-11 12:51:52.000000000 -0500
@@ -95,7 +95,7 @@
return parent;
}

-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
+struct blocking_notifier_head pSeries_reconfig_chain = BLOCKING_NOTIFIER_INIT(pSeries_reconfig_chain);

int pSeries_reconfig_notifier_register(struct notifier_block *nb)
{

2009-09-11 21:12:29

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 2/5] move of_drconf_cell definition to prom.h

Move the definition of the of_drconf_cell struct from numa.c to prom.h. This
is needed so that we can parse the ibm,dynamic-memory device-tree property
when DLPAR adding and removing memory.

Signed-off-by: Nathan Fontenot <[email protected]>
---

Index: powerpc/arch/powerpc/include/asm/prom.h
===================================================================
--- powerpc.orig/arch/powerpc/include/asm/prom.h 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/include/asm/prom.h 2009-09-11 12:52:11.000000000 -0500
@@ -349,6 +349,18 @@
*/
extern void __iomem *of_iomap(struct device_node *device, int index);

+struct of_drconf_cell {
+ u64 base_addr;
+ u32 drc_index;
+ u32 reserved;
+ u32 aa_index;
+ u32 flags;
+};
+
+#define DRCONF_MEM_ASSIGNED 0x00000008
+#define DRCONF_MEM_AI_INVALID 0x00000040
+#define DRCONF_MEM_RESERVED 0x00000080
+
/*
* NB: This is here while we transition from using asm/prom.h
* to linux/of.h
Index: powerpc/arch/powerpc/mm/numa.c
===================================================================
--- powerpc.orig/arch/powerpc/mm/numa.c 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/mm/numa.c 2009-09-11 12:52:11.000000000 -0500
@@ -296,18 +296,6 @@
return result;
}

-struct of_drconf_cell {
- u64 base_addr;
- u32 drc_index;
- u32 reserved;
- u32 aa_index;
- u32 flags;
-};
-
-#define DRCONF_MEM_ASSIGNED 0x00000008
-#define DRCONF_MEM_AI_INVALID 0x00000040
-#define DRCONF_MEM_RESERVED 0x00000080
-
/*
* Read the next lmb list entry from the ibm,dynamic-memory property
* and return the information in the provided of_drconf_cell structure.

2009-09-11 21:13:24

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 3/5] Export memory_sysdev_class

Export the memory_sysdev_class structure. This is needed so we can create
a 'release' file in sysfs in addition to the existing 'probe' file in
order to support DLPAR removal of memory on the powerpc/pseries platform.
The new 'release' file will be powerpc/pseries only.

Signed-off-by: Nathan Fontenot <[email protected]>
---

Index: powerpc/drivers/base/memory.c
===================================================================
--- powerpc.orig/drivers/base/memory.c 2009-09-11 12:43:40.000000000 -0500
+++ powerpc/drivers/base/memory.c 2009-09-11 12:52:26.000000000 -0500
@@ -28,9 +28,10 @@

#define MEMORY_CLASS_NAME "memory"

-static struct sysdev_class memory_sysdev_class = {
+struct sysdev_class memory_sysdev_class = {
.name = MEMORY_CLASS_NAME,
};
+EXPORT_SYMBOL(memory_sysdev_class);

static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
{
Index: powerpc/include/linux/memory_hotplug.h
===================================================================
--- powerpc.orig/include/linux/memory_hotplug.h 2009-09-11 12:43:44.000000000 -0500
+++ powerpc/include/linux/memory_hotplug.h 2009-09-11 12:52:26.000000000 -0500
@@ -12,6 +12,8 @@

#ifdef CONFIG_MEMORY_HOTPLUG

+extern struct sysdev_class memory_sysdev_class;
+
/*
* Types for free bootmem.
* The normal smallest mapcount is -1. Here is smaller value than it.

2009-09-11 21:14:41

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 4/5] kernel handling of memory DLPAR

This adds the capability to DLPAR add and remove memory from the kernel. The
patch extends the powerpc handling of memory_add_physaddr_to_nid(), which is
called from the sysfs memory 'probe' file to first ensure that the memory
has been added to the system. This is done by creating a platform specific
callout from the routine. The pseries implementation of this handles the
DLPAR work to add the memory to the system and update the device tree.

The patch also creates a pseries only 'release' sys file,
/sys/devices/system/memory/release. This file handles the DLPAR release of
memory back to firmware and updating of the device-tree.

Signed-off-by: Nathan Fontenot <[email protected]>
---

Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/dlpar.c 2009-09-11 12:51:52.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-09-11 13:05:23.000000000 -0500
@@ -16,6 +16,10 @@
#include <linux/notifier.h>
#include <linux/proc_fs.h>
#include <linux/spinlock.h>
+#include <linux/memory_hotplug.h>
+#include <linux/sysdev.h>
+#include <linux/sysfs.h>
+

#include <asm/prom.h>
#include <asm/machdep.h>
@@ -404,13 +408,174 @@
return 0;
}

+static void free_property(struct property *prop)
+{
+ if (prop->name)
+ kfree(prop->name);
+ if (prop->value)
+ kfree(prop->value);
+ kfree(prop);
+}
+
+static struct property *clone_property(struct property *old_prop)
+{
+ struct property *new_prop;
+
+ new_prop = kzalloc((sizeof *new_prop), GFP_KERNEL);
+ if (!new_prop)
+ return NULL;
+
+ new_prop->name = kzalloc(strlen(old_prop->name) + 1, GFP_KERNEL);
+ new_prop->value = kzalloc(old_prop->length + 1, GFP_KERNEL);
+ if (!new_prop->name || !new_prop->value) {
+ free_property(new_prop);
+ return NULL;
+ }
+
+ strcpy(new_prop->name, old_prop->name);
+ memcpy(new_prop->value, old_prop->value, old_prop->length);
+ new_prop->length = old_prop->length;
+
+ return new_prop;
+}
+
+int platform_probe_memory(u64 phys_addr)
+{
+ struct device_node *dn;
+ struct property *new_prop, *old_prop;
+ struct property *lmb_sz_prop;
+ struct of_drconf_cell *drmem;
+ u64 lmb_size;
+ int num_entries, i, rc;
+
+ if (!phys_addr)
+ return -EINVAL;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return -EINVAL;
+
+ lmb_sz_prop = of_find_property(dn, "ibm,lmb-size", NULL);
+ lmb_size = *(u64 *)lmb_sz_prop->value;
+
+ old_prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+
+ num_entries = *(u32 *)old_prop->value;
+ drmem = (struct of_drconf_cell *)
+ ((char *)old_prop->value + sizeof(u32));
+
+ for (i = 0; i < num_entries; i++) {
+ u64 lmb_end_addr = drmem[i].base_addr + lmb_size;
+ if (phys_addr >= drmem[i].base_addr
+ && phys_addr < lmb_end_addr)
+ break;
+ }
+
+ if (i >= num_entries) {
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ if (drmem[i].flags & DRCONF_MEM_ASSIGNED) {
+ of_node_put(dn);
+ return 0;
+ }
+
+ rc = acquire_drc(drmem[i].drc_index);
+ if (rc) {
+ of_node_put(dn);
+ return -1;
+ }
+
+ new_prop = clone_property(old_prop);
+ drmem = (struct of_drconf_cell *)
+ ((char *)new_prop->value + sizeof(u32));
+
+ drmem[i].flags |= DRCONF_MEM_ASSIGNED;
+ prom_update_property(dn, new_prop, old_prop);
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_DRCONF_MEM_ADD,
+ &drmem[i].base_addr);
+ if (rc == NOTIFY_BAD) {
+ prom_update_property(dn, old_prop, new_prop);
+ release_drc(drmem[i].drc_index);
+ }
+
+ of_node_put(dn);
+ return rc == NOTIFY_BAD ? -1 : 0;
+}
+
+static ssize_t memory_release_store(struct class *class, const char *buf,
+ size_t count)
+{
+ u32 drc_index;
+ struct device_node *dn;
+ struct property *new_prop, *old_prop;
+ struct of_drconf_cell *drmem;
+ int num_entries;
+ int i, rc;
+
+ drc_index = simple_strtoull(buf, NULL, 0);
+ if (!drc_index)
+ return -EINVAL;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return 0;
+
+ old_prop = of_find_property(dn, "ibm,dynamic-memory", NULL);
+ new_prop = clone_property(old_prop);
+
+ num_entries = *(u32 *)new_prop->value;
+ drmem = (struct of_drconf_cell *)
+ ((char *)new_prop->value + sizeof(u32));
+
+ for (i = 0; i < num_entries; i++) {
+ if (drmem[i].drc_index == drc_index)
+ break;
+ }
+
+ if (i >= num_entries) {
+ free_property(new_prop);
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ drmem[i].flags &= ~DRCONF_MEM_ASSIGNED;
+ prom_update_property(dn, new_prop, old_prop);
+
+ rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+ PSERIES_DRCONF_MEM_REMOVE,
+ &drmem[i].base_addr);
+ if (rc != NOTIFY_BAD)
+ rc = release_drc(drc_index);
+
+ if (rc)
+ prom_update_property(dn, old_prop, new_prop);
+
+ of_node_put(dn);
+ return rc ? -1 : count;
+}
+
+static struct class_attribute class_attr_mem_release =
+ __ATTR(release, S_IWUSR, NULL, memory_release_store);
+
static int pseries_dlpar_init(void)
{
+ int rc;
+
spin_lock_init(&workarea_lock);

if (!machine_is(pseries))
return 0;

+ rc = sysfs_create_file(&memory_sysdev_class.kset.kobj,
+ &class_attr_mem_release.attr);
+ if (rc)
+ printk(KERN_INFO "DLPAR: Could not create sysfs memory "
+ "release file\n");
+
return 0;
}
__initcall(pseries_dlpar_init);
Index: powerpc/arch/powerpc/mm/mem.c
===================================================================
--- powerpc.orig/arch/powerpc/mm/mem.c 2009-09-11 12:43:39.000000000 -0500
+++ powerpc/arch/powerpc/mm/mem.c 2009-09-11 12:52:42.000000000 -0500
@@ -111,8 +111,19 @@
#ifdef CONFIG_MEMORY_HOTPLUG

#ifdef CONFIG_NUMA
+int __attribute ((weak)) platform_probe_memory(u64 start)
+{
+ return 0;
+}
+
int memory_add_physaddr_to_nid(u64 start)
{
+ int rc;
+
+ rc = platform_probe_memory(start);
+ if (rc)
+ return rc;
+
return hot_add_scn_to_nid(start);
}
#endif

2009-09-11 21:15:45

by Nathan Fontenot

[permalink] [raw]
Subject: [PATCH 5/5] kernel handling of CPU DLPAR

This adds the capability to DLPAR add and remove CPUs from the kernel. The
creates two new files /sys/devices/system/cpu/probe and
/sys/devices/system/cpu/release to handle the DLPAR addition and removal of
CPUs respectively.

CPU DLPAR add is accomplished by writing the drc-index of the CPU to the
probe file, and removal is done by writing the device-tree path of the cpu
to the release file.

Signed-off-by: Nathan Fontenot <[email protected]>

Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
===================================================================
--- powerpc.orig/arch/powerpc/platforms/pseries/dlpar.c 2009-09-11 13:05:23.000000000 -0500
+++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-09-11 14:10:28.000000000 -0500
@@ -1,11 +1,11 @@
/*
- * dlpar.c - support for dynamic reconfiguration (including PCI
- * Hotplug and Dynamic Logical Partitioning on RPA platforms).
+ * dlpar.c - support for dynamic reconfiguration (including PCI,
+ * Memory, and CPU Hotplug and Dynamic Logical Partitioning on
+ * PAPR platforms).
*
* Copyright (C) 2009 Nathan Fontenot
* Copyright (C) 2009 IBM Corporation
*
- *
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
@@ -19,6 +19,7 @@
#include <linux/memory_hotplug.h>
#include <linux/sysdev.h>
#include <linux/sysfs.h>
+#include <linux/cpu.h>


#include <asm/prom.h>
@@ -558,8 +559,79 @@
return rc ? -1 : count;
}

+static ssize_t cpu_probe_store(struct class *class, const char *buf,
+ size_t count)
+{
+ struct device_node *dn;
+ u32 drc_index;
+ char *cpu_name;
+ int rc;
+
+ drc_index = simple_strtoull(buf, NULL, 0);
+ if (!drc_index)
+ return -EINVAL;
+
+ rc = acquire_drc(drc_index);
+ if (rc)
+ return rc;
+
+ dn = configure_connector(drc_index);
+ if (!dn) {
+ release_drc(drc_index);
+ return rc;
+ }
+
+ /* fixup dn name */
+ cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus/") + 1,
+ GFP_KERNEL);
+ sprintf(cpu_name, "/cpus/%s", dn->full_name);
+ kfree(dn->full_name);
+ dn->full_name = cpu_name;
+
+ rc = add_device_tree_nodes(dn);
+ if (rc)
+ release_drc(drc_index);
+
+ return rc ? rc : count;
+}
+
+static ssize_t cpu_release_store(struct class *class, const char *buf,
+ size_t count)
+{
+ struct device_node *dn;
+ u32 *drc_index;
+ int rc;
+
+ dn = of_find_node_by_path(buf);
+ if (!dn)
+ return -EINVAL;
+
+ drc_index = (u32 *)of_get_property(dn, "ibm,my-drc-index", NULL);
+ if (!drc_index) {
+ of_node_put(dn);
+ return -EINVAL;
+ }
+
+ rc = release_drc(*drc_index);
+ if (rc) {
+ of_node_put(dn);
+ return rc;
+ }
+
+ rc = remove_device_tree_nodes(dn);
+ if (rc)
+ acquire_drc(*drc_index);
+
+ of_node_put(dn);
+ return rc? rc : count;
+}
+
static struct class_attribute class_attr_mem_release =
__ATTR(release, S_IWUSR, NULL, memory_release_store);
+static struct class_attribute class_attr_cpu_probe =
+ __ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
+static struct class_attribute class_attr_cpu_release =
+ __ATTR(release, S_IWUSR, NULL, cpu_release_store);

static int pseries_dlpar_init(void)
{
@@ -576,6 +648,18 @@
printk(KERN_INFO "DLPAR: Could not create sysfs memory "
"release file\n");

+ rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &class_attr_cpu_probe.attr);
+ if (rc)
+ printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
+ "probe file\n");
+
+ rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &class_attr_cpu_release.attr);
+ if (rc)
+ printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
+ "release file\n");
+
return 0;
}
__initcall(pseries_dlpar_init);

2009-09-11 21:22:43

by Daniel Walker

[permalink] [raw]
Subject: Re: [PATCH 0/5] kernel handling of dynamic logical partitioning

On Fri, 2009-09-11 at 16:08 -0500, Nathan Fontenot wrote:
> am cc'ing lkml.
>
> Patches include in this set:
> 1/5 - DLPAR infrastructure for powerpc/pseries platform.
> 2/5 - Move the of_drconf_cell struct to prom.h
> 3/5 - Export the memory sysdev class
> 4/5 - Memory DLPAR handling
> 5/5 - CPU DLPAR handling
>

It looks like a couple of your patches have some checkpatch issues..
Could you run these through scripts/checkpatch.pl and clean up any
problems it raises ? Specifically patches 1, 4, and 5 ..

Daniel

2009-09-14 06:39:13

by Andrey Panin

[permalink] [raw]
Subject: Re: [PATCH 4/5] kernel handling of memory DLPAR

On 254, 09 11, 2009 at 04:14:35PM -0500, Nathan Fontenot wrote:
> This adds the capability to DLPAR add and remove memory from the kernel. The
> patch extends the powerpc handling of memory_add_physaddr_to_nid(), which is
> called from the sysfs memory 'probe' file to first ensure that the memory
> has been added to the system. This is done by creating a platform specific
> callout from the routine. The pseries implementation of this handles the
> DLPAR work to add the memory to the system and update the device tree.
>
> The patch also creates a pseries only 'release' sys file,
> /sys/devices/system/memory/release. This file handles the DLPAR
> release of
> memory back to firmware and updating of the device-tree.
>
> Signed-off-by: Nathan Fontenot <[email protected]>

> +static struct property *clone_property(struct property *old_prop)
> +{
> + struct property *new_prop;
> +
> + new_prop = kzalloc((sizeof *new_prop), GFP_KERNEL);
> + if (!new_prop)
> + return NULL;
> +
> + new_prop->name = kzalloc(strlen(old_prop->name) + 1, GFP_KERNEL);
> + new_prop->value = kzalloc(old_prop->length + 1, GFP_KERNEL);

Memory leak here. What if one kzalloc() succeeded and another failed ?

> + if (!new_prop->name || !new_prop->value) {
> + free_property(new_prop);
> + return NULL;
> + }
> +
> + strcpy(new_prop->name, old_prop->name);
> + memcpy(new_prop->value, old_prop->value, old_prop->length);
> + new_prop->length = old_prop->length;
> +
> + return new_prop;
> +}

2009-09-14 06:41:27

by Andrey Panin

[permalink] [raw]
Subject: Re: [PATCH 5/5] kernel handling of CPU DLPAR

On 254, 09 11, 2009 at 04:15:33PM -0500, Nathan Fontenot wrote:
> This adds the capability to DLPAR add and remove CPUs from the kernel. The
> creates two new files /sys/devices/system/cpu/probe and
> /sys/devices/system/cpu/release to handle the DLPAR addition and
> removal of
> CPUs respectively.
>
> CPU DLPAR add is accomplished by writing the drc-index of the CPU to the
> probe file, and removal is done by writing the device-tree path of the cpu
> to the release file.
>
> Signed-off-by: Nathan Fontenot <[email protected]>

> +static ssize_t cpu_probe_store(struct class *class, const char *buf,
> + size_t count)
> +{
> + struct device_node *dn;
> + u32 drc_index;
> + char *cpu_name;
> + int rc;
> +
> + drc_index = simple_strtoull(buf, NULL, 0);
> + if (!drc_index)
> + return -EINVAL;
> +
> + rc = acquire_drc(drc_index);
> + if (rc)
> + return rc;
> +
> + dn = configure_connector(drc_index);
> + if (!dn) {
> + release_drc(drc_index);
> + return rc;
> + }
> +
> + /* fixup dn name */
> + cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus/") + 1,
> + GFP_KERNEL);

Unchecked memory allocation with immediate crash in case of failure.

> + sprintf(cpu_name, "/cpus/%s", dn->full_name);
> + kfree(dn->full_name);
> + dn->full_name = cpu_name;
> +
> + rc = add_device_tree_nodes(dn);
> + if (rc)
> + release_drc(drc_index);
> +
> + return rc ? rc : count;
> +}
> +
> +static ssize_t cpu_release_store(struct class *class, const char *buf,
> + size_t count)
> +{
> + struct device_node *dn;
> + u32 *drc_index;
> + int rc;
> +
> + dn = of_find_node_by_path(buf);
> + if (!dn)
> + return -EINVAL;
> +
> + drc_index = (u32 *)of_get_property(dn, "ibm,my-drc-index", NULL);
> + if (!drc_index) {
> + of_node_put(dn);
> + return -EINVAL;
> + }
> +
> + rc = release_drc(*drc_index);
> + if (rc) {
> + of_node_put(dn);
> + return rc;
> + }
> +
> + rc = remove_device_tree_nodes(dn);
> + if (rc)
> + acquire_drc(*drc_index);
> +
> + of_node_put(dn);
> + return rc? rc : count;
> +}
> +
> static struct class_attribute class_attr_mem_release =
> __ATTR(release, S_IWUSR, NULL, memory_release_store);
> +static struct class_attribute class_attr_cpu_probe =
> + __ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
> +static struct class_attribute class_attr_cpu_release =
> + __ATTR(release, S_IWUSR, NULL, cpu_release_store);
>
> static int pseries_dlpar_init(void)
> {
> @@ -576,6 +648,18 @@
> printk(KERN_INFO "DLPAR: Could not create sysfs memory "
> "release file\n");
>
> + rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> + &class_attr_cpu_probe.attr);
> + if (rc)
> + printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
> + "probe file\n");
> +
> + rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
> + &class_attr_cpu_release.attr);
> + if (rc)
> + printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
> + "release file\n");
> +
> return 0;
> }
> __initcall(pseries_dlpar_init);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

2009-09-14 18:18:37

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 4/5] kernel handling of memory DLPAR

Andrey Panin wrote:
> On 254, 09 11, 2009 at 04:14:35PM -0500, Nathan Fontenot wrote:
>> This adds the capability to DLPAR add and remove memory from the kernel. The
>> patch extends the powerpc handling of memory_add_physaddr_to_nid(), which is
>> called from the sysfs memory 'probe' file to first ensure that the memory
>> has been added to the system. This is done by creating a platform specific
>> callout from the routine. The pseries implementation of this handles the
>> DLPAR work to add the memory to the system and update the device tree.
>>
>> The patch also creates a pseries only 'release' sys file,
>> /sys/devices/system/memory/release. This file handles the DLPAR
>> release of
>> memory back to firmware and updating of the device-tree.
>>
>> Signed-off-by: Nathan Fontenot <[email protected]>
>
>> +static struct property *clone_property(struct property *old_prop)
>> +{
>> + struct property *new_prop;
>> +
>> + new_prop = kzalloc((sizeof *new_prop), GFP_KERNEL);
>> + if (!new_prop)
>> + return NULL;
>> +
>> + new_prop->name = kzalloc(strlen(old_prop->name) + 1, GFP_KERNEL);
>> + new_prop->value = kzalloc(old_prop->length + 1, GFP_KERNEL);
>
> Memory leak here. What if one kzalloc() succeeded and another failed ?
>

This should be fine. The free_property routine will free the name or value
fields if they are allocated.

-Nathan

>> + if (!new_prop->name || !new_prop->value) {
>> + free_property(new_prop);
>> + return NULL;
>> + }
>> +
>> + strcpy(new_prop->name, old_prop->name);
>> + memcpy(new_prop->value, old_prop->value, old_prop->length);
>> + new_prop->length = old_prop->length;
>> +
>> + return new_prop;
>> +}

2009-09-14 18:20:09

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 5/5] kernel handling of CPU DLPAR

Andrey Panin wrote:
> On 254, 09 11, 2009 at 04:15:33PM -0500, Nathan Fontenot wrote:
>> This adds the capability to DLPAR add and remove CPUs from the kernel. The
>> creates two new files /sys/devices/system/cpu/probe and
>> /sys/devices/system/cpu/release to handle the DLPAR addition and
>> removal of
>> CPUs respectively.
>>
>> CPU DLPAR add is accomplished by writing the drc-index of the CPU to the
>> probe file, and removal is done by writing the device-tree path of the cpu
>> to the release file.
>>
>> Signed-off-by: Nathan Fontenot <[email protected]>
>
>> +static ssize_t cpu_probe_store(struct class *class, const char *buf,
>> + size_t count)
>> +{
>> + struct device_node *dn;
>> + u32 drc_index;
>> + char *cpu_name;
>> + int rc;
>> +
>> + drc_index = simple_strtoull(buf, NULL, 0);
>> + if (!drc_index)
>> + return -EINVAL;
>> +
>> + rc = acquire_drc(drc_index);
>> + if (rc)
>> + return rc;
>> +
>> + dn = configure_connector(drc_index);
>> + if (!dn) {
>> + release_drc(drc_index);
>> + return rc;
>> + }
>> +
>> + /* fixup dn name */
>> + cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus/") + 1,
>> + GFP_KERNEL);
>
> Unchecked memory allocation with immediate crash in case of failure.

Yep, thats a bad thing. I'll fix this in an updated patch. thanks.

-Nathan

>
>> + sprintf(cpu_name, "/cpus/%s", dn->full_name);
>> + kfree(dn->full_name);
>> + dn->full_name = cpu_name;
>> +
>> + rc = add_device_tree_nodes(dn);
>> + if (rc)
>> + release_drc(drc_index);
>> +
>> + return rc ? rc : count;
>> +}
>> +
>> +static ssize_t cpu_release_store(struct class *class, const char *buf,
>> + size_t count)
>> +{
>> + struct device_node *dn;
>> + u32 *drc_index;
>> + int rc;
>> +
>> + dn = of_find_node_by_path(buf);
>> + if (!dn)
>> + return -EINVAL;
>> +
>> + drc_index = (u32 *)of_get_property(dn, "ibm,my-drc-index", NULL);
>> + if (!drc_index) {
>> + of_node_put(dn);
>> + return -EINVAL;
>> + }
>> +
>> + rc = release_drc(*drc_index);
>> + if (rc) {
>> + of_node_put(dn);
>> + return rc;
>> + }
>> +
>> + rc = remove_device_tree_nodes(dn);
>> + if (rc)
>> + acquire_drc(*drc_index);
>> +
>> + of_node_put(dn);
>> + return rc? rc : count;
>> +}
>> +
>> static struct class_attribute class_attr_mem_release =
>> __ATTR(release, S_IWUSR, NULL, memory_release_store);
>> +static struct class_attribute class_attr_cpu_probe =
>> + __ATTR(probe, S_IWUSR, NULL, cpu_probe_store);
>> +static struct class_attribute class_attr_cpu_release =
>> + __ATTR(release, S_IWUSR, NULL, cpu_release_store);
>>
>> static int pseries_dlpar_init(void)
>> {
>> @@ -576,6 +648,18 @@
>> printk(KERN_INFO "DLPAR: Could not create sysfs memory "
>> "release file\n");
>>
>> + rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
>> + &class_attr_cpu_probe.attr);
>> + if (rc)
>> + printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
>> + "probe file\n");
>> +
>> + rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
>> + &class_attr_cpu_release.attr);
>> + if (rc)
>> + printk(KERN_INFO "DLPAR: Could not create sysfs cpu "
>> + "release file\n");
>> +
>> return 0;
>> }
>> __initcall(pseries_dlpar_init);
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at http://www.tux.org/lkml/
>>

2009-09-14 18:22:19

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 0/5] kernel handling of dynamic logical partitioning

Daniel Walker wrote:
> On Fri, 2009-09-11 at 16:08 -0500, Nathan Fontenot wrote:
>> am cc'ing lkml.
>>
>> Patches include in this set:
>> 1/5 - DLPAR infrastructure for powerpc/pseries platform.
>> 2/5 - Move the of_drconf_cell struct to prom.h
>> 3/5 - Export the memory sysdev class
>> 4/5 - Memory DLPAR handling
>> 5/5 - CPU DLPAR handling
>>
>
> It looks like a couple of your patches have some checkpatch issues..
> Could you run these through scripts/checkpatch.pl and clean up any
> problems it raises ? Specifically patches 1, 4, and 5 ..
>

thanks for checking this, I obviously forgot. I'll post updated patches
(after running checkpatch on them) with fixes.

-Nathan

2009-09-14 18:23:17

by Daniel Walker

[permalink] [raw]
Subject: Re: [PATCH 0/5] kernel handling of dynamic logical partitioning

On Mon, 2009-09-14 at 13:22 -0500, Nathan Fontenot wrote:
> Daniel Walker wrote:
> > On Fri, 2009-09-11 at 16:08 -0500, Nathan Fontenot wrote:
> >> am cc'ing lkml.
> >>
> >> Patches include in this set:
> >> 1/5 - DLPAR infrastructure for powerpc/pseries platform.
> >> 2/5 - Move the of_drconf_cell struct to prom.h
> >> 3/5 - Export the memory sysdev class
> >> 4/5 - Memory DLPAR handling
> >> 5/5 - CPU DLPAR handling
> >>
> >
> > It looks like a couple of your patches have some checkpatch issues..
> > Could you run these through scripts/checkpatch.pl and clean up any
> > problems it raises ? Specifically patches 1, 4, and 5 ..
> >
>
> thanks for checking this, I obviously forgot. I'll post updated patches
> (after running checkpatch on them) with fixes.

Ok, thanks..

Daniel

2009-09-14 18:30:59

by Brian King

[permalink] [raw]
Subject: Re: [PATCH 1/5] dynamic logical partitioning infrastructure

Nathan Fontenot wrote:
> +#include <linux/kernel.h>
> +#include <linux/kref.h>
> +#include <linux/notifier.h>
> +#include <linux/proc_fs.h>
> +#include <linux/spinlock.h>
> +
> +#include <asm/prom.h>
> +#include <asm/machdep.h>
> +#include <asm/uaccess.h>
> +#include <asm/rtas.h>
> +#include <asm/pSeries_reconfig.h>
> +
> +#define CFG_CONN_WORK_SIZE 4096
> +static char workarea[CFG_CONN_WORK_SIZE];
> +spinlock_t workarea_lock;

This can be:

static DEFINE_SPINLOCK(workarea_lock);

Then you can get rid of the runtime initializer.

> +
> +int release_drc(u32 drc_index)
> +{
> + int dr_status, rc;
> +
> + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
> + DR_ENTITY_SENSE, drc_index);
> + if (rc || dr_status != DR_ENTITY_PRESENT)
> + return -1;
> +
> + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
> + if (rc)
> + return -1;
> +
> + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
> + if (rc) {
> + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
> + return -1;
> + }

Is there a better return value here that might be more descriptive than -1?


> +
> + return 0;
> +}
> +
> +static int pseries_dlpar_init(void)
> +{
> + spin_lock_init(&workarea_lock);
> +
> + if (!machine_is(pseries))
> + return 0;

What's the point of this if check if you return 0 either way?

> +
> + return 0;
> +}
> +__initcall(pseries_dlpar_init);


> Index: powerpc/arch/powerpc/platforms/pseries/reconfig.c
> ===================================================================
> --- powerpc.orig/arch/powerpc/platforms/pseries/reconfig.c 2009-09-11
> 12:43:39.000000000 -0500
> +++ powerpc/arch/powerpc/platforms/pseries/reconfig.c 2009-09-11
> 12:51:52.000000000 -0500
> @@ -95,7 +95,7 @@
> return parent;
> }
>
> -static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
> +struct blocking_notifier_head pSeries_reconfig_chain =
> BLOCKING_NOTIFIER_INIT(pSeries_reconfig_chain);

Can't this just be?

BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);

--
Brian King
Linux on Power Virtualization
IBM Linux Technology Center


2009-09-15 14:15:16

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 1/5] dynamic logical partitioning infrastructure

Brian King wrote:
> Nathan Fontenot wrote:
>> +#include <linux/kernel.h>
>> +#include <linux/kref.h>
>> +#include <linux/notifier.h>
>> +#include <linux/proc_fs.h>
>> +#include <linux/spinlock.h>
>> +
>> +#include <asm/prom.h>
>> +#include <asm/machdep.h>
>> +#include <asm/uaccess.h>
>> +#include <asm/rtas.h>
>> +#include <asm/pSeries_reconfig.h>
>> +
>> +#define CFG_CONN_WORK_SIZE 4096
>> +static char workarea[CFG_CONN_WORK_SIZE];
>> +spinlock_t workarea_lock;
>
> This can be:
>
> static DEFINE_SPINLOCK(workarea_lock);
>
> Then you can get rid of the runtime initializer.

Good catch, I will fix it in the updated patches.

>
>> +
>> +int release_drc(u32 drc_index)
>> +{
>> + int dr_status, rc;
>> +
>> + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
>> + DR_ENTITY_SENSE, drc_index);
>> + if (rc || dr_status != DR_ENTITY_PRESENT)
>> + return -1;
>> +
>> + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
>> + if (rc)
>> + return -1;
>> +
>> + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
>> + if (rc) {
>> + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
>> + return -1;
>> + }
>
> Is there a better return value here that might be more descriptive than -1?

Yes, I could return the rtas error code to the user to allow the caller to
evaluate it if they wanted to. For what I am doing I am only concerned with
success/failure so I did not deal with returning anything other than -1.

I'll update the next patch to return the rtas error for failures and 0
for success.

>
>
>> +
>> + return 0;
>> +}
>> +
>> +static int pseries_dlpar_init(void)
>> +{
>> + spin_lock_init(&workarea_lock);
>> +
>> + if (!machine_is(pseries))
>> + return 0;
>
> What's the point of this if check if you return 0 either way?

Yes, it seems a bit odd here, but in patches later in this series I
add additional initialization steps after the machine_is() check
such that it makes sense to bail out here if the check fails.

>
>> +
>> + return 0;
>> +}
>> +__initcall(pseries_dlpar_init);
>
>
>> Index: powerpc/arch/powerpc/platforms/pseries/reconfig.c
>> ===================================================================
>> --- powerpc.orig/arch/powerpc/platforms/pseries/reconfig.c 2009-09-11
>> 12:43:39.000000000 -0500
>> +++ powerpc/arch/powerpc/platforms/pseries/reconfig.c 2009-09-11
>> 12:51:52.000000000 -0500
>> @@ -95,7 +95,7 @@
>> return parent;
>> }
>>
>> -static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
>> +struct blocking_notifier_head pSeries_reconfig_chain =
>> BLOCKING_NOTIFIER_INIT(pSeries_reconfig_chain);
>
> Can't this just be?
>
> BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
>

I think I tried this and was having issues, I don't remember what they
were though. I will try to fix this in the updated patch.

-Nathan Fontenot

2009-09-15 14:38:26

by Brian King

[permalink] [raw]
Subject: Re: [PATCH 2/5] move of_drconf_cell definition to prom.h

Nathan Fontenot wrote:
> Move the definition of the of_drconf_cell struct from numa.c to prom.h.
> This
> is needed so that we can parse the ibm,dynamic-memory device-tree property
> when DLPAR adding and removing memory.
>
> Signed-off-by: Nathan Fontenot <[email protected]>
> ---
>
> Index: powerpc/arch/powerpc/include/asm/prom.h
> ===================================================================
> --- powerpc.orig/arch/powerpc/include/asm/prom.h 2009-09-11
> 12:43:39.000000000 -0500
> +++ powerpc/arch/powerpc/include/asm/prom.h 2009-09-11
> 12:52:11.000000000 -0500
> @@ -349,6 +349,18 @@
> */
> extern void __iomem *of_iomap(struct device_node *device, int index);
>
> +struct of_drconf_cell {
> + u64 base_addr;
> + u32 drc_index;
> + u32 reserved;
> + u32 aa_index;
> + u32 flags;
> +};

Might as well fix this up to use a tab instead of spaces for the indentation
if you are moving it.

> +
> +#define DRCONF_MEM_ASSIGNED 0x00000008
> +#define DRCONF_MEM_AI_INVALID 0x00000040
> +#define DRCONF_MEM_RESERVED 0x00000080
> +
> /*
> * NB: This is here while we transition from using asm/prom.h
> * to linux/of.h
> Index: powerpc/arch/powerpc/mm/numa.c
> ===================================================================
> --- powerpc.orig/arch/powerpc/mm/numa.c 2009-09-11 12:43:39.000000000
> -0500
> +++ powerpc/arch/powerpc/mm/numa.c 2009-09-11 12:52:11.000000000 -0500
> @@ -296,18 +296,6 @@
> return result;
> }
>
> -struct of_drconf_cell {
> - u64 base_addr;
> - u32 drc_index;
> - u32 reserved;
> - u32 aa_index;
> - u32 flags;
> -};
> -
> -#define DRCONF_MEM_ASSIGNED 0x00000008
> -#define DRCONF_MEM_AI_INVALID 0x00000040
> -#define DRCONF_MEM_RESERVED 0x00000080
> -
> /*
> * Read the next lmb list entry from the ibm,dynamic-memory property
> * and return the information in the provided of_drconf_cell structure.
> _______________________________________________
> Linuxppc-dev mailing list
> [email protected]
> https://lists.ozlabs.org/listinfo/linuxppc-dev


--
Brian King
Linux on Power Virtualization
IBM Linux Technology Center

2009-09-15 14:49:01

by Brian King

[permalink] [raw]
Subject: Re: [PATCH 5/5] kernel handling of CPU DLPAR

Nathan Fontenot wrote:
> +static ssize_t cpu_probe_store(struct class *class, const char *buf,
> + size_t count)
> +{
> + struct device_node *dn;
> + u32 drc_index;
> + char *cpu_name;
> + int rc;
> +
> + drc_index = simple_strtoull(buf, NULL, 0);

Can just use simple_strtoul here instead.

> + if (!drc_index)
> + return -EINVAL;
> +
> + rc = acquire_drc(drc_index);
> + if (rc)
> + return rc;
> +
> + dn = configure_connector(drc_index);
> + if (!dn) {
> + release_drc(drc_index);
> + return rc;
> + }
> +
> + /* fixup dn name */
> + cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus/") + 1,
> + GFP_KERNEL);
> + sprintf(cpu_name, "/cpus/%s", dn->full_name);
> + kfree(dn->full_name);
> + dn->full_name = cpu_name;
> +
> + rc = add_device_tree_nodes(dn);
> + if (rc)
> + release_drc(drc_index);
> +
> + return rc ? rc : count;
> +}
> +


--
Brian King
Linux on Power Virtualization
IBM Linux Technology Center

2009-09-16 01:33:46

by Michael Ellerman

[permalink] [raw]
Subject: Re: [PATCH 1/5] dynamic logical partitioning infrastructure

On Fri, 2009-09-11 at 16:10 -0500, Nathan Fontenot wrote:
> This patch provides the kernel DLPAR infrastructure in a new filed named
> dlpar.c. The functionality provided is for acquiring and releasing a
> resource from firmware and the parsing of information returned from the
> ibm,configure-connector rtas call. Additionally this exports the
> pSeries reconfiguration notifier chain so that it can be invoked when
> device tree updates are made.
>
> Signed-off-by: Nathan Fontenot <[email protected]>
> ---
>
> Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-09-11 12:51:52.000000000 -0500
> @@ -0,0 +1,416 @@
> +/*
> + * dlpar.c - support for dynamic reconfiguration (including PCI
> + * Hotplug and Dynamic Logical Partitioning on RPA platforms).
> + *
> + * Copyright (C) 2009 Nathan Fontenot
> + * Copyright (C) 2009 IBM Corporation
> + *
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/kref.h>
> +#include <linux/notifier.h>
> +#include <linux/proc_fs.h>
> +#include <linux/spinlock.h>
> +
> +#include <asm/prom.h>
> +#include <asm/machdep.h>
> +#include <asm/uaccess.h>
> +#include <asm/rtas.h>
> +#include <asm/pSeries_reconfig.h>
> +
> +#define CFG_CONN_WORK_SIZE 4096
> +static char workarea[CFG_CONN_WORK_SIZE];
> +spinlock_t workarea_lock;
> +
> +static struct property *parse_cc_property(char *workarea)
> +{
> + struct property *prop;
> + u32 *work_ints;
> + char *name;
> + char *value;
> +
> + prop = kzalloc(sizeof(*prop), GFP_KERNEL);
> + if (!prop)
> + return NULL;
> +
> + work_ints = (u32 *)workarea;
> + name = workarea + work_ints[2];
> + prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
> + if (!prop->name) {
> + kfree(prop);
> + return NULL;
> + }
> +
> + strcpy(prop->name, name);
> +
> + prop->length = work_ints[3];
> + value = workarea + work_ints[4];
> + prop->value = kzalloc(prop->length, GFP_KERNEL);


The use of work_ints is a bit opaque, it might be clearer if you define
a struct, something like:

struct cc_prop {
u32 ?;
u32 ?;
u32 name_offset;
u32 length;
u32 value_offset;
};

cc = (struct cc_prop *)workarea;

name = workarea + cc->name_offset;
..
prop->length = cc->length;
value = workarea + cc->value_offset;

etc.


Also I don't see any checking of the offsets into workarea (for name &
value), vs the size of workarea.

cheers


Attachments:
signature.asc (197.00 B)
This is a digitally signed message part

2009-09-17 14:45:29

by Nathan Fontenot

[permalink] [raw]
Subject: Re: [PATCH 1/5] dynamic logical partitioning infrastructure

Michael Ellerman wrote:
> On Fri, 2009-09-11 at 16:10 -0500, Nathan Fontenot wrote:
>> This patch provides the kernel DLPAR infrastructure in a new filed named
>> dlpar.c. The functionality provided is for acquiring and releasing a
>> resource from firmware and the parsing of information returned from the
>> ibm,configure-connector rtas call. Additionally this exports the
>> pSeries reconfiguration notifier chain so that it can be invoked when
>> device tree updates are made.
>>
>> Signed-off-by: Nathan Fontenot <[email protected]>
>> ---
>>
>> Index: powerpc/arch/powerpc/platforms/pseries/dlpar.c
>> ===================================================================
>> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
>> +++ powerpc/arch/powerpc/platforms/pseries/dlpar.c 2009-09-11 12:51:52.000000000 -0500
>> @@ -0,0 +1,416 @@
>> +/*
>> + * dlpar.c - support for dynamic reconfiguration (including PCI
>> + * Hotplug and Dynamic Logical Partitioning on RPA platforms).
>> + *
>> + * Copyright (C) 2009 Nathan Fontenot
>> + * Copyright (C) 2009 IBM Corporation
>> + *
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU General Public License version
>> + * 2 as published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/kref.h>
>> +#include <linux/notifier.h>
>> +#include <linux/proc_fs.h>
>> +#include <linux/spinlock.h>
>> +
>> +#include <asm/prom.h>
>> +#include <asm/machdep.h>
>> +#include <asm/uaccess.h>
>> +#include <asm/rtas.h>
>> +#include <asm/pSeries_reconfig.h>
>> +
>> +#define CFG_CONN_WORK_SIZE 4096
>> +static char workarea[CFG_CONN_WORK_SIZE];
>> +spinlock_t workarea_lock;
>> +
>> +static struct property *parse_cc_property(char *workarea)
>> +{
>> + struct property *prop;
>> + u32 *work_ints;
>> + char *name;
>> + char *value;
>> +
>> + prop = kzalloc(sizeof(*prop), GFP_KERNEL);
>> + if (!prop)
>> + return NULL;
>> +
>> + work_ints = (u32 *)workarea;
>> + name = workarea + work_ints[2];
>> + prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
>> + if (!prop->name) {
>> + kfree(prop);
>> + return NULL;
>> + }
>> +
>> + strcpy(prop->name, name);
>> +
>> + prop->length = work_ints[3];
>> + value = workarea + work_ints[4];
>> + prop->value = kzalloc(prop->length, GFP_KERNEL);
>
>
> The use of work_ints is a bit opaque, it might be clearer if you define
> a struct, something like:
>
> struct cc_prop {
> u32 ?;
> u32 ?;
> u32 name_offset;
> u32 length;
> u32 value_offset;
> };
>
> cc = (struct cc_prop *)workarea;
>
> name = workarea + cc->name_offset;
> ..
> prop->length = cc->length;
> value = workarea + cc->value_offset;
>

Good idea, and I agree that the use of the workarea/work_ints is a bit vague.
The current way works because sometimes its easier to think of the workarea
as a char buffer and sometimes as a int buffer.

I'll try to come up with something to make the parsing of the workarea buffer
easier to understand.

-Nathan Fontenot

> etc.
>
>
> Also I don't see any checking of the offsets into workarea (for name &
> value), vs the size of workarea.
>
> cheers
>