On Power systems with shared configurations of CPUs and memory, there
are some issues with association of additional CPUs and memory to nodes
when hot-adding resources. These patches address some of those problems.
powerpc/hotplug: On systems like PowerPC which allow 'hot-add' of CPU
or memory resources, it may occur that the new resources are to be
inserted into nodes that were not used for these resources at bootup.
In the kernel, any node that is used must be defined and initialized
at boot. In order to meet both needs, this patch adds a new kernel
command line option (numnodes=<int>) for use by the PowerPC
architecture-specific code that defines the maximum number of nodes
that the kernel will ever need in its current hardware environment.
The boot code that initializes nodes for PowerPC will read this value
and use it to ensure that all of the desired nodes are set up in the
'node_possible_map', and elsewhere.
powerpc/numa: Correct the currently broken capability to set the
topology for shared CPUs in LPARs. At boot time for shared CPU
LPARs, the topology for each shared CPU is set to node zero; however,
this is now updated correctly using the Virtual Processor Home Node
(VPHN) capabilities information provided by the pHyp. The VPHN handling
in Linux is disabled if PRRN handling is present.
Signed-off-by: Michael Bringmann <[email protected]>
Michael Bringmann (2):
powerpc/hotplug: Add option to define max nodes allowing dynamic
growth of resources.
powerpc/numa: Update CPU topology when VPHN enabled
---
Changes in V6:
-- Reorder some code to better eliminate unused functions in
conditional builds.
powerpc/hotplug: On systems like PowerPC which allow 'hot-add' of CPU
or memory resources, it may occur that the new resources are to be
inserted into nodes that were not used for these resources at bootup.
In the kernel, any node that is used must be defined and initialized
at boot. In order to meet both needs, this patch adds a new kernel
command line option (numnodes=<int>) for use by the PowerPC architecture-
specific code that defines the maximum number of nodes that the kernel
will ever need in its current hardware environment. The boot code that
initializes nodes for PowerPC will read this value and use it to ensure
that all of the desired nodes are set up in the 'node_possible_map', and
elsewhere.
Signed-off-by: Michael Bringmann <[email protected]>
---
---
arch/powerpc/mm/numa.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index e6f742d..0746d93 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -60,10 +60,27 @@
static int n_mem_addr_cells, n_mem_size_cells;
static int form1_affinity;
+#define TOPOLOGY_DEF_NUM_NODES 0
#define MAX_DISTANCE_REF_POINTS 4
static int distance_ref_points_depth;
static const __be32 *distance_ref_points;
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
+static int topology_num_nodes = TOPOLOGY_DEF_NUM_NODES;
+
+/*
+ * Topology-related early parameters
+ */
+static int __init early_num_nodes(char *p)
+{
+ if (!p)
+ return 1;
+
+ topology_num_nodes = memparse(p, &p);
+ dbg("topology num nodes = 0x%d\n", topology_num_nodes);
+
+ return 0;
+}
+early_param("numnodes", early_num_nodes);
/*
* Allocate node_to_cpumask_map based on number of available nodes
@@ -892,6 +909,18 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}
+static void __init setup_min_nodes(void)
+{
+ int i, l = topology_num_nodes;
+
+ for (i = 0; i < l; i++) {
+ if (!node_possible(i)) {
+ setup_node_data(i, 0, 0);
+ node_set(i, node_possible_map);
+ }
+ }
+}
+
void __init initmem_init(void)
{
int nid, cpu;
@@ -911,6 +940,8 @@ void __init initmem_init(void)
*/
nodes_and(node_possible_map, node_possible_map, node_online_map);
+ setup_min_nodes();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
powerpc/numa: Correct the currently broken capability to set the
topology for shared CPUs in LPARs. At boot time for shared CPU
LPARs, the topology for each shared CPU is set to node zero; however,
this is now updated correctly using the Virtual Processor Home Node
(VPHN) capabilities information provided by the pHyp.
Also, update initialization checks for device-tree attributes to
independently recognize PRRN or VPHN usage.
Signed-off-by: Michael Bringmann <[email protected]>
---
Changes in V6:
-- Place extern of timed_topology_update() proto under additional #ifdef
for hotplug-cpu.
---
arch/powerpc/include/asm/topology.h | 16 +++++++
arch/powerpc/mm/numa.c | 64 +++++++++++++++++++++++---
arch/powerpc/platforms/pseries/dlpar.c | 2 +
arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 +
4 files changed, 77 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 9cc6ec9..ae3cdd0 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -79,6 +79,22 @@ static inline int prrn_is_enabled(void)
}
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
+#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) && \
+ defined(CONFIG_HOTPLUG_CPU)
+extern int timed_topology_update(int nsecs);
+#else
+static int timed_topology_update(int nsecs)
+{
+ return 0;
+}
+#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR && CONFIG_HOTPLUG_CPU */
+
+#if defined(CONFIG_PPC_SPLPAR)
+extern void shared_topology_update(void);
+#else
+#define shared_topology_update() 0
+#endif /* CONFIG_PPC_SPLPAR */
+
#include <asm-generic/topology.h>
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0746d93..cf5992d 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -29,6 +29,7 @@
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/sched.h>
#include <asm/cputhreads.h>
#include <asm/sparsemem.h>
#include <asm/prom.h>
@@ -935,7 +936,7 @@ void __init initmem_init(void)
/*
* Reduce the possible NUMA nodes to the online NUMA nodes,
- * since we do not support node hotplug. This ensures that we
+ * since we do not support node hotplug. This ensures that we
* lower the maximum NUMA node ID to what is actually present.
*/
nodes_and(node_possible_map, node_possible_map, node_online_map);
@@ -1179,11 +1180,32 @@ struct topology_update_data {
int new_nid;
};
+#define TOPOLOGY_DEF_TIMER_SECS 60
+
static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
static cpumask_t cpu_associativity_changes_mask;
static int vphn_enabled;
static int prrn_enabled;
static void reset_topology_timer(void);
+static int topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
+static int topology_inited;
+static int topology_update_needed;
+
+/*
+ * Change polling interval for associativity changes.
+ */
+int timed_topology_update(int nsecs)
+{
+ if (nsecs > 0)
+ topology_timer_secs = nsecs;
+ else
+ topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
+
+ if (vphn_enabled)
+ reset_topology_timer();
+
+ return 0;
+}
/*
* Store the current values of the associativity change counters in the
@@ -1277,6 +1299,12 @@ static long vphn_get_associativity(unsigned long cpu,
"hcall_vphn() experienced a hardware fault "
"preventing VPHN. Disabling polling...\n");
stop_topology_update();
+ break;
+ case H_SUCCESS:
+ printk(KERN_INFO
+ "VPHN hcall succeeded. Reset polling...\n");
+ timed_topology_update(0);
+ break;
}
return rc;
@@ -1354,8 +1382,11 @@ int numa_update_cpu_topology(bool cpus_locked)
struct device *dev;
int weight, new_nid, i = 0;
- if (!prrn_enabled && !vphn_enabled)
+ if (!prrn_enabled && !vphn_enabled) {
+ if (!topology_inited)
+ topology_update_needed = 1;
return 0;
+ }
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
@@ -1394,6 +1425,8 @@ int numa_update_cpu_topology(bool cpus_locked)
cpumask_andnot(&cpu_associativity_changes_mask,
&cpu_associativity_changes_mask,
cpu_sibling_mask(cpu));
+ pr_info("Assoc chg gives same node %d for cpu%d\n",
+ new_nid, cpu);
cpu = cpu_last_thread_sibling(cpu);
continue;
}
@@ -1410,6 +1443,9 @@ int numa_update_cpu_topology(bool cpus_locked)
cpu = cpu_last_thread_sibling(cpu);
}
+ if (i)
+ updates[i-1].next = NULL;
+
pr_debug("Topology update for the following CPUs:\n");
if (cpumask_weight(&updated_cpus)) {
for (ud = &updates[0]; ud; ud = ud->next) {
@@ -1464,6 +1500,7 @@ int numa_update_cpu_topology(bool cpus_locked)
out:
kfree(updates);
+ topology_update_needed = 0;
return changed;
}
@@ -1483,6 +1520,14 @@ static void topology_schedule_update(void)
schedule_work(&topology_work);
}
+void shared_topology_update(void)
+{
+ if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ lppaca_shared_proc(get_lppaca()))
+ topology_schedule_update();
+}
+EXPORT_SYMBOL(shared_topology_update);
+
static void topology_timer_fn(unsigned long ignored)
{
if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
@@ -1499,7 +1544,7 @@ static void topology_timer_fn(unsigned long ignored)
static void reset_topology_timer(void)
{
topology_timer.data = 0;
- topology_timer.expires = jiffies + 60 * HZ;
+ topology_timer.expires = jiffies + topology_timer_secs * HZ;
mod_timer(&topology_timer, topology_timer.expires);
}
@@ -1549,15 +1594,14 @@ int start_topology_update(void)
if (firmware_has_feature(FW_FEATURE_PRRN)) {
if (!prrn_enabled) {
prrn_enabled = 1;
- vphn_enabled = 0;
#ifdef CONFIG_SMP
rc = of_reconfig_notifier_register(&dt_update_nb);
#endif
}
- } else if (firmware_has_feature(FW_FEATURE_VPHN) &&
+ }
+ if (firmware_has_feature(FW_FEATURE_VPHN) &&
lppaca_shared_proc(get_lppaca())) {
if (!vphn_enabled) {
- prrn_enabled = 0;
vphn_enabled = 1;
setup_cpu_associativity_change_counters();
init_timer_deferrable(&topology_timer);
@@ -1580,7 +1624,8 @@ int stop_topology_update(void)
#ifdef CONFIG_SMP
rc = of_reconfig_notifier_unregister(&dt_update_nb);
#endif
- } else if (vphn_enabled) {
+ }
+ if (vphn_enabled) {
vphn_enabled = 0;
rc = del_timer_sync(&topology_timer);
}
@@ -1646,6 +1691,11 @@ static int topology_update_init(void)
if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
return -ENOMEM;
+ topology_inited = 1;
+ if (topology_update_needed)
+ bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask),
+ nr_cpumask_bits);
+
return 0;
}
device_initcall(topology_update_init);
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index bda18d8..0219cd3 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -592,6 +592,8 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,
static int __init pseries_dlpar_init(void)
{
+ shared_topology_update();
+
pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue",
WQ_UNBOUND, 1);
return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7bc0e91..96c02d0 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -356,6 +356,7 @@ static int dlpar_online_cpu(struct device_node *dn)
BUG_ON(get_cpu_current_state(cpu)
!= CPU_STATE_OFFLINE);
cpu_maps_update_done();
+ timed_topology_update(1);
rc = device_online(get_cpu_device(cpu));
if (rc)
goto out;
@@ -522,6 +523,7 @@ static int dlpar_offline_cpu(struct device_node *dn)
set_preferred_offline_state(cpu,
CPU_STATE_OFFLINE);
cpu_maps_update_done();
+ timed_topology_update(1);
rc = device_offline(get_cpu_device(cpu));
if (rc)
goto out;
Michael Bringmann <[email protected]> writes:
> powerpc/hotplug: On systems like PowerPC which allow 'hot-add' of CPU
> or memory resources, it may occur that the new resources are to be
> inserted into nodes that were not used for these resources at bootup.
> In the kernel, any node that is used must be defined and initialized
> at boot. In order to meet both needs, this patch adds a new kernel
> command line option (numnodes=<int>) for use by the PowerPC architecture-
Sorry, that's a hack.
I thought you were going to use firmware properties to find the set of
possible nodes. Did that not work?
cheers
Michael Bringmann <[email protected]> writes:
> On Power systems with shared configurations of CPUs and memory, there
> are some issues with association of additional CPUs and memory to nodes
> when hot-adding resources. These patches address some of those problems.
>
> powerpc/hotplug: On systems like PowerPC which allow 'hot-add' of CPU
> or memory resources, it may occur that the new resources are to be
> inserted into nodes that were not used for these resources at bootup.
> In the kernel, any node that is used must be defined and initialized
> at boot. In order to meet both needs, this patch adds a new kernel
> command line option (numnodes=<int>) for use by the PowerPC
> architecture-specific code that defines the maximum number of nodes
> that the kernel will ever need in its current hardware environment.
> The boot code that initializes nodes for PowerPC will read this value
> and use it to ensure that all of the desired nodes are setup in the
> 'node_possible_map', and elsewhere.
>
> powerpc/numa: Correct the currently broken capability to set the
> topology for shared CPUs in LPARs. At boot time for shared CPU
> lpars, the topology for each shared CPU is set to node zero, however,
> this is now updated correctly using the Virtual Processor Home Node
> (VPHN) capabilities information provided by the pHyp. The VPHN handling
> in Linux is disabled, if PRRN handling is present.
>
> Signed-off-by: Michael Bringmann <[email protected]>
>
> Michael Bringmann (2):
> powerpc/hotplug: Add option to define max nodes allowing dynamic
> growth of resources.
> powerpc/numa: Update CPU topology when VPHN enabled
> ---
> Changes in V6:
> -- Reorder some code to better eliminate unused functions in
> conditional builds.
What changed between yesterday's V6 and this V6?
If you're going to resend, please bump the version number, we have tools
that parse the subject and version, and resending multiple times with
the same number breaks those.
cheers
One of the patches was accidentally duplicated and sent twice yesterday.
I will bump the version number on any resend in future.
On 06/21/2017 04:54 AM, Michael Ellerman wrote:
> Michael Bringmann <[email protected]> writes:
>
>> On Power systems with shared configurations of CPUs and memory, there
>> are some issues with association of additional CPUs and memory to nodes
>> when hot-adding resources. These patches address some of those problems.
>>
>> powerpc/hotplug: On systems like PowerPC which allow 'hot-add' of CPU
>> or memory resources, it may occur that the new resources are to be
>> inserted into nodes that were not used for these resources at bootup.
>> In the kernel, any node that is used must be defined and initialized
>> at boot. In order to meet both needs, this patch adds a new kernel
>> command line option (numnodes=<int>) for use by the PowerPC
>> architecture-specific code that defines the maximum number of nodes
>> that the kernel will ever need in its current hardware environment.
>> The boot code that initializes nodes for PowerPC will read this value
>> and use it to ensure that all of the desired nodes are setup in the
>> 'node_possible_map', and elsewhere.
>>
>> powerpc/numa: Correct the currently broken capability to set the
>> topology for shared CPUs in LPARs. At boot time for shared CPU
>> lpars, the topology for each shared CPU is set to node zero, however,
>> this is now updated correctly using the Virtual Processor Home Node
>> (VPHN) capabilities information provided by the pHyp. The VPHN handling
>> in Linux is disabled, if PRRN handling is present.
>>
>> Signed-off-by: Michael Bringmann <[email protected]>
>>
>> Michael Bringmann (2):
>> powerpc/hotplug: Add option to define max nodes allowing dynamic
>> growth of resources.
>> powerpc/numa: Update CPU topology when VPHN enabled
>> ---
>> Changes in V6:
>> -- Reorder some code to better eliminate unused functions in
>> conditional builds.
>
> What changed between yesterday's V6 and this V6?
>
> If you're going to resend, please bump the version number, we have tools
> that parse the subject and version, and resending multiple times with
> the same number breaks those.
>
> cheers
>
>
--
Michael W. Bringmann
Linux Technology Center
IBM Corporation
Tie-Line 363-5196
External: (512) 286-5196
Cell: (512) 466-0650
[email protected]
On 06/21/2017 04:52 AM, Michael Ellerman wrote:
> Michael Bringmann <[email protected]> writes:
>
>> powerpc/hotplug: On systems like PowerPC which allow 'hot-add' of CPU
>> or memory resources, it may occur that the new resources are to be
>> inserted into nodes that were not used for these resources at bootup.
>> In the kernel, any node that is used must be defined and initialized
>> at boot. In order to meet both needs, this patch adds a new kernel
>> command line option (numnodes=<int>) for use by the PowerPC architecture-
>
> Sorry, that's a hack.
It is an intermediate step pending the provision of the firmware properties
under discussion that were mentioned by Nathan Fontenot last week.
> I thought you were going to use firmware properties to find the set of
> possible nodes. Did that not work?
Inference based on the current set of firmware properties for associativity
is insufficient. That is partly the reason for the properties mentioned by
Nathan last week. The current firmware properties only cover what is known
at boot time. They do not cover expansions from DLPAR / hot-add operations
which can add up to everything else on the system.
> cheers
Regards,
--
Michael W. Bringmann
Linux Technology Center
IBM Corporation
Tie-Line 363-5196
External: (512) 286-5196
Cell: (512) 466-0650
[email protected]
Hi Michael,
[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.12-rc6 next-20170623]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/Michael-Bringmann/powerpc-hotplug-Ensure-enough-nodes-avail-for-operations/20170621-141803
base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-storcenter_defconfig (attached as .config)
compiler: powerpc-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc
All errors (new ones prefixed by >>):
In file included from include/linux/topology.h:35:0,
from include/linux/gfp.h:8,
from include/linux/idr.h:16,
from include/linux/kernfs.h:14,
from include/linux/sysfs.h:15,
from include/linux/kobject.h:21,
from include/linux/of.h:21,
from include/linux/irqdomain.h:34,
from arch/powerpc/include/asm/irq.h:12,
from arch/powerpc/include/asm/prom.h:19,
from arch/powerpc/kernel/cputable.c:22:
>> arch/powerpc/include/asm/topology.h:85:12: error: 'timed_topology_update' defined but not used [-Werror=unused-function]
static int timed_topology_update(int nsecs)
^~~~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
vim +/timed_topology_update +85 arch/powerpc/include/asm/topology.h
79 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
80
81 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) && \
82 defined(CONFIG_HOTPLUG_CPU)
83 extern int timed_topology_update(int nsecs);
84 #else
> 85 static int timed_topology_update(int nsecs)
86 {
87 return 0;
88 }
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation