Hello.
These patches are part of the "new node addition" series, v4.
When a new node is added, a new pgdat must be allocated and initialized.
However, ia64 keeps a copy of node_data[] on each node, so the kernel has to
allocate not only the pgdat but also the area for these copies, and all of the
copies must be updated at hot-add. These patches implement that.
This patch is for 2.6.17-rc3-mm1.
Please apply.
------------------------------------------------------------
Changelog since v4 of node hot-add:
- Updated for 2.6.17-rc3-mm1.
The v4 posting is here:
<description>
http://marc.theaimsgroup.com/?l=linux-mm&m=114258404023573&w=2
<patches>
http://marc.theaimsgroup.com/?l=linux-mm&w=2&r=1&s=memory+hotplug+node+v.4.&q=b
--
Yasunori Goto
This patch refreshes the node_data[] array for ia64.
As I mentioned in the previous patches,
ia64 keeps a copy of the pgdat address array on each node
as part of its per-node data.
In v2 of node_add, this function used stop_machine_run() to update them.
(I wanted them to be copied as safely as possible.)
In this patch, however, the arrays are simply copied, and the
node_online_map bit is set after pgdat initialization has completed.
So the kernel must use the NODE_DATA() macro only after checking
node_online_map(). (Current code already does this.)
This is a simpler way that is sufficient for hot-add alone.
Note: This will become a problem once hot-remove is supported,
because even if the online_map bit is set, the kernel may still
touch NODE_DATA() due to a race condition. :-(
Signed-off-by: Yasunori Goto <[email protected]>
arch/ia64/mm/discontig.c | 24 +++++++++++++++++++-----
include/asm-ia64/nodedata.h | 12 ++++++++++++
include/linux/memory_hotplug.h | 4 +---
3 files changed, 32 insertions(+), 8 deletions(-)
Index: pgdat12/arch/ia64/mm/discontig.c
===================================================================
--- pgdat12.orig/arch/ia64/mm/discontig.c 2006-04-28 10:24:56.000000000 +0900
+++ pgdat12/arch/ia64/mm/discontig.c 2006-04-28 10:31:49.000000000 +0900
@@ -308,6 +308,17 @@ static void __init reserve_pernode_space
}
}
+static void __meminit scatter_node_data(void)
+{
+ pg_data_t **dst;
+ int node;
+
+ for_each_online_node(node){
+ dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
+ memcpy(dst, pgdat_list, sizeof(pgdat_list));
+ }
+}
+
/**
* initialize_pernode_data - fixup per-cpu & per-node pointers
*
@@ -320,11 +331,8 @@ static void __init initialize_pernode_da
{
int cpu, node;
- /* Copy the pg_data_t list to each node and init the node field */
- for_each_online_node(node) {
- memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
- sizeof(pgdat_list));
- }
+ scatter_node_data();
+
#ifdef CONFIG_SMP
/* Set the node_data pointer for each per-cpu struct */
for (cpu = 0; cpu < NR_CPUS; cpu++) {
@@ -783,3 +791,9 @@ void __init paging_init(void)
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
+
+void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
+{
+ pgdat_list[update_node] = update_pgdat;
+ scatter_node_data();
+}
Index: pgdat12/include/asm-ia64/nodedata.h
===================================================================
--- pgdat12.orig/include/asm-ia64/nodedata.h 2006-04-28 10:24:51.000000000 +0900
+++ pgdat12/include/asm-ia64/nodedata.h 2006-04-28 10:27:40.000000000 +0900
@@ -47,6 +47,18 @@ struct ia64_node_data {
*/
#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid])
+/*
+ * LOCAL_DATA_ADDR - Calculate the address of another node's
+ * "local_node_data" during hot-plug. The local_node_data
+ * is pointed to by per_cpu_page. The kernel usually uses it
+ * only for the executing cpu. However, when a new node is
+ * hot-added, the addresses of the local data of the other
+ * nodes are needed in order to update all of them.
+ */
+#define LOCAL_DATA_ADDR(pgdat) \
+ ((struct ia64_node_data *)((u64)(pgdat) + \
+ L1_CACHE_ALIGN(sizeof(struct pglist_data))))
+
#endif /* CONFIG_NUMA */
#endif /* _ASM_IA64_NODEDATA_H */
Index: pgdat12/include/linux/memory_hotplug.h
===================================================================
--- pgdat12.orig/include/linux/memory_hotplug.h 2006-04-28 10:24:51.000000000 +0900
+++ pgdat12/include/linux/memory_hotplug.h 2006-04-28 10:31:49.000000000 +0900
@@ -91,9 +91,7 @@ static inline pg_data_t *arch_alloc_node
static inline void arch_free_nodedata(pg_data_t *pgdat)
{
}
-static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
-{
-}
+extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);
#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
--
Yasunori Goto
This patch allocates the pgdat and the per-node data area for ia64.
Their size can be calculated by compute_pernodesize().
Signed-off-by: Yasunori Goto <[email protected]>
arch/ia64/mm/discontig.c | 16 ++++++++++++++--
include/linux/memory_hotplug.h | 9 ++-------
2 files changed, 16 insertions(+), 9 deletions(-)
Index: pgdat12/arch/ia64/mm/discontig.c
===================================================================
--- pgdat12.orig/arch/ia64/mm/discontig.c 2006-04-28 10:31:49.000000000 +0900
+++ pgdat12/arch/ia64/mm/discontig.c 2006-04-28 10:32:31.000000000 +0900
@@ -100,7 +100,7 @@ static int __init build_node_maps(unsign
* acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
* called yet. Note that node 0 will also count all non-existent cpus.
*/
-static int __init early_nr_cpus_node(int node)
+static int __meminit early_nr_cpus_node(int node)
{
int cpu, n = 0;
@@ -115,7 +115,7 @@ static int __init early_nr_cpus_node(int
* compute_pernodesize - compute size of pernode data
* @node: the node id.
*/
-static unsigned long __init compute_pernodesize(int node)
+static unsigned long __meminit compute_pernodesize(int node)
{
unsigned long pernodesize = 0, cpus;
@@ -792,6 +792,18 @@ void __init paging_init(void)
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
+pg_data_t *arch_alloc_nodedata(int nid)
+{
+ unsigned long size = compute_pernodesize(nid);
+
+ return kzalloc(size, GFP_KERNEL);
+}
+
+void arch_free_nodedata(pg_data_t *pgdat)
+{
+ kfree(pgdat);
+}
+
void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
{
pgdat_list[update_node] = update_pgdat;
Index: pgdat12/include/linux/memory_hotplug.h
===================================================================
--- pgdat12.orig/include/linux/memory_hotplug.h 2006-04-28 10:31:49.000000000 +0900
+++ pgdat12/include/linux/memory_hotplug.h 2006-04-28 10:33:17.000000000 +0900
@@ -84,13 +84,8 @@ static inline int memofy_add_physaddr_to
* Now, arch_free_nodedata() is just defined for error path of node_hot_add.
*
*/
-static inline pg_data_t *arch_alloc_nodedata(int nid)
-{
- return NULL;
-}
-static inline void arch_free_nodedata(pg_data_t *pgdat)
-{
-}
+extern pg_data_t *arch_alloc_nodedata(int nid);
+extern void arch_free_nodedata(pg_data_t *pgdat);
extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);
#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
--
Yasunori Goto
This is a preparatory patch to share the code that updates NODE_DATA()
on ia64 between boot time and hotplug.
The current code remembers the pgdat addresses in mem_data, which is used only
at boot time. But that information can also be used at hotplug time
by moving it to a global variable.
The next patch uses this array.
Signed-off-by: Yasunori Goto <[email protected]>
arch/ia64/mm/discontig.c | 19 ++++++++-----------
1 files changed, 8 insertions(+), 11 deletions(-)
Index: pgdat11/arch/ia64/mm/discontig.c
===================================================================
--- pgdat11.orig/arch/ia64/mm/discontig.c 2006-04-20 11:00:04.000000000 +0900
+++ pgdat11/arch/ia64/mm/discontig.c 2006-04-20 11:00:46.000000000 +0900
@@ -33,7 +33,6 @@
*/
struct early_node_data {
struct ia64_node_data *node_data;
- pg_data_t *pgdat;
unsigned long pernode_addr;
unsigned long pernode_size;
struct bootmem_data bootmem_data;
@@ -46,6 +45,8 @@ struct early_node_data {
static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
static nodemask_t memory_less_mask __initdata;
+static pg_data_t *pgdat_list[MAX_NUMNODES];
+
/*
* To prevent cache aliasing effects, align per-node structures so that they
* start at addresses that are strided by node number.
@@ -175,13 +176,13 @@ static void __init fill_pernode(int node
pernode += PERCPU_PAGE_SIZE * cpus;
pernode += node * L1_CACHE_BYTES;
- mem_data[node].pgdat = __va(pernode);
+ pgdat_list[node] = __va(pernode);
pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
mem_data[node].node_data = __va(pernode);
pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
- mem_data[node].pgdat->bdata = bdp;
+ pgdat_list[node]->bdata = bdp;
pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
cpu_data = per_cpu_node_setup(cpu_data, node);
@@ -268,7 +269,7 @@ static int __init find_pernode_space(uns
static int __init free_node_bootmem(unsigned long start, unsigned long len,
int node)
{
- free_bootmem_node(mem_data[node].pgdat, start, len);
+ free_bootmem_node(pgdat_list[node], start, len);
return 0;
}
@@ -287,7 +288,7 @@ static void __init reserve_pernode_space
int node;
for_each_online_node(node) {
- pg_data_t *pdp = mem_data[node].pgdat;
+ pg_data_t *pdp = pgdat_list[node];
if (node_isset(node, memory_less_mask))
continue;
@@ -317,12 +318,8 @@ static void __init reserve_pernode_space
*/
static void __init initialize_pernode_data(void)
{
- pg_data_t *pgdat_list[MAX_NUMNODES];
int cpu, node;
- for_each_online_node(node)
- pgdat_list[node] = mem_data[node].pgdat;
-
/* Copy the pg_data_t list to each node and init the node field */
for_each_online_node(node) {
memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
@@ -372,7 +369,7 @@ static void __init *memory_less_node_all
if (bestnode == -1)
bestnode = anynode;
- ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, pernodesize,
+ ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
return ptr;
@@ -476,7 +473,7 @@ void __init find_memory(void)
pernodesize = mem_data[node].pernode_size;
map = pernode + pernodesize;
- init_bootmem_node(mem_data[node].pgdat,
+ init_bootmem_node(pgdat_list[node],
map>>PAGE_SHIFT,
bdp->node_boot_start>>PAGE_SHIFT,
bdp->node_low_pfn);
--
Yasunori Goto