2006-05-02 11:31:13

by Yasunori Goto

[permalink] [raw]
Subject: [Patch 000/003] pgdat allocation and update for ia64 of memory hotplug.

Hello.

These patches are part of the new node addition series (v4).
When a new node is added, a new pgdat must be allocated and initialized.
However, ia64 keeps a copy of node_data[] on each node, so the kernel has to
allocate not only the pgdat but also the area for these copies, and all of
the copies must be updated at hot-add. These patches implement that.

This patch is for 2.6.17-rc3-mm1.

Please apply.

------------------------------------------------------------

Change log from v4 of node hot-add.
- update for 2.6.17-rc3-mm1.

V4 of post is here.
<description>
http://marc.theaimsgroup.com/?l=linux-mm&m=114258404023573&w=2
<patches>
http://marc.theaimsgroup.com/?l=linux-mm&w=2&r=1&s=memory+hotplug+node+v.4.&q=b

--
Yasunori Goto



2006-05-02 11:36:26

by Yasunori Goto

[permalink] [raw]
Subject: [Patch 002/003] pgdat allocation and update for ia64 of memory hotplug. (update pgdat address array)

This patch refreshes the node_data[] array for ia64.
As I mentioned in the previous patches,
ia64 keeps a copy of the pgdat address array on each node
as part of its per-node data.

In v2 of node_add, this function used stop_machine_run() to update them.
(I wanted them to be copied as safely as possible.)
In this patch, however, the arrays are simply copied, and the
node_online_map bit is set after pgdat initialization has completed.

So, the kernel must touch the NODE_DATA() macro only after checking
node_online_map(). (The current code already does this.)
This is a simpler approach that is sufficient for hot-add only.

Note: This will be a problem once hot-remove is supported,
because, even if the online_map bit is set, the kernel may
touch NODE_DATA() due to a race condition. :-(


Signed-off-by: Yasunori Goto <[email protected]>

arch/ia64/mm/discontig.c | 24 +++++++++++++++++++-----
include/asm-ia64/nodedata.h | 12 ++++++++++++
include/linux/memory_hotplug.h | 4 +---
3 files changed, 32 insertions(+), 8 deletions(-)

Index: pgdat12/arch/ia64/mm/discontig.c
===================================================================
--- pgdat12.orig/arch/ia64/mm/discontig.c 2006-04-28 10:24:56.000000000 +0900
+++ pgdat12/arch/ia64/mm/discontig.c 2006-04-28 10:31:49.000000000 +0900
@@ -308,6 +308,17 @@ static void __init reserve_pernode_space
}
}

+static void __meminit scatter_node_data(void)
+{
+ pg_data_t **dst;
+ int node;
+
+ for_each_online_node(node){
+ dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs;
+ memcpy(dst, pgdat_list, sizeof(pgdat_list));
+ }
+}
+
/**
* initialize_pernode_data - fixup per-cpu & per-node pointers
*
@@ -320,11 +331,8 @@ static void __init initialize_pernode_da
{
int cpu, node;

- /* Copy the pg_data_t list to each node and init the node field */
- for_each_online_node(node) {
- memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
- sizeof(pgdat_list));
- }
+ scatter_node_data();
+
#ifdef CONFIG_SMP
/* Set the node_data pointer for each per-cpu struct */
for (cpu = 0; cpu < NR_CPUS; cpu++) {
@@ -783,3 +791,9 @@ void __init paging_init(void)

zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
+
+void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
+{
+ pgdat_list[update_node] = update_pgdat;
+ scatter_node_data();
+}
Index: pgdat12/include/asm-ia64/nodedata.h
===================================================================
--- pgdat12.orig/include/asm-ia64/nodedata.h 2006-04-28 10:24:51.000000000 +0900
+++ pgdat12/include/asm-ia64/nodedata.h 2006-04-28 10:27:40.000000000 +0900
@@ -47,6 +47,18 @@ struct ia64_node_data {
*/
#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid])

+/*
+ * LOCAL_DATA_ADDR - This is to calculate the address of other node's
+ * "local_node_data" at hot-plug phase. The local_node_data
+ * is pointed by per_cpu_page. Kernel usually use it for
+ * just executing cpu. However, when new node is hot-added,
+ * the addresses of local data for other nodes are necessary
+ * to update all of them.
+ */
+#define LOCAL_DATA_ADDR(pgdat) \
+ ((struct ia64_node_data *)((u64)(pgdat) + \
+ L1_CACHE_ALIGN(sizeof(struct pglist_data))))
+
#endif /* CONFIG_NUMA */

#endif /* _ASM_IA64_NODEDATA_H */
Index: pgdat12/include/linux/memory_hotplug.h
===================================================================
--- pgdat12.orig/include/linux/memory_hotplug.h 2006-04-28 10:24:51.000000000 +0900
+++ pgdat12/include/linux/memory_hotplug.h 2006-04-28 10:31:49.000000000 +0900
@@ -91,9 +91,7 @@ static inline pg_data_t *arch_alloc_node
static inline void arch_free_nodedata(pg_data_t *pgdat)
{
}
-static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
-{
-}
+extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);

#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */


--
Yasunori Goto


2006-05-02 11:36:27

by Yasunori Goto

[permalink] [raw]
Subject: [Patch 003/003] pgdat allocation and update for ia64 of memory hotplug.(allocate pgdat and per node data)

This patch allocates the pgdat and the per-node data area for ia64.
Their size is calculated by compute_pernodesize().

Signed-off-by: Yasunori Goto <[email protected]>

arch/ia64/mm/discontig.c | 16 ++++++++++++++--
include/linux/memory_hotplug.h | 9 ++-------
2 files changed, 16 insertions(+), 9 deletions(-)

Index: pgdat12/arch/ia64/mm/discontig.c
===================================================================
--- pgdat12.orig/arch/ia64/mm/discontig.c 2006-04-28 10:31:49.000000000 +0900
+++ pgdat12/arch/ia64/mm/discontig.c 2006-04-28 10:32:31.000000000 +0900
@@ -100,7 +100,7 @@ static int __init build_node_maps(unsign
* acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
* called yet. Note that node 0 will also count all non-existent cpus.
*/
-static int __init early_nr_cpus_node(int node)
+static int __meminit early_nr_cpus_node(int node)
{
int cpu, n = 0;

@@ -115,7 +115,7 @@ static int __init early_nr_cpus_node(int
* compute_pernodesize - compute size of pernode data
* @node: the node id.
*/
-static unsigned long __init compute_pernodesize(int node)
+static unsigned long __meminit compute_pernodesize(int node)
{
unsigned long pernodesize = 0, cpus;

@@ -792,6 +792,18 @@ void __init paging_init(void)
zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}

+pg_data_t *arch_alloc_nodedata(int nid)
+{
+ unsigned long size = compute_pernodesize(nid);
+
+ return kzalloc(size, GFP_KERNEL);
+}
+
+void arch_free_nodedata(pg_data_t *pgdat)
+{
+ kfree(pgdat);
+}
+
void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
{
pgdat_list[update_node] = update_pgdat;
Index: pgdat12/include/linux/memory_hotplug.h
===================================================================
--- pgdat12.orig/include/linux/memory_hotplug.h 2006-04-28 10:31:49.000000000 +0900
+++ pgdat12/include/linux/memory_hotplug.h 2006-04-28 10:33:17.000000000 +0900
@@ -84,13 +84,8 @@ static inline int memofy_add_physaddr_to
* Now, arch_free_nodedata() is just defined for error path of node_hot_add.
*
*/
-static inline pg_data_t *arch_alloc_nodedata(int nid)
-{
- return NULL;
-}
-static inline void arch_free_nodedata(pg_data_t *pgdat)
-{
-}
+extern pg_data_t *arch_alloc_nodedata(int nid);
+extern void arch_free_nodedata(pg_data_t *pgdat);
extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);

#else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */

--
Yasunori Goto


2006-05-02 11:37:15

by Yasunori Goto

[permalink] [raw]
Subject: [Patch 001/003] pgdat allocation and update for ia64 of memory hotplug.(hold pgdat address at system running)

This is a preparatory patch that lets boot time and hotplug share the
code that updates NODE_DATA() on ia64.

The current code remembers the pgdat addresses in mem_data, which is used only
at boot time. By moving them to a global variable, this information can also
be used at hotplug time.
The next patch uses this array.


Signed-off-by: Yasunori Goto <[email protected]>

arch/ia64/mm/discontig.c | 19 ++++++++-----------
1 files changed, 8 insertions(+), 11 deletions(-)

Index: pgdat11/arch/ia64/mm/discontig.c
===================================================================
--- pgdat11.orig/arch/ia64/mm/discontig.c 2006-04-20 11:00:04.000000000 +0900
+++ pgdat11/arch/ia64/mm/discontig.c 2006-04-20 11:00:46.000000000 +0900
@@ -33,7 +33,6 @@
*/
struct early_node_data {
struct ia64_node_data *node_data;
- pg_data_t *pgdat;
unsigned long pernode_addr;
unsigned long pernode_size;
struct bootmem_data bootmem_data;
@@ -46,6 +45,8 @@ struct early_node_data {
static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
static nodemask_t memory_less_mask __initdata;

+static pg_data_t *pgdat_list[MAX_NUMNODES];
+
/*
* To prevent cache aliasing effects, align per-node structures so that they
* start at addresses that are strided by node number.
@@ -175,13 +176,13 @@ static void __init fill_pernode(int node
pernode += PERCPU_PAGE_SIZE * cpus;
pernode += node * L1_CACHE_BYTES;

- mem_data[node].pgdat = __va(pernode);
+ pgdat_list[node] = __va(pernode);
pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

mem_data[node].node_data = __va(pernode);
pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));

- mem_data[node].pgdat->bdata = bdp;
+ pgdat_list[node]->bdata = bdp;
pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));

cpu_data = per_cpu_node_setup(cpu_data, node);
@@ -268,7 +269,7 @@ static int __init find_pernode_space(uns
static int __init free_node_bootmem(unsigned long start, unsigned long len,
int node)
{
- free_bootmem_node(mem_data[node].pgdat, start, len);
+ free_bootmem_node(pgdat_list[node], start, len);

return 0;
}
@@ -287,7 +288,7 @@ static void __init reserve_pernode_space
int node;

for_each_online_node(node) {
- pg_data_t *pdp = mem_data[node].pgdat;
+ pg_data_t *pdp = pgdat_list[node];

if (node_isset(node, memory_less_mask))
continue;
@@ -317,12 +318,8 @@ static void __init reserve_pernode_space
*/
static void __init initialize_pernode_data(void)
{
- pg_data_t *pgdat_list[MAX_NUMNODES];
int cpu, node;

- for_each_online_node(node)
- pgdat_list[node] = mem_data[node].pgdat;
-
/* Copy the pg_data_t list to each node and init the node field */
for_each_online_node(node) {
memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
@@ -372,7 +369,7 @@ static void __init *memory_less_node_all
if (bestnode == -1)
bestnode = anynode;

- ptr = __alloc_bootmem_node(mem_data[bestnode].pgdat, pernodesize,
+ ptr = __alloc_bootmem_node(pgdat_list[bestnode], pernodesize,
PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));

return ptr;
@@ -476,7 +473,7 @@ void __init find_memory(void)
pernodesize = mem_data[node].pernode_size;
map = pernode + pernodesize;

- init_bootmem_node(mem_data[node].pgdat,
+ init_bootmem_node(pgdat_list[node],
map>>PAGE_SHIFT,
bdp->node_boot_start>>PAGE_SHIFT,
bdp->node_low_pfn);

--
Yasunori Goto