2006-08-04 13:14:42

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 1/10] hot-add-mem x86_64: acpi motherboard fix

This is the first of 10 patches. They were built on top of Kame's 6 patches sent
out within the last few days ([RFC][PATCH] fix ioresouce handling take2 [1/5] was
the first). Kame's patches fix several real issues, and with the 6th patch they
are complete from my point of view.

I have worked to integrate the feedback I received on the last round of patches
and welcome more ideas/advice. Thanks to everyone who has provided input on
these patches already.

This patch set allows SPARSEMEM and RESERVE based hot-add to work. I have
tested both options and they work as expected. I am adding memory to the
2nd node of a numa system (x86_64).

The major changes from the last set are the config change and RESERVE enablement.


From: Keith Mannthey <[email protected]>

Make the ACPI motherboard driver not attach to devices/handles it doesn't expect.
Fix a bug where the motherboard driver attached to the hot-add memory event and
caused the add memory call to fail.

Signed-off-by: Keith Mannthey<[email protected]>
---
motherboard.c | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletion(-)

diff -urN orig/drivers/acpi/motherboard.c work/drivers/acpi/motherboard.c
--- orig/drivers/acpi/motherboard.c 2006-07-28 13:57:35.000000000 -0400
+++ work/drivers/acpi/motherboard.c 2006-07-28 16:39:22.000000000 -0400
@@ -87,6 +87,7 @@
}
} else {
/* Memory mapped IO? */
+ return -EINVAL;
}

if (requested_res)
@@ -96,11 +97,16 @@

static int acpi_motherboard_add(struct acpi_device *device)
{
+ acpi_status status;
if (!device)
return -EINVAL;
- acpi_walk_resources(device->handle, METHOD_NAME__CRS,
+
+ status = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
acpi_reserve_io_ranges, NULL);

+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
return 0;
}


2006-08-04 13:14:14

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 4/10] hot-add-mem x86_64: Enable SPARSEMEM in srat.c

From: Keith Mannthey <[email protected]>

Enable x86_64 srat.c to share code between both reserve and sparsemem based add memory
paths. Both paths need the hot-add area node locality information (nodes_add). This
code refactors the code path to allow this.

Signed-off-by: Keith Mannthey<[email protected]>
---
srat.c | 51 +++++++++++++++++++++++++++++----------------------
1 files changed, 29 insertions(+), 22 deletions(-)

Files orig/arch/x86_64/mm/.srat.c.swp and current/arch/x86_64/mm/.srat.c.swp differ
diff -urN orig/arch/x86_64/mm/srat.c current/arch/x86_64/mm/srat.c
--- orig/arch/x86_64/mm/srat.c 2006-08-04 00:41:17.000000000 -0400
+++ current/arch/x86_64/mm/srat.c 2006-08-04 01:02:25.000000000 -0400
@@ -21,12 +21,6 @@
#include <asm/numa.h>
#include <asm/e820.h>

-#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
- defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \
- && !defined(CONFIG_MEMORY_HOTPLUG)
-#define RESERVE_HOTADD 1
-#endif
-
static struct acpi_table_slit *acpi_slit;

static nodemask_t nodes_parsed __initdata;
@@ -34,9 +28,6 @@
static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
static int found_add_area __initdata;
int hotadd_percent __initdata = 0;
-#ifndef RESERVE_HOTADD
-#define hotadd_percent 0 /* Ignore all settings */
-#endif

/* Too small nodes confuse the VM badly. Usually they result
from BIOS bugs. */
@@ -157,7 +148,7 @@
pxm, pa->apic_id, node);
}

-#ifdef RESERVE_HOTADD
+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
* Protect against too large hotadd areas that would fill up memory.
*/
@@ -200,15 +191,37 @@
return 1;
}

+static int update_end_of_memory(unsigned long end)
+{
+ found_add_area = 1;
+ if ((end >> PAGE_SHIFT) > end_pfn)
+ end_pfn = end >> PAGE_SHIFT;
+ return 1;
+}
+
+static inline int save_add_info(void)
+{
+ return hotadd_percent > 0;
+}
+#else
+int update_end_of_memory(unsigned long end) {return 0;}
+static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+static inline int save_add_info(void) {return 1;}
+#else
+static inline int save_add_info(void) {return 0;}
+#endif
+#endif
/*
- * It is fine to add this area to the nodes data it will be used later
+ * Update nodes_add and decide if to include add are in the zone.
+ * Both SPARSE and RESERVE need nodes_add infomation.
* This code supports one contigious hot add area per node.
*/
static int reserve_hotadd(int node, unsigned long start, unsigned long end)
{
unsigned long s_pfn = start >> PAGE_SHIFT;
unsigned long e_pfn = end >> PAGE_SHIFT;
- int changed = 0;
+ int ret = 0, changed = 0;
struct bootnode *nd = &nodes_add[node];

/* I had some trouble with strange memory hotadd regions breaking
@@ -235,7 +248,6 @@

/* Looks good */

- found_add_area = 1;
if (nd->start == nd->end) {
nd->start = start;
nd->end = end;
@@ -253,14 +265,12 @@
printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
}

- if ((nd->end >> PAGE_SHIFT) > end_pfn)
- end_pfn = nd->end >> PAGE_SHIFT;
+ ret = update_end_of_memory(nd->end);

if (changed)
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
- return 0;
+ return ret;
}
-#endif

/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
void __init
@@ -279,7 +289,7 @@
}
if (ma->flags.enabled == 0)
return;
- if (ma->flags.hot_pluggable && hotadd_percent == 0)
+ if (ma->flags.hot_pluggable && !save_add_info())
return;
start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
@@ -318,15 +328,13 @@
printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
nd->start, nd->end);

-#ifdef RESERVE_HOTADD
- if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
+ if (ma->flags.hot_pluggable && !reserve_hotadd(node, start, end) < 0) {
/* Ignore hotadd region. Undo damage */
printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
*nd = oldnode;
if ((nd->start | nd->end) == 0)
node_clear(node, nodes_parsed);
}
-#endif
}

/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -342,7 +350,6 @@
unsigned long e = nodes[i].end >> PAGE_SHIFT;
pxmram += e - s;
pxmram -= e820_hole_size(s, e);
- pxmram -= nodes_add[i].end - nodes_add[i].start;
if ((long)pxmram < 0)
pxmram = 0;
}

2006-08-04 13:14:08

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 3/10] hot-add-mem x86_64: Kconfig changes

From: Keith Mannthey <[email protected]>

Create Kconfig namespace for MEMORY_HOTPLUG_RESERVE and MEMORY_HOTPLUG_SPARSE.
This is needed to create a distinction between the 2 paths. Selecting the high
level option of MEMORY_HOTPLUG will get you MEMORY_HOTPLUG_SPARSE if you have
sparsemem enabled or MEMORY_HOTPLUG_RESERVE if you are x86_64 with discontig
and ACPI numa support.


Signed-off-by: Keith Mannthey<[email protected]>
---
arch/x86_64/Kconfig | 4 ++++
mm/Kconfig | 7 ++++++-
2 files changed, 10 insertions(+), 1 deletion(-)

diff -urN linux-2.6.18-rc3-stock/arch/x86_64/Kconfig linux-2.6.17/arch/x86_64/Kconfig
--- linux-2.6.18-rc3-stock/arch/x86_64/Kconfig 2006-07-31 21:08:04.000000000 -0400
+++ linux-2.6.17/arch/x86_64/Kconfig 2006-07-31 21:24:54.000000000 -0400
@@ -349,6 +349,10 @@

source "mm/Kconfig"

+config MEMORY_HOTPLUG_RESERVE
+ def_bool y
+ depends on (MEMORY_HOTPLUG && DISCONTIGMEM)
+
config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool y
depends on NUMA
diff -urN linux-2.6.18-rc3-stock/mm/Kconfig linux-2.6.17/mm/Kconfig
--- linux-2.6.18-rc3-stock/mm/Kconfig 2006-07-31 21:08:04.000000000 -0400
+++ linux-2.6.17/mm/Kconfig 2006-07-31 21:25:18.000000000 -0400
@@ -115,12 +115,17 @@
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
- depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
+ depends on SPARSEMEM || X86_64_ACPI_NUMA
+ depends on HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
depends on (IA64 || X86 || PPC64)

comment "Memory hotplug is currently incompatible with Software Suspend"
depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND

+config MEMORY_HOTPLUG_SPARSE
+ def_bool y
+ depends on SPARSEMEM && MEMORY_HOTPLUG
+
# Heavily threaded applications may benefit from splitting the mm-wide
# page_table_lock, so that faults on different parts of the user address
# space can be handled with less contention: split it at this NR_CPUS.

2006-08-04 13:14:34

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 2/10] hot-add-mem x86_64: fixup externs

From: Keith Mannthey <[email protected]>

fixup externs in memory_hotplug.c. Cleanup.

Signed-off-by: Keith Mannthey<[email protected]>
---
include/linux/memory_hotplug.h | 2 ++
include/linux/mm.h | 2 ++
mm/memory_hotplug.c | 4 ----
3 files changed, 4 insertions(+), 4 deletions(-)

diff -urN orig/include/linux/memory_hotplug.h current/include/linux/memory_hotplug.h
--- orig/include/linux/memory_hotplug.h 2006-08-04 00:41:19.000000000 -0400
+++ current/include/linux/memory_hotplug.h 2006-08-04 00:50:34.000000000 -0400
@@ -172,5 +172,7 @@
extern int add_memory(int nid, u64 start, u64 size);
extern int arch_add_memory(int nid, u64 start, u64 size);
extern int remove_memory(u64 start, u64 size);
+extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
+ int nr_pages);

#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff -urN orig/include/linux/mm.h current/include/linux/mm.h
--- orig/include/linux/mm.h 2006-08-04 00:41:19.000000000 -0400
+++ current/include/linux/mm.h 2006-08-04 00:47:49.000000000 -0400
@@ -884,6 +884,8 @@
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
+extern void zonetable_add(struct zone *zone, int nid, int zid,
+ unsigned long pfn, unsigned long size);

#ifdef CONFIG_NUMA
extern void setup_per_cpu_pageset(void);
diff -urN orig/mm/memory_hotplug.c current/mm/memory_hotplug.c
--- orig/mm/memory_hotplug.c 2006-08-04 00:42:02.000000000 -0400
+++ current/mm/memory_hotplug.c 2006-08-04 00:47:49.000000000 -0400
@@ -24,8 +24,6 @@

#include <asm/tlbflush.h>

-extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
- unsigned long size);
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
struct pglist_data *pgdat = zone->zone_pgdat;
@@ -45,8 +43,6 @@
return 0;
}

-extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
- int nr_pages);
static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
{
int nr_pages = PAGES_PER_SECTION;

2006-08-04 13:15:10

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 8/10] hot-add-mem x86_64: use CONFIG_MEMORY_HOTPLUG_SPARSE

From: Keith Mannthey <[email protected]>

Migrate CONFIG_MEMORY_HOTPLUG to CONFIG_MEMORY_HOTPLUG_SPARSE where needed.

Signed-off-by: Keith Mannthey<[email protected]>
---
drivers/base/Makefile | 2 +-
include/linux/memory.h | 4 ++--
mm/memory_hotplug.c | 4 +++-
3 files changed, 6 insertions(+), 4 deletions(-)

diff -urN orig/drivers/base/Makefile current/drivers/base/Makefile
--- orig/drivers/base/Makefile 2006-08-04 00:41:17.000000000 -0400
+++ current/drivers/base/Makefile 2006-08-04 01:41:04.000000000 -0400
@@ -8,7 +8,7 @@
obj-$(CONFIG_ISA) += isa.o
obj-$(CONFIG_FW_LOADER) += firmware_class.o
obj-$(CONFIG_NUMA) += node.o
-obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o
+obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
obj-$(CONFIG_SMP) += topology.o
obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor.o

diff -urN orig/include/linux/memory.h current/include/linux/memory.h
--- orig/include/linux/memory.h 2006-06-17 21:49:35.000000000 -0400
+++ current/include/linux/memory.h 2006-08-04 01:42:24.000000000 -0400
@@ -57,7 +57,7 @@
struct notifier_block;
struct mem_section;

-#ifndef CONFIG_MEMORY_HOTPLUG
+#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
static inline int memory_dev_init(void)
{
return 0;
@@ -78,7 +78,7 @@
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)


-#endif /* CONFIG_MEMORY_HOTPLUG */
+#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */

#define hotplug_memory_notifier(fn, pri) { \
static struct notifier_block fn##_mem_nb = \
diff -urN orig/mm/memory_hotplug.c current/mm/memory_hotplug.c
--- orig/mm/memory_hotplug.c 2006-08-04 00:54:44.000000000 -0400
+++ current/mm/memory_hotplug.c 2006-08-04 01:46:56.000000000 -0400
@@ -24,6 +24,7 @@

#include <asm/tlbflush.h>

+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
struct pglist_data *pgdat = zone->zone_pgdat;
@@ -189,7 +190,8 @@
vm_total_pages = nr_free_pagecache_pages();
return 0;
}
-
+#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
+
static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
{
struct pglist_data *pgdat;

2006-08-04 13:15:14

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 5/10] hot-add-mem x86_64: memory_add_physaddr_to_nid enable

From: Keith Mannthey <[email protected]>

The api for hot-add memory already has a construct for finding nodes based on
an address, memory_add_physaddr_to_nid. This patch allows the function to do
something besides return 0. It uses the nodes_add information to look up the node
info for a hot add event.

Signed-off-by: Keith Mannthey<[email protected]>
---
init.c | 20 +++++++-------------
srat.c | 13 ++++++++++++-
2 files changed, 19 insertions(+), 14 deletions(-)

diff -urN linux-2.6.17/arch/x86_64/mm/init.c current/arch/x86_64/mm/init.c
--- linux-2.6.17/arch/x86_64/mm/init.c 2006-08-04 01:30:39.000000000 -0400
+++ current/arch/x86_64/mm/init.c 2006-08-04 01:24:04.000000000 -0400
@@ -517,19 +517,6 @@

#ifdef CONFIG_MEMORY_HOTPLUG
/*
- * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
- * via probe interface of sysfs. If acpi notifies hot-add event, then it
- * can tell node id by searching dsdt. But, probe interface doesn't have
- * node id. So, return 0 as node id at this time.
- */
-#ifdef CONFIG_NUMA
-int memory_add_physaddr_to_nid(u64 start)
-{
- return 0;
-}
-#endif
-
-/*
* Memory is added always to NORMAL zone. This means you will never get
* additional DMA/DMA32 memory.
*/
@@ -560,6 +547,13 @@
}
EXPORT_SYMBOL_GPL(remove_memory);

+#ifndef CONFIG_ACPI_NUMA
+int memory_add_physaddr_to_nid(u64 start)
+{
+ return 0;
+}
+#endif
+
#else /* CONFIG_MEMORY_HOTPLUG */
/*
* Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
diff -urN linux-2.6.17/arch/x86_64/mm/srat.c current/arch/x86_64/mm/srat.c
--- linux-2.6.17/arch/x86_64/mm/srat.c 2006-08-04 01:31:44.000000000 -0400
+++ current/arch/x86_64/mm/srat.c 2006-08-04 01:24:04.000000000 -0400
@@ -25,7 +25,7 @@

static nodemask_t nodes_parsed __initdata;
static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
+static struct bootnode nodes_add[MAX_NUMNODES];
static int found_add_area __initdata;
int hotadd_percent __initdata = 0;

@@ -457,3 +457,14 @@
}

EXPORT_SYMBOL(__node_distance);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+ int i, ret = 0;
+
+ for_each_node(i)
+ if (nodes_add[i].start <= start && nodes_add[i].end > start)
+ ret = i;
+
+ return ret;
+}

2006-08-04 13:15:57

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 9/10] hot-add-mem x86_64: use CONFIG_MEMORY_HOTPLUG_RESERVE

From: Keith Mannthey <[email protected]>

The api for hot-add memory already has a construct for finding nodes based on
an address, memory_add_physaddr_to_nid. This patch allows the function to do
something besides return 0. It uses the nodes_add information to look up the node
info for a hot add event.

Signed-off-by: Keith Mannthey<[email protected]>
---
init.c | 20 +++++++-------------
srat.c | 13 ++++++++++++-
2 files changed, 19 insertions(+), 14 deletions(-)

diff -urN linux-2.6.17/arch/x86_64/mm/init.c current/arch/x86_64/mm/init.c
--- linux-2.6.17/arch/x86_64/mm/init.c 2006-08-04 01:30:39.000000000 -0400
+++ current/arch/x86_64/mm/init.c 2006-08-04 01:24:04.000000000 -0400
@@ -517,19 +517,6 @@

#ifdef CONFIG_MEMORY_HOTPLUG
/*
- * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
- * via probe interface of sysfs. If acpi notifies hot-add event, then it
- * can tell node id by searching dsdt. But, probe interface doesn't have
- * node id. So, return 0 as node id at this time.
- */
-#ifdef CONFIG_NUMA
-int memory_add_physaddr_to_nid(u64 start)
-{
- return 0;
-}
-#endif
-
-/*
* Memory is added always to NORMAL zone. This means you will never get
* additional DMA/DMA32 memory.
*/
@@ -560,6 +547,13 @@
}
EXPORT_SYMBOL_GPL(remove_memory);

+#ifndef CONFIG_ACPI_NUMA
+int memory_add_physaddr_to_nid(u64 start)
+{
+ return 0;
+}
+#endif
+
#else /* CONFIG_MEMORY_HOTPLUG */
/*
* Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
diff -urN linux-2.6.17/arch/x86_64/mm/srat.c current/arch/x86_64/mm/srat.c
--- linux-2.6.17/arch/x86_64/mm/srat.c 2006-08-04 01:31:44.000000000 -0400
+++ current/arch/x86_64/mm/srat.c 2006-08-04 01:24:04.000000000 -0400
@@ -25,7 +25,7 @@

static nodemask_t nodes_parsed __initdata;
static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
+static struct bootnode nodes_add[MAX_NUMNODES];
static int found_add_area __initdata;
int hotadd_percent __initdata = 0;

@@ -457,3 +457,14 @@
}

EXPORT_SYMBOL(__node_distance);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+ int i, ret = 0;
+
+ for_each_node(i)
+ if (nodes_add[i].start <= start && nodes_add[i].end > start)
+ ret = i;
+
+ return ret;
+}

2006-08-04 13:16:08

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 7/10] hot-add-mem x86_64: x86_64 kernel mapping fix

From: Keith Mannthey <[email protected]>

Fix for the x86_64 kernel mapping code. Without this patch the update path
only inits one pmd_page worth of memory and tramples any entries on it.
Now the calling convention to phys_pmd_init and phys_pud_init is to always
pass a [pmd/pud] page, not an offset within a page.

Signed-off-by: Keith Mannthey<[email protected]>
---
init.c | 51 ++++++++++++++++++++++++++-------------------------
1 files changed, 26 insertions(+), 25 deletions(-)

diff -urN linux-2.6.17-stock/arch/x86_64/mm/init.c linux-2.6.17/arch/x86_64/mm/init.c
--- linux-2.6.17-stock/arch/x86_64/mm/init.c 2006-08-04 08:01:57.000000000 -0400
+++ linux-2.6.17/arch/x86_64/mm/init.c 2006-08-04 08:01:01.000000000 -0400
@@ -250,12 +250,13 @@
}

static void __meminit
-phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
- int i;
+ int i = pmd_index(address);

- for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
+ for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
unsigned long entry;
+ pmd_t *pmd = pmd_page + pmd_index(address);

if (address >= end) {
if (!after_bootmem)
@@ -263,6 +264,10 @@
set_pmd(pmd, __pmd(0));
break;
}
+
+ if (pmd_val(*pmd))
+ continue;
+
entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
entry &= __supported_pte_mask;
set_pmd(pmd, __pmd(entry));
@@ -272,45 +277,41 @@
static void __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
- pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
-
- if (pmd_none(*pmd)) {
- spin_lock(&init_mm.page_table_lock);
- phys_pmd_init(pmd, address, end);
- spin_unlock(&init_mm.page_table_lock);
- __flush_tlb_all();
- }
+ pmd_t *pmd = pmd_offset(pud,0);
+ spin_lock(&init_mm.page_table_lock);
+ phys_pmd_init(pmd, address, end);
+ spin_unlock(&init_mm.page_table_lock);
+ __flush_tlb_all();
}

-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
- long i = pud_index(address);
+ int i = pud_index(addr);

- pud = pud + i;

- if (after_bootmem && pud_val(*pud)) {
- phys_pmd_update(pud, address, end);
- return;
- }
-
- for (; i < PTRS_PER_PUD; pud++, i++) {
+ for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
int map;
- unsigned long paddr, pmd_phys;
+ unsigned long pmd_phys;
+ pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;

- paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
- if (paddr >= end)
+ if (addr >= end)
break;

- if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
+ if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
set_pud(pud, __pud(0));
continue;
}

+ if (pud_val(*pud)) {
+ phys_pmd_update(pud, addr, end);
+ continue;
+ }
+
pmd = alloc_low_page(&map, &pmd_phys);
spin_lock(&init_mm.page_table_lock);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
- phys_pmd_init(pmd, paddr, end);
+ phys_pmd_init(pmd, addr, end);
spin_unlock(&init_mm.page_table_lock);
unmap_low_page(map);
}

2006-08-04 13:15:14

by Keith Mannthey

[permalink] [raw]
Subject: [PATCH 10/10] hot-add-mem x86_64: valid add range check

From: Keith Mannthey <[email protected]>

Introduce and implement valid_add_memory_range to MEMORY_HOTPLUG. The
RESERVE path needs to be careful to check that it only onlines
correct memory. The SPARSEMEM path is using resource code to do this so
it gets a no-op check. This framework makes it easy to add specific checks
for valid add memory ranges as the need arises.

Signed-off-by: Keith Mannthey<[email protected]>
---
arch/x86_64/mm/srat.c | 6 ++++++
include/linux/memory_hotplug.h | 2 +-
mm/memory_hotplug.c | 8 ++++++++
3 files changed, 15 insertions(+), 1 deletion(-)

diff -urN orig/arch/x86_64/mm/srat.c current/arch/x86_64/mm/srat.c
--- orig/arch/x86_64/mm/srat.c 2006-08-04 06:56:24.000000000 -0400
+++ current/arch/x86_64/mm/srat.c 2006-08-04 03:39:29.000000000 -0400
@@ -203,6 +203,12 @@
{
return hotadd_percent > 0;
}
+
+inline int valid_add_memory_range(int nid, u64 start, u64 size) {
+ if (nodes_add[nid].start <= start && nodes_add[nid].end >= (start+size))
+ return 1;
+ return 0;
+}
#else
int update_end_of_memory(unsigned long end) {return 0;}
static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
diff -urN orig/include/linux/memory_hotplug.h current/include/linux/memory_hotplug.h
--- orig/include/linux/memory_hotplug.h 2006-08-04 06:56:24.000000000 -0400
+++ current/include/linux/memory_hotplug.h 2006-08-04 02:08:45.000000000 -0400
@@ -159,7 +159,7 @@
dump_stack();
return -ENOSYS;
}
-
+extern int valid_add_memory_range(int nid, u64 start, u64 size);
#endif /* ! CONFIG_MEMORY_HOTPLUG */
static inline int __remove_pages(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages)
diff -urN orig/mm/memory_hotplug.c current/mm/memory_hotplug.c
--- orig/mm/memory_hotplug.c 2006-08-04 06:59:05.000000000 -0400
+++ current/mm/memory_hotplug.c 2006-08-04 03:08:23.000000000 -0400
@@ -220,6 +220,11 @@
vm_total_pages = nr_free_pagecache_pages();
return 0;
}
+
+inline int valid_add_memory_range(int nid, u64 start, u64 size) {
+ return 1;
+}
+
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */

static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
@@ -257,6 +262,9 @@
int new_pgdat = 0;
struct resource *res;
int ret;
+
+ if (!valid_add_memory_range(nid,start,size))
+ return -EINVAL;

res = register_memory_resource(start, size);
if (!res)

2006-08-04 15:15:56

by Mika Penttilä

[permalink] [raw]
Subject: Re: [Lhms-devel] [PATCH 4/10] hot-add-mem x86_64: Enable SPARSEMEM in srat.c

Keith Mannthey wrote:
> From: Keith Mannthey <[email protected]>
>
> Enable x86_64 srat.c to share code between both reserve and sparsemem based add memory
> paths. Both paths need the hot-add area node locality infomration (nodes_add). This
> code refactors the code path to allow this.
>
> Signed-off-by: Keith Mannthey<[email protected]>
>
Ok nice, but.... hotadd_enough_memory() is broken, it does weird things
with nd->start and nd->end which haven't been assigned even values yet.
Also, mysterious business with find_e820_area and last_area_end...These
areas are not in e820...

And why the reserve_bootmem_node()? Areas not RAM (per e820) are
reserved anyways.

--Mika

2006-08-04 17:42:39

by Dave Hansen

[permalink] [raw]
Subject: Re: [PATCH 4/10] hot-add-mem x86_64: Enable SPARSEMEM in srat.c

On Fri, 2006-08-04 at 07:14 -0600, Keith Mannthey wrote:
> From: Keith Mannthey <[email protected]>
>
> Enable x86_64 srat.c to share code between both reserve and sparsemem based add memory
> paths. Both paths need the hot-add area node locality infomration (nodes_add). This
> code refactors the code path to allow this.

I won't respond to all of these, but the set looks pretty darn sane.

> Signed-off-by: Keith Mannthey<[email protected]>
> ---
> srat.c | 51 +++++++++++++++++++++++++++++----------------------
> 1 files changed, 29 insertions(+), 22 deletions(-)
>
> Files orig/arch/x86_64/mm/.srat.c.swp and current/arch/x86_64/mm/.srat.c.swp differ
> diff -urN orig/arch/x86_64/mm/srat.c current/arch/x86_64/mm/srat.c
> --- orig/arch/x86_64/mm/srat.c 2006-08-04 00:41:17.000000000 -0400
> +++ current/arch/x86_64/mm/srat.c 2006-08-04 01:02:25.000000000 -0400
> @@ -21,12 +21,6 @@
> #include <asm/numa.h>
> #include <asm/e820.h>
>
> -#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
> - defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \
> - && !defined(CONFIG_MEMORY_HOTPLUG)
> -#define RESERVE_HOTADD 1
> -#endif

Thank goodness this is going away :)

> static struct acpi_table_slit *acpi_slit;
>
> static nodemask_t nodes_parsed __initdata;
> @@ -34,9 +28,6 @@
> static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
> static int found_add_area __initdata;
> int hotadd_percent __initdata = 0;
> -#ifndef RESERVE_HOTADD
> -#define hotadd_percent 0 /* Ignore all settings */
> -#endif
>
> /* Too small nodes confuse the VM badly. Usually they result
> from BIOS bugs. */
> @@ -157,7 +148,7 @@
> pxm, pa->apic_id, node);
> }
>
> -#ifdef RESERVE_HOTADD
> +#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> /*
> * Protect against too large hotadd areas that would fill up memory.
> */
> @@ -200,15 +191,37 @@
> return 1;
> }
>
> +static int update_end_of_memory(unsigned long end)
> +{
> + found_add_area = 1;
> + if ((end >> PAGE_SHIFT) > end_pfn)
> + end_pfn = end >> PAGE_SHIFT;
> + return 1;
> +}

I don't have a really strong feeling either way, but you can use
include/linux/pfn.h and PFN_DOWN() here, instead of the explicit >>'s.

> +static inline int save_add_info(void)
> +{
> + return hotadd_percent > 0;
> +}

This name is a wee bit too generic to be in the global namespace.
Perhaps there should be a "memory" in there somewhere.


> -#ifdef RESERVE_HOTADD
> - if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
> + if (ma->flags.hot_pluggable && !reserve_hotadd(node, start, end) < 0) {
> /* Ignore hotadd region. Undo damage */
> printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
> *nd = oldnode;
> if ((nd->start | nd->end) == 0)
> node_clear(node, nodes_parsed);
> }
> -#endif
> }

Cool. No more #ifdef.

-- Dave

2006-08-04 19:37:25

by Keith Mannthey

[permalink] [raw]
Subject: Re: [Lhms-devel] [PATCH 4/10] hot-add-mem x86_64: Enable SPARSEMEM in srat.c

On Fri, 2006-08-04 at 18:17 +0300, Mika Penttilä wrote:
> Keith Mannthey wrote:
> > From: Keith Mannthey <[email protected]>
> >
> > Enable x86_64 srat.c to share code between both reserve and sparsemem based add memory
> > paths. Both paths need the hot-add area node locality infomration (nodes_add). This
> > code refactors the code path to allow this.
> >
> > Signed-off-by: Keith Mannthey<[email protected]>
> >
> Ok nice, but.... hotadd_enough_memory() is broken, it does weird things
> with nd->start and nd->end which haven't been assigned even values yet.
> Also, mysterious business with find_e820_area and last_area_end...These
> areas are not in e820...
Thanks for pointing out the breakage in hotadd_enough_memory. I think
the find_e820_area stuff is to make sure the box has the memory to
reserve the maps....but it doesn't look to do it right. I can take a
pass at a re-write for this function.


SRAT hot-add memory areas can be outside the e820. The e820 just
exposes the end of memory that is present in the box even though there
may be an add area on the other side of that.

For example, my memory is laid out as follows.

SRAT: Node 0 PXM 0 0-80000000
SRAT: Node 0 PXM 0 0-470000000
SRAT: Node 0 PXM 0 0-1070000000
SRAT: hot plug zone found 470000000 - 1070000000
SRAT: Node 1 PXM 1 1070000000-1160000000
SRAT: Node 1 PXM 1 1070000000-3200000000
SRAT: hot plug zone found 1160000000 - 3200000000

The e820 ends at 1160000000 but there is still a possible add zone on
the other side of that.

The first hot plug zone is reserved by the e820 but not the 2nd.
> And why the reserve_bootmem_node()? Areas not RAM (per e820) are
> reserved anyways.

To make sure the areas that are outside of the e820 are reserved.

Thanks,
Keith

2006-08-05 05:40:19

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH 1/10] hot-add-mem x86_64: acpi motherboard fix

On Fri, 4 Aug 2006 07:13:51 -0600
Keith Mannthey <[email protected]> wrote:
> I have worked to integrate the feedback I recived on the last round of patches
> and welcome more ideas/advice. Thanks to everyone who has provied input on
> these patches already.
>
It looks like patch 5/10 and patch 9/10 have the same diffs...
Please send the right patch.

-Kame

2006-08-05 05:52:04

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH 1/10] hot-add-mem x86_64: acpi motherboard fix

On Fri, 4 Aug 2006 07:13:51 -0600
Keith Mannthey <[email protected]> wrote:
> I have worked to integrate the feedback I recived on the last round of patches
> and welcome more ideas/advice. Thanks to everyone who has provied input on
> these patches already.
>
Just from review...

If a new zone, which was empty at boot, is added into the system,
build_all_zonelists() has to be called. (see online_pages() in memory_hotplug.c)
It looks like x86_64's __add_pages() doesn't call it.

Precisely, look at online_pages() (CONFIG_MEMORY_HOTPLUG_SPARSE)
==
setup_per_zone_pages_min();

if (need_zonelists_rebuild)
build_all_zonelists();
vm_total_pages = nr_free_pagecache_pages();
==
These 3 calls are necessary, I think.

-Kame

2006-08-07 17:42:04

by Keith Mannthey

[permalink] [raw]
Subject: Re: [PATCH 9/10] hot-add-mem x86_64: use CONFIG_MEMORY_HOTPLUG_RESERVE

On Fri, 2006-08-04 at 07:14 -0600, Keith Mannthey wrote:
> From: Keith Mannthey <[email protected]>
>


Oops, looks like I attached the wrong patch in the original email :(
Here is the real patch...

From: Keith Mannthey <[email protected]>

Make CONFIG_MEMORY_HOTPLUG_RESERVE and CONFIG_MEMORY_HOTPLUG_SPARSE
build in the same tree.

Signed-off-by: Keith Mannthey<[email protected]>
---
arch/x86_64/mm/init.c | 10 +++++---
mm/memory_hotplug.c | 60 ++++++++++++++++++++++++
+------------------------- 2 files changed, 36 insertions(+), 34
deletions(-)

diff -urN linux-2.6.17-stock/arch/x86_64/mm/init.c
linux-2.6.17/arch/x86_64/mm/init.c
--- linux-2.6.17-stock/arch/x86_64/mm/init.c 2006-08-04
08:03:44.000000000 -0400
+++ linux-2.6.17/arch/x86_64/mm/init.c 2006-08-04 08:04:40.000000000
-0400
@@ -529,12 +529,12 @@
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;

+ init_memory_mapping(start, (start + size -1));
+
ret = __add_pages(zone, start_pfn, nr_pages);
if (ret)
goto error;

- init_memory_mapping(start, (start + size -1));
-
return ret;
error:
printk("%s: Problem encountered in __add_pages!\n", __func__);
@@ -555,7 +555,9 @@
}
#endif

-#else /* CONFIG_MEMORY_HOTPLUG */
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
* Memory Hotadd without sparsemem. The mem_maps have been allocated in
advance,
* just online the pages.
@@ -581,7 +583,7 @@
}
return err;
}
-#endif /* CONFIG_MEMORY_HOTPLUG */
+#endif

static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
kcore_modules,
kcore_vsyscall;
diff -urN linux-2.6.17-stock/mm/memory_hotplug.c
linux-2.6.17/mm/memory_hotplug.c
--- linux-2.6.17-stock/mm/memory_hotplug.c 2006-08-04 08:03:54.000000000
-0400
+++ linux-2.6.17/mm/memory_hotplug.c 2006-08-04 08:04:40.000000000 -0400
@@ -24,6 +24,36 @@

#include <asm/tlbflush.h>

+/* add this memory to iomem resource */
+static struct resource *register_memory_resource(u64 start, u64 size)
+{
+ struct resource *res;
+ res = kzalloc(sizeof(struct resource), GFP_KERNEL);
+ BUG_ON(!res);
+
+ res->name = "System RAM";
+ res->start = start;
+ res->end = start + size - 1;
+ res->flags = IORESOURCE_MEM;
+ if (request_resource(&iomem_resource, res) < 0) {
+ printk("System RAM resource %llx - %llx cannot be added\n",
+ (unsigned long long)res->start, (unsigned long long)res->end);
+ kfree(res);
+ res = NULL;
+ }
+ return res;
+}
+
+static void release_memory_resource(struct resource *res)
+{
+ if (!res)
+ return;
+ release_resource(res);
+ kfree(res);
+ return;
+}
+
+
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
@@ -220,36 +250,6 @@
return;
}

-/* add this memory to iomem resource */
-static struct resource *register_memory_resource(u64 start, u64 size)
-{
- struct resource *res;
- res = kzalloc(sizeof(struct resource), GFP_KERNEL);
- BUG_ON(!res);
-
- res->name = "System RAM";
- res->start = start;
- res->end = start + size - 1;
- res->flags = IORESOURCE_MEM;
- if (request_resource(&iomem_resource, res) < 0) {
- printk("System RAM resource %llx - %llx cannot be added\n",
- (unsigned long long)res->start, (unsigned long long)res->end);
- kfree(res);
- res = NULL;
- }
- return res;
-}
-
-static void release_memory_resource(struct resource *res)
-{
- if (!res)
- return;
- release_resource(res);
- kfree(res);
- return;
-}
-
-

int add_memory(int nid, u64 start, u64 size)
{


2006-08-07 18:39:44

by Keith Mannthey

[permalink] [raw]
Subject: Re: [PATCH 1/10] hot-add-mem x86_64: acpi motherboard fix

On Sat, 2006-08-05 at 14:51 +0900, KAMEZAWA Hiroyuki wrote:
> On Fri, 4 Aug 2006 07:13:51 -0600
> Keith Mannthey <[email protected]> wrote:
> > I have worked to integrate the feedback I recived on the last round of patches
> > and welcome more ideas/advice. Thanks to everyone who has provied input on
> > these patches already.
> >
> Just from review...
>
> If new zone , which was empty at boot, are added into the system.
> build_all_zonelists() has to be called. (see online_pages() in memory_hotplug.c)
> it looks x86_64's __add_pages() doesn't calles it.

With RESERVE there are no empty zones. All zones (including add-areas)
are set up during boot and the hot-add areas are reserved in the bootmem
allocator.

Zones don't change size; there is no adding to the zone, just on-lining of
pages that are already present in the zone.


--
keith mannthey <[email protected]>
Linux Technology Center IBM

2006-08-08 00:29:06

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [Lhms-devel] [PATCH 1/10] hot-add-mem x86_64: acpi motherboard fix

On Mon, 07 Aug 2006 11:39:27 -0700
keith mannthey <[email protected]> wrote:

> On Sat, 2006-08-05 at 14:51 +0900, KAMEZAWA Hiroyuki wrote:
> > On Fri, 4 Aug 2006 07:13:51 -0600
> > Keith Mannthey <[email protected]> wrote:
> > > I have worked to integrate the feedback I recived on the last round of patches
> > > and welcome more ideas/advice. Thanks to everyone who has provied input on
> > > these patches already.
> > >
> > Just from review...
> >
> > If new zone , which was empty at boot, are added into the system.
> > build_all_zonelists() has to be called. (see online_pages() in memory_hotplug.c)
> > it looks x86_64's __add_pages() doesn't calles it.
>
> With RESERVE there are not empty zones. All zones (including add-areas)
> are setup during boot and hot add areas reserved in the bootmem
> allocator.
>
> Zones don't change size there is no adding to the zone just on-lining on
> pages at are already present in the zone.
>
Hmm, curious.
please explain.
==
int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
{
int err = -EIO;
unsigned long pfn;
unsigned long total = 0, mem = 0;
for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
if (pfn_valid(pfn)) {
online_page(pfn_to_page(pfn));
err = 0;
mem++;
}
total++;
}
if (!err) {
z->spanned_pages += total;
z->present_pages += mem; -------------------------------(*)
z->zone_pgdat->node_spanned_pages += total;
z->zone_pgdat->node_present_pages += mem;
}
return err;
}
==
It looks like the contents of the zone are increased at (*). Am I looking at old code?

==
static inline int populated_zone(struct zone *zone)
{
return (!!zone->present_pages);
}
==
By "empty zone" I mean a zone which is not populated.

This populated_zone() is used in build_zone_list().
If populated_zone(z)==0, zone "z" is not included in the zonelist and the zone will never be
used.

-Kame

2006-08-08 00:57:00

by Keith Mannthey

[permalink] [raw]
Subject: Re: [Lhms-devel] [PATCH 1/10] hot-add-mem x86_64: acpi motherboard fix

On Tue, 2006-08-08 at 09:31 +0900, KAMEZAWA Hiroyuki wrote:
> On Mon, 07 Aug 2006 11:39:27 -0700
> keith mannthey <[email protected]> wrote:
>
> > On Sat, 2006-08-05 at 14:51 +0900, KAMEZAWA Hiroyuki wrote:
> > > On Fri, 4 Aug 2006 07:13:51 -0600
> > > Keith Mannthey <[email protected]> wrote:
> > > > I have worked to integrate the feedback I recived on the last round of patches
> > > > and welcome more ideas/advice. Thanks to everyone who has provied input on
> > > > these patches already.
> > > >
> > > Just from review...
> > >
> > > If new zone , which was empty at boot, are added into the system.
> > > build_all_zonelists() has to be called. (see online_pages() in memory_hotplug.c)
> > > it looks x86_64's __add_pages() doesn't calles it.
> >
> > With RESERVE there are not empty zones. All zones (including add-areas)
> > are setup during boot and hot add areas reserved in the bootmem
> > allocator.
> >
> > Zones don't change size there is no adding to the zone just on-lining on
> > pages at are already present in the zone.
> >
> Hmm, curious.
> please explain.
> ==
> int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
> {
> int err = -EIO;
> unsigned long pfn;
> unsigned long total = 0, mem = 0;
> for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
> if (pfn_valid(pfn)) {
> online_page(pfn_to_page(pfn));
> err = 0;
> mem++;
> }
> total++;
> }
> if (!err) {
> z->spanned_pages += total;
> z->present_pages += mem; -------------------------------(*)

It is an accounting issue. What I meant by re-sizing is there is no
change to node_mem_map (with reserve CONFIG_FLAT_NODE_MEM_MAP is set).
When the original size for the zone is set up, it views the add areas as
a "hole" in the e820 space.

See arch/x86_64/mm/init.c size_zones.

> ==
> It looks contents of zone is increased at (*). Do I see old code ?
>
> ==
> static inline int populated_zone(struct zone *zone)
> {
> return (!!zone->present_pages);
> }
> ==
> "empty zone" I said means a zone which is not populated.

I know of no x86_64 hardware that supports empty-node hot-add memory. If
it exists, I would recommend using SPARSEMEM-based hot-add. On the HW I am
aware of, there is always some memory present in a node at boot.


Thanks,
Keith

2006-08-08 02:06:15

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [Lhms-devel] [PATCH 1/10] hot-add-mem x86_64: acpi motherboard fix

On Mon, 07 Aug 2006 17:56:56 -0700
keith mannthey <[email protected]> wrote:

> I know of no x86_64 hardware the supports empty node hot-add memory. If
> it exists I would recommend using SPARSEMEM based hot-add. On HW I am
> aware of there is always some memory present in a node at boot.
>
>
O.K one more.

I know x86_64 has ZONE_DMA32. A system booted with only memory below 4G
has no available memory in ZONE_NORMAL. If new memory above 4G is added,
ZONE_NORMAL appears as a *new* zone.
ZONE_NORMAL is empty at boot, so it's not in the zonelist at boot.

Is this not a problem?

-Kame

2006-08-08 02:15:18

by Keith Mannthey

[permalink] [raw]
Subject: Re: [Lhms-devel] [PATCH 1/10] hot-add-mem x86_64: acpi motherboard fix

On Tue, 2006-08-08 at 11:08 +0900, KAMEZAWA Hiroyuki wrote:
> On Mon, 07 Aug 2006 17:56:56 -0700
> keith mannthey <[email protected]> wrote:
>
> > I know of no x86_64 hardware the supports empty node hot-add memory. If
> > it exists I would recommend using SPARSEMEM based hot-add. On HW I am
> > aware of there is always some memory present in a node at boot.
> >
> >
> O.K one more.
>
> I know x86_64 has ZONE_DMA32. A system boot with only memory below 4G
> has no avilable memory in ZONE_NORMAL. If a new memory above 4G is added,
> ZONE_NORMAL comes as *new* zone.
> ZONE_NORMAL is empty at boot, so it's not in zonelist at boot.
>
> is this not problem ?

Perhaps in this situation you could run into trouble. I am not sure if
I can put my hardware into this config but I will try.

Thanks for taking a look at these patches and the reserve path.

Thanks,
Keith