2005-05-04 20:36:53

by Andy Whitcroft

Subject: [3/3] sparsemem memory model for ppc64

Provide the architecture specific implementation for SPARSEMEM for
PPC64 systems.

Signed-off-by: Andy Whitcroft <[email protected]>
Signed-off-by: Dave Hansen <[email protected]>
Signed-off-by: Mike Kravetz <[email protected]> (in part)
Signed-off-by: Martin Bligh <[email protected]>
---
arch/ppc64/Kconfig | 13 ++++++++++++-
arch/ppc64/kernel/setup.c | 1 +
arch/ppc64/mm/Makefile | 2 +-
arch/ppc64/mm/init.c | 24 +++++++++++++++++++-----
include/asm-ppc64/mmzone.h | 36 +++++++++++++++++++++++-------------
include/asm-ppc64/page.h | 3 ++-
include/asm-ppc64/sparsemem.h | 16 ++++++++++++++++
7 files changed, 74 insertions(+), 21 deletions(-)

diff -X /home/apw/brief/lib/vdiff.excl -rupN reference/arch/ppc64/Kconfig current/arch/ppc64/Kconfig
--- reference/arch/ppc64/Kconfig 2005-05-04 20:54:52.000000000 +0100
+++ current/arch/ppc64/Kconfig 2005-05-04 20:54:54.000000000 +0100
@@ -198,6 +198,13 @@ config HMT
This option enables hardware multithreading on RS64 cpus.
pSeries systems p620 and p660 have such a cpu type.

+config ARCH_SELECT_MEMORY_MODEL
+ def_bool y
+
+config ARCH_FLATMEM_ENABLE
+ def_bool y
+ depends on !NUMA
+
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
depends on SMP && PPC_PSERIES
@@ -209,6 +216,10 @@ config ARCH_DISCONTIGMEM_DEFAULT
config ARCH_FLATMEM_ENABLE
def_bool y

+config ARCH_SPARSEMEM_ENABLE
+ def_bool y
+ depends on ARCH_DISCONTIGMEM_ENABLE
+
source "mm/Kconfig"

config HAVE_ARCH_EARLY_PFN_TO_NID
@@ -229,7 +240,7 @@ config NODES_SPAN_OTHER_NODES

config NUMA
bool "NUMA support"
- depends on DISCONTIGMEM
+ default y if DISCONTIGMEM || SPARSEMEM

config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
diff -X /home/apw/brief/lib/vdiff.excl -rupN reference/arch/ppc64/kernel/setup.c current/arch/ppc64/kernel/setup.c
--- reference/arch/ppc64/kernel/setup.c 2005-04-11 19:33:15.000000000 +0100
+++ current/arch/ppc64/kernel/setup.c 2005-05-04 20:54:53.000000000 +0100
@@ -1059,6 +1059,7 @@ void __init setup_arch(char **cmdline_p)

/* set up the bootmem stuff with available memory */
do_init_bootmem();
+ sparse_init();

/* initialize the syscall map in systemcfg */
setup_syscall_map();
diff -X /home/apw/brief/lib/vdiff.excl -rupN reference/arch/ppc64/mm/init.c current/arch/ppc64/mm/init.c
--- reference/arch/ppc64/mm/init.c 2005-05-04 20:54:20.000000000 +0100
+++ current/arch/ppc64/mm/init.c 2005-05-04 20:54:54.000000000 +0100
@@ -601,13 +601,21 @@ EXPORT_SYMBOL(page_is_ram);
* Initialize the bootmem system and give it all the memory we
* have available.
*/
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NEED_MULTIPLE_NODES
void __init do_init_bootmem(void)
{
unsigned long i;
unsigned long start, bootmap_pages;
unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
int boot_mapsize;
+ unsigned long start_pfn, end_pfn;
+ /*
+ * Note presence of first (logical/coalasced) LMB which will
+ * contain RMO region
+ */
+ start_pfn = lmb.memory.region[0].physbase >> PAGE_SHIFT;
+ end_pfn = start_pfn + (lmb.memory.region[0].size >> PAGE_SHIFT);
+ memory_present(0, start_pfn, end_pfn);

/*
* Find an area to use for the bootmem bitmap. Calculate the size of
@@ -623,12 +631,18 @@ void __init do_init_bootmem(void)

max_pfn = max_low_pfn;

- /* add all physical memory to the bootmem map. Also find the first */
+ /* add all physical memory to the bootmem map. Also, find the first
+ * presence of all LMBs*/
for (i=0; i < lmb.memory.cnt; i++) {
unsigned long physbase, size;

physbase = lmb.memory.region[i].physbase;
size = lmb.memory.region[i].size;
+ if (i) { /* already created mappings for first LMB */
+ start_pfn = physbase >> PAGE_SHIFT;
+ end_pfn = start_pfn + (size >> PAGE_SHIFT);
+ }
+ memory_present(0, start_pfn, end_pfn);
free_bootmem(physbase, size);
}

@@ -667,7 +681,7 @@ void __init paging_init(void)
free_area_init_node(0, &contig_page_data, zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
}
-#endif /* CONFIG_DISCONTIGMEM */
+#endif /* ! CONFIG_NEED_MULTIPLE_NODES */

static struct kcore_list kcore_vmem;

@@ -698,7 +712,7 @@ module_init(setup_kcore);

void __init mem_init(void)
{
-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NEED_MULTIPLE_NODES
int nid;
#endif
pg_data_t *pgdat;
@@ -709,7 +723,7 @@ void __init mem_init(void)
num_physpages = max_low_pfn; /* RAM is assumed contiguous */
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);

-#ifdef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_NEED_MULTIPLE_NODES
for_each_online_node(nid) {
if (NODE_DATA(nid)->node_spanned_pages != 0) {
printk("freeing bootmem node %x\n", nid);
diff -X /home/apw/brief/lib/vdiff.excl -rupN reference/arch/ppc64/mm/Makefile current/arch/ppc64/mm/Makefile
--- reference/arch/ppc64/mm/Makefile 2005-01-21 14:04:09.000000000 +0000
+++ current/arch/ppc64/mm/Makefile 2005-05-04 20:54:54.000000000 +0100
@@ -6,6 +6,6 @@ EXTRA_CFLAGS += -mno-minimal-toc

obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \
slb_low.o slb.o stab.o mmap.o
-obj-$(CONFIG_DISCONTIGMEM) += numa.o
+obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o
diff -X /home/apw/brief/lib/vdiff.excl -rupN reference/include/asm-ppc64/mmzone.h current/include/asm-ppc64/mmzone.h
--- reference/include/asm-ppc64/mmzone.h 2005-05-04 20:54:50.000000000 +0100
+++ current/include/asm-ppc64/mmzone.h 2005-05-04 20:54:54.000000000 +0100
@@ -10,9 +10,20 @@
#include <linux/config.h>
#include <asm/smp.h>

-#ifdef CONFIG_DISCONTIGMEM
+/* generic non-linear memory support:
+ *
+ * 1) we will not split memory into more chunks than will fit into the
+ * flags field of the struct page
+ */
+
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES

extern struct pglist_data *node_data[];
+/*
+ * Return a pointer to the node data for node n.
+ */
+#define NODE_DATA(nid) (node_data[nid])

/*
* Following are specific to this numa platform.
@@ -47,30 +58,27 @@ static inline int pa_to_nid(unsigned lon
return nid;
}

-#define pfn_to_nid(pfn) pa_to_nid((pfn) << PAGE_SHIFT)
-
-/*
- * Return a pointer to the node data for node n.
- */
-#define NODE_DATA(nid) (node_data[nid])
-
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)

/*
* Following are macros that each numa implmentation must define.
*/

-/*
- * Given a kernel address, find the home node of the underlying memory.
- */
-#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr))
-
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn)

#define local_mapnr(kvaddr) \
( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr))

+#ifdef CONFIG_DISCONTIGMEM
+
+/*
+ * Given a kernel address, find the home node of the underlying memory.
+ */
+#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr))
+
+#define pfn_to_nid(pfn) pa_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
+
/* Written this way to avoid evaluating arguments twice */
#define discontigmem_pfn_to_page(pfn) \
({ \
@@ -91,6 +99,8 @@ static inline int pa_to_nid(unsigned lon

#endif /* CONFIG_DISCONTIGMEM */

+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
#define early_pfn_to_nid(pfn) pa_to_nid(((unsigned long)pfn) << PAGE_SHIFT)
#endif
diff -X /home/apw/brief/lib/vdiff.excl -rupN reference/include/asm-ppc64/page.h current/include/asm-ppc64/page.h
--- reference/include/asm-ppc64/page.h 2005-04-11 19:33:45.000000000 +0100
+++ current/include/asm-ppc64/page.h 2005-05-04 20:54:54.000000000 +0100
@@ -224,7 +224,8 @@ extern u64 ppc64_pft_size; /* Log 2 of
#define page_to_pfn(page) discontigmem_page_to_pfn(page)
#define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn)
#define pfn_valid(pfn) discontigmem_pfn_valid(pfn)
-#else
+#endif
+#ifdef CONFIG_FLATMEM
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define pfn_valid(pfn) ((pfn) < max_mapnr)
diff -X /home/apw/brief/lib/vdiff.excl -rupN reference/include/asm-ppc64/sparsemem.h current/include/asm-ppc64/sparsemem.h
--- reference/include/asm-ppc64/sparsemem.h 1970-01-01 01:00:00.000000000 +0100
+++ current/include/asm-ppc64/sparsemem.h 2005-05-04 20:54:54.000000000 +0100
@@ -0,0 +1,16 @@
+#ifndef _ASM_PPC64_SPARSEMEM_H
+#define _ASM_PPC64_SPARSEMEM_H 1
+
+#ifdef CONFIG_SPARSEMEM
+/*
+ * SECTION_SIZE_BITS 2^N: how big each section will be
+ * MAX_PHYSADDR_BITS 2^N: how much physical address space we have
+ * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
+ */
+#define SECTION_SIZE_BITS 24
+#define MAX_PHYSADDR_BITS 38
+#define MAX_PHYSMEM_BITS 36
+
+#endif /* CONFIG_SPARSEMEM */
+
+#endif /* _ASM_PPC64_SPARSEMEM_H */
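
As a quick standalone check of the constants above (an illustrative
userspace sketch, not part of the patch): each section spans 2^24 bytes
= 16MB, and with at most 2^36 bytes of memory that gives 2^12 = 4096
sections, so a section index needs only 12 bits -- comfortably within
the page->flags limit mentioned in the mmzone.h comment above.

/*
 * Illustration only: derive the section geometry implied by the
 * SECTION_SIZE_BITS / MAX_PHYSMEM_BITS values defined above.
 * Builds with any C compiler; nothing here is kernel code.
 */
#include <stdio.h>

#define SECTION_SIZE_BITS	24	/* each section covers 2^24 bytes */
#define MAX_PHYSMEM_BITS	36	/* at most 2^36 bytes of memory */

int main(void)
{
	unsigned long section_bytes = 1UL << SECTION_SIZE_BITS;
	unsigned long max_sections = 1UL << (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS);

	printf("section size:     %lu MB\n", section_bytes >> 20);	/* 16 */
	printf("maximum sections: %lu\n", max_sections);		/* 4096 */
	printf("index bits:       %d\n", MAX_PHYSMEM_BITS - SECTION_SIZE_BITS); /* 12 */
	return 0;
}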


2005-05-05 02:33:43

by Olof Johansson

Subject: Re: [3/3] sparsemem memory model for ppc64

Hi,

Just two formatting nitpicks below.

-Olof

On Wed, May 04, 2005 at 09:30:57PM +0100, Andy Whitcroft wrote:
> diff -X /home/apw/brief/lib/vdiff.excl -rupN reference/arch/ppc64/mm/init.c current/arch/ppc64/mm/init.c
> --- reference/arch/ppc64/mm/init.c 2005-05-04 20:54:20.000000000 +0100
> +++ current/arch/ppc64/mm/init.c 2005-05-04 20:54:54.000000000 +0100
[...]
> @@ -623,12 +631,18 @@ void __init do_init_bootmem(void)
>
> max_pfn = max_low_pfn;
>
> - /* add all physical memory to the bootmem map. Also find the first */
> + /* add all physical memory to the bootmem map. Also, find the first
> + * presence of all LMBs*/

CodingStyle: */ on new line

> for (i=0; i < lmb.memory.cnt; i++) {
> unsigned long physbase, size;
>
> physbase = lmb.memory.region[i].physbase;
> size = lmb.memory.region[i].size;
> + if (i) { /* already created mappings for first LMB */
> + start_pfn = physbase >> PAGE_SHIFT;
> + end_pfn = start_pfn + (size >> PAGE_SHIFT);

Comment on new line indented, please


-Olof

2005-05-05 17:38:09

by Andy Whitcroft

Subject: Re: [3/3] sparsemem memory model for ppc64

Olof Johansson wrote:
> Hi,
>
> Just two formatting nitpicks below.

Thanks, this would be better served by rewriting the first comment and
removing the second altogether.

/* Add all physical memory to the bootmem map, mark each area
* present. The first block has already been marked present above.
*/

I note that the diff in question has sneaked into the wrong patch; that
segment represents the memory_present changes. So I'll rediff them with
it there. No overall change to the code.

-apw

2005-05-05 17:53:37

by Mike Kravetz

Subject: Re: [3/3] sparsemem memory model for ppc64

On Wed, May 04, 2005 at 09:30:57PM +0100, Andy Whitcroft wrote:
> + /*
> + * Note presence of first (logical/coalasced) LMB which will
> + * contain RMO region
> + */
> + start_pfn = lmb.memory.region[0].physbase >> PAGE_SHIFT;
> + end_pfn = start_pfn + (lmb.memory.region[0].size >> PAGE_SHIFT);
> + memory_present(0, start_pfn, end_pfn);

I need to take a close look at this again, but I think this special
handling for the RMO region is unnecessary. I added it in the 'early
days of SPARSE' when there were some 'bootstrap' issues and we needed
to initialize some memory before setting up the bootmem bitmap. I'm
pretty sure all those issues have gone away.

--
Mike

2005-05-09 15:49:52

by Andy Whitcroft

Subject: sparsemem ppc64 tidy flat memory comments and fix benign mempresent call

I was going to rediff the memory_present patches, but as -mm has
picked these up already, here is a simple patch to clean up the
errant comment and address a benign duplicate call to memory_present().
It applies on top of the existing patches.

-apw

Tidy up the comments for the ppc64 flat memory support and fix
a currently benign double call to memory_present() for the first
memory block.

Signed-off-by: Andy Whitcroft <[email protected]>

---
init.c | 9 +++++----
1 files changed, 5 insertions(+), 4 deletions(-)

diff -upN reference/arch/ppc64/mm/init.c current/arch/ppc64/mm/init.c
--- reference/arch/ppc64/mm/init.c
+++ current/arch/ppc64/mm/init.c
@@ -631,18 +631,19 @@ void __init do_init_bootmem(void)

max_pfn = max_low_pfn;

- /* add all physical memory to the bootmem map. Also, find the first
- * presence of all LMBs*/
+ /* Add all physical memory to the bootmem map, mark each area
+ * present. The first block has already been marked present above.
+ */
for (i=0; i < lmb.memory.cnt; i++) {
unsigned long physbase, size;

physbase = lmb.memory.region[i].physbase;
size = lmb.memory.region[i].size;
- if (i) { /* already created mappings for first LMB */
+ if (i) {
start_pfn = physbase >> PAGE_SHIFT;
end_pfn = start_pfn + (size >> PAGE_SHIFT);
+ memory_present(0, start_pfn, end_pfn);
}
- memory_present(0, start_pfn, end_pfn);
free_bootmem(physbase, size);
}

2005-05-09 23:04:23

by Joel Schopp

Subject: Re: sparsemem ppc64 tidy flat memory comments and fix benign mempresent call


> diff -upN reference/arch/ppc64/mm/init.c current/arch/ppc64/mm/init.c
> --- reference/arch/ppc64/mm/init.c
> +++ current/arch/ppc64/mm/init.c
> @@ -631,18 +631,19 @@ void __init do_init_bootmem(void)
>
> max_pfn = max_low_pfn;
>
> - /* add all physical memory to the bootmem map. Also, find the first
> - * presence of all LMBs*/
> + /* Add all physical memory to the bootmem map, mark each area
> + * present. The first block has already been marked present above.
> + */
> for (i=0; i < lmb.memory.cnt; i++) {
> unsigned long physbase, size;
>
> physbase = lmb.memory.region[i].physbase;
> size = lmb.memory.region[i].size;
> - if (i) { /* already created mappings for first LMB */
> + if (i) {
> start_pfn = physbase >> PAGE_SHIFT;
> end_pfn = start_pfn + (size >> PAGE_SHIFT);
> + memory_present(0, start_pfn, end_pfn);
> }
> - memory_present(0, start_pfn, end_pfn);
> free_bootmem(physbase, size);
> }

Instead of moving all that around, why don't we just drop the duplicate
and the if altogether? I tested and sent a patch back in March that
cleaned up the non-numa case pretty well.

http://sourceforge.net/mailarchive/message.php?msg_id=11320001

2005-05-10 15:48:10

by Andy Whitcroft

Subject: Re: sparsemem ppc64 tidy flat memory comments and fix benign mempresent call

Testing seems to confirm that we do not need to handle the first memory
block specially in do_init_bootmem.

Signed-off-by: Andy Whitcroft <[email protected]>

diffstat sparsemem-ppc64-flat-first-block-is-not-special
---

diff -upN reference/arch/ppc64/mm/init.c current/arch/ppc64/mm/init.c
--- reference/arch/ppc64/mm/init.c
+++ current/arch/ppc64/mm/init.c
@@ -612,14 +612,6 @@ void __init do_init_bootmem(void)
unsigned long start, bootmap_pages;
unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
int boot_mapsize;
- unsigned long start_pfn, end_pfn;
- /*
- * Note presence of first (logical/coalasced) LMB which will
- * contain RMO region
- */
- start_pfn = lmb.memory.region[0].physbase >> PAGE_SHIFT;
- end_pfn = start_pfn + (lmb.memory.region[0].size >> PAGE_SHIFT);
- memory_present(0, start_pfn, end_pfn);

/*
* Find an area to use for the bootmem bitmap. Calculate the size of
@@ -636,18 +628,19 @@ void __init do_init_bootmem(void)
max_pfn = max_low_pfn;

/* Add all physical memory to the bootmem map, mark each area
- * present. The first block has already been marked present above.
+ * present.
*/
for (i=0; i < lmb.memory.cnt; i++) {
unsigned long physbase, size;
+ unsigned long start_pfn, end_pfn;

physbase = lmb.memory.region[i].physbase;
size = lmb.memory.region[i].size;
- if (i) {
- start_pfn = physbase >> PAGE_SHIFT;
- end_pfn = start_pfn + (size >> PAGE_SHIFT);
- memory_present(0, start_pfn, end_pfn);
- }
+
+ start_pfn = physbase >> PAGE_SHIFT;
+ end_pfn = start_pfn + (size >> PAGE_SHIFT);
+ memory_present(0, start_pfn, end_pfn);
+
free_bootmem(physbase, size);
}



2005-05-10 19:42:37

by Mike Kravetz

Subject: Re: sparsemem ppc64 tidy flat memory comments and fix benign mempresent call

On Tue, May 10, 2005 at 04:45:48PM +0100, Andy Whitcroft wrote:
> Joel, Mike, Dave, could you test this one on your platforms to confirm
> it's widely applicable? If so we can push it up to -mm.

It works on my machine with various config options.

--
Mike

2005-05-17 13:09:16

by Andy Whitcroft

Subject: [PATCH] sparsemem-ppc64-flat-first-block-is-not-special

Ok. Testing seems to show that indeed the initial memory blocks
do not need to be treated specially on ppc64 non-numa systems.
Andrew, could you add this to the sparsemem patches please.
It applies on top of 2.6.12-rc4-mm2.

-apw

Testing seems to confirm that we do not need to handle the first memory
block specially in do_init_bootmem.

Signed-off-by: Andy Whitcroft <[email protected]>

diffstat sparsemem-ppc64-flat-first-block-is-not-special
---
init.c | 21 +++++++--------------
1 files changed, 7 insertions(+), 14 deletions(-)

diff -upN reference/arch/ppc64/mm/init.c current/arch/ppc64/mm/init.c
--- reference/arch/ppc64/mm/init.c
+++ current/arch/ppc64/mm/init.c
@@ -538,14 +538,6 @@ void __init do_init_bootmem(void)
unsigned long start, bootmap_pages;
unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT;
int boot_mapsize;
- unsigned long start_pfn, end_pfn;
- /*
- * Note presence of first (logical/coalasced) LMB which will
- * contain RMO region
- */
- start_pfn = lmb.memory.region[0].physbase >> PAGE_SHIFT;
- end_pfn = start_pfn + (lmb.memory.region[0].size >> PAGE_SHIFT);
- memory_present(0, start_pfn, end_pfn);

/*
* Find an area to use for the bootmem bitmap. Calculate the size of
@@ -562,18 +554,19 @@ void __init do_init_bootmem(void)
max_pfn = max_low_pfn;

/* Add all physical memory to the bootmem map, mark each area
- * present. The first block has already been marked present above.
+ * present.
*/
for (i=0; i < lmb.memory.cnt; i++) {
unsigned long physbase, size;
+ unsigned long start_pfn, end_pfn;

physbase = lmb.memory.region[i].physbase;
size = lmb.memory.region[i].size;
- if (i) {
- start_pfn = physbase >> PAGE_SHIFT;
- end_pfn = start_pfn + (size >> PAGE_SHIFT);
- memory_present(0, start_pfn, end_pfn);
- }
+
+ start_pfn = physbase >> PAGE_SHIFT;
+ end_pfn = start_pfn + (size >> PAGE_SHIFT);
+ memory_present(0, start_pfn, end_pfn);
+
free_bootmem(physbase, size);
}