Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
where possible") optimized the loop in memmap_init_zone(). But it could
cause a panic, so Daniel Vacek reverted it later.
But as suggested by Daniel Vacek, it is fine to use memblock to skip
gaps and find the next valid frame with CONFIG_HAVE_ARCH_PFN_VALID.
More from what Daniel said:
"On arm and arm64, memblock is used by default. But generic version of
pfn_valid() is based on mem sections and memblock_next_valid_pfn() does
not always return the next valid one but skips more resulting in some
valid frames to be skipped (as if they were invalid). And that's why
kernel was eventually crashing on some !arm machines."
About the performance consideration:
As said by James in b92df1de5,
"I have tested this patch on a virtual model of a Samurai CPU with a
sparse memory map. The kernel boot time drops from 109 to 62 seconds."
Thus it would be better if we retain memblock_next_valid_pfn on arm/arm64.
Besides retaining memblock_next_valid_pfn, there is still some room
for improvement. After this set, I can see the time overhead of memmap_init
is reduced from 27956us to 13537us on my armv8a server (QDF2400 with 96G
memory, pagesize 64k). I believe arm servers will benefit even more when
memory is on the order of TBs or larger.
Patch 1 introduces a new config option to make the code more generic.
Patch 2 retains memblock_next_valid_pfn() on arm and arm64; this patch
originated from b92df1de5.
Patch 3 optimizes memblock_next_valid_pfn().
Patches 4~6 optimize early_pfn_valid().
Changelog:
V10:- move codes to memblock.c, refine the performance consideration
V9: - rebase to mmotm master, refine the log description. No major changes
V8: - introduce new config and move generic code to early_pfn.h
- optimize memblock_next_valid_pfn as suggested by Matthew Wilcox
V7: - fix i386 compilation error. refine the commit description
V6: - simplify the codes, move arm/arm64 common codes to one file.
- refine patches as suggested by Daniel Vacek and Ard Biesheuvel
V5: - further refining as suggested by Daniel Vacek. Make the arm/arm64
code more arch specific
V4: - refine patches as suggested by Daniel Vacek and Wei Yang
- optimized on arm besides arm64
V3: - fix 2 issues reported by kbuild test robot
V2: - rebase to mmotm latest
- remain memblock_next_valid_pfn on arm64
- refine memblock_search_pfn_regions and pfn_valid_region
Jia He (6):
arm: arm64: introduce CONFIG_HAVE_MEMBLOCK_PFN_VALID
mm: page_alloc: remain memblock_next_valid_pfn() on arm/arm64
mm: page_alloc: reduce unnecessary binary search in
memblock_next_valid_pfn()
mm/memblock: introduce memblock_search_pfn_regions()
mm/memblock: introduce pfn_valid_region()
mm: page_alloc: reduce unnecessary binary search in early_pfn_valid()
arch/arm/Kconfig | 4 +++
arch/arm64/Kconfig | 4 +++
include/linux/memblock.h | 2 ++
include/linux/mmzone.h | 16 +++++++++
mm/Kconfig | 3 ++
mm/memblock.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 5 ++-
7 files changed, 117 insertions(+), 1 deletion(-)
--
1.8.3.1
From: Jia He <[email protected]>
Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
where possible") optimized the loop in memmap_init_zone(). But it could
cause a panic, so Daniel Vacek reverted it later.
But as suggested by Daniel Vacek, it is fine to use memblock to skip
gaps and find the next valid frame with CONFIG_HAVE_ARCH_PFN_VALID.
Daniel said:
"On arm and arm64, memblock is used by default. But generic version of
pfn_valid() is based on mem sections and memblock_next_valid_pfn() does
not always return the next valid one but skips more resulting in some
valid frames to be skipped (as if they were invalid). And that's why
kernel was eventually crashing on some !arm machines."
About the performance consideration:
As said by James in b92df1de5,
"I have tested this patch on a virtual model of a Samurai CPU
with a sparse memory map. The kernel boot time drops from 109 to
62 seconds."
Thus it would be better if we retain memblock_next_valid_pfn on arm/arm64.
Suggested-by: Daniel Vacek <[email protected]>
Signed-off-by: Jia He <[email protected]>
---
include/linux/mmzone.h | 11 +++++++++++
mm/memblock.c | 30 ++++++++++++++++++++++++++++++
mm/page_alloc.c | 5 ++++-
3 files changed, 45 insertions(+), 1 deletion(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 32699b2..57cdc42 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1241,6 +1241,8 @@ static inline int pfn_valid(unsigned long pfn)
return 0;
return valid_section(__nr_to_section(pfn_to_section_nr(pfn)));
}
+
+#define next_valid_pfn(pfn) (pfn + 1)
#endif
static inline int pfn_present(unsigned long pfn)
@@ -1266,6 +1268,10 @@ static inline int pfn_present(unsigned long pfn)
#endif
#define early_pfn_valid(pfn) pfn_valid(pfn)
+#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
+extern ulong memblock_next_valid_pfn(ulong pfn);
+#define next_valid_pfn(pfn) memblock_next_valid_pfn(pfn)
+#endif
void sparse_init(void);
#else
#define sparse_init() do {} while (0)
@@ -1287,6 +1293,11 @@ struct mminit_pfnnid_cache {
#define early_pfn_valid(pfn) (1)
#endif
+/* fallback to default definitions*/
+#ifndef next_valid_pfn
+#define next_valid_pfn(pfn) (pfn + 1)
+#endif
+
void memory_present(int nid, unsigned long start, unsigned long end);
/*
diff --git a/mm/memblock.c b/mm/memblock.c
index b9cdfa0..ccad225 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1139,6 +1139,36 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
}
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
+ulong __init_memblock memblock_next_valid_pfn(ulong pfn)
+{
+ struct memblock_type *type = &memblock.memory;
+ unsigned int right = type->cnt;
+ unsigned int mid, left = 0;
+ phys_addr_t addr = PFN_PHYS(++pfn);
+
+ do {
+ mid = (right + left) / 2;
+
+ if (addr < type->regions[mid].base)
+ right = mid;
+ else if (addr >= (type->regions[mid].base +
+ type->regions[mid].size))
+ left = mid + 1;
+ else {
+ /* addr is within the region, so pfn is valid */
+ return pfn;
+ }
+ } while (left < right);
+
+ if (right == type->cnt)
+ return -1UL;
+ else
+ return PHYS_PFN(type->regions[right].base);
+}
+EXPORT_SYMBOL(memblock_next_valid_pfn);
+#endif /*CONFIG_HAVE_MEMBLOCK_PFN_VALID*/
+
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
phys_addr_t end, int nid, ulong flags)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cd3c7b9..607deff 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5485,8 +5485,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (context != MEMMAP_EARLY)
goto not_early;
- if (!early_pfn_valid(pfn))
+ if (!early_pfn_valid(pfn)) {
+ pfn = next_valid_pfn(pfn) - 1;
continue;
+ }
+
if (!early_pfn_in_nid(pfn, nid))
continue;
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
--
1.8.3.1
Make CONFIG_HAVE_MEMBLOCK_PFN_VALID a new config option so that
memblock_next_valid_pfn() can be moved to a generic code file. All the
later optimizations are based on this config option.
The memblock initialization time on arm/arm64 can benefit from this.
Signed-off-by: Jia He <[email protected]>
Reviewed-by: Pavel Tatashin <[email protected]>
---
arch/arm/Kconfig | 4 ++++
arch/arm64/Kconfig | 4 ++++
mm/Kconfig | 3 +++
3 files changed, 11 insertions(+)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 843edfd..7ea2636 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1642,6 +1642,10 @@ config ARCH_SELECT_MEMORY_MODEL
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
+config HAVE_MEMBLOCK_PFN_VALID
+ def_bool y
+ depends on HAVE_ARCH_PFN_VALID
+
config HAVE_GENERIC_GUP
def_bool y
depends on ARM_LPAE
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 42c090c..26d75f4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -778,6 +778,10 @@ config ARCH_SELECT_MEMORY_MODEL
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
+config HAVE_MEMBLOCK_PFN_VALID
+ def_bool y
+ depends on HAVE_ARCH_PFN_VALID
+
config HW_PERF_EVENTS
def_bool y
depends on ARM_PMU
diff --git a/mm/Kconfig b/mm/Kconfig
index 94af022..28fcf54 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -137,6 +137,9 @@ config HAVE_MEMBLOCK_NODE_MAP
config HAVE_MEMBLOCK_PHYS_MAP
bool
+config HAVE_MEMBLOCK_PFN_VALID
+ bool
+
config HAVE_GENERIC_GUP
bool
--
1.8.3.1
This helper finds the index of the memory region that contains the input pfn.
Signed-off-by: Jia He <[email protected]>
---
include/linux/memblock.h | 2 ++
mm/memblock.c | 9 +++++++++
2 files changed, 11 insertions(+)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index ca59883..b0f0307 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -203,6 +203,8 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+int memblock_search_pfn_regions(unsigned long pfn);
+
/**
* for_each_free_mem_range - iterate through free memblock areas
* @i: u64 used as loop variable
diff --git a/mm/memblock.c b/mm/memblock.c
index 84f7fa7..c783b1a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1676,6 +1676,15 @@ static int __init_memblock memblock_search(struct memblock_type *type, phys_addr
return -1;
}
+/* search memblock with the input pfn, return the region idx */
+int __init_memblock memblock_search_pfn_regions(unsigned long pfn)
+{
+ struct memblock_type *type = &memblock.memory;
+ int mid = memblock_search(type, PFN_PHYS(pfn));
+
+ return mid;
+}
+
bool __init memblock_is_reserved(phys_addr_t addr)
{
return memblock_search(&memblock.reserved, addr) != -1;
--
1.8.3.1
From: Jia He <[email protected]>
Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
where possible") optimized the loop in memmap_init_zone(). But there is
still some room for improvement.
E.g. if pfn and pfn+1 are in the same memblock region, we can simply do pfn++
instead of doing the binary search in memblock_next_valid_pfn().
Furthermore, if the pfn is in a gap between two memory regions, skip to the
next region directly if possible.
Below is the memblock region information from my server.
[ 0.000000] Zone ranges:
[ 0.000000] DMA32 [mem 0x0000000000200000-0x00000000ffffffff]
[ 0.000000] Normal [mem 0x0000000100000000-0x00000017ffffffff]
[ 0.000000] Movable zone start for each node
[ 0.000000] Early memory node ranges
[ 0.000000] node 0: [mem 0x0000000000200000-0x000000000021ffff]
[ 0.000000] node 0: [mem 0x0000000000820000-0x000000000307ffff]
[ 0.000000] node 0: [mem 0x0000000003080000-0x000000000308ffff]
[ 0.000000] node 0: [mem 0x0000000003090000-0x00000000031fffff]
[ 0.000000] node 0: [mem 0x0000000003200000-0x00000000033fffff]
[ 0.000000] node 0: [mem 0x0000000003410000-0x00000000034fffff]
[ 0.000000] node 0: [mem 0x0000000003500000-0x000000000351ffff]
[ 0.000000] node 0: [mem 0x0000000003520000-0x000000000353ffff]
[ 0.000000] node 0: [mem 0x0000000003540000-0x0000000003e3ffff]
[ 0.000000] node 0: [mem 0x0000000003e40000-0x0000000003e7ffff]
[ 0.000000] node 0: [mem 0x0000000003e80000-0x0000000003ecffff]
[ 0.000000] node 0: [mem 0x0000000003ed0000-0x0000000003ed5fff]
[ 0.000000] node 0: [mem 0x0000000003ed6000-0x0000000006eeafff]
[ 0.000000] node 0: [mem 0x0000000006eeb000-0x000000000710ffff]
[ 0.000000] node 0: [mem 0x0000000007110000-0x0000000007f0ffff]
[ 0.000000] node 0: [mem 0x0000000007f10000-0x0000000007faffff]
[ 0.000000] node 0: [mem 0x0000000007fb0000-0x000000000806ffff]
[ 0.000000] node 0: [mem 0x0000000008070000-0x00000000080affff]
[ 0.000000] node 0: [mem 0x00000000080b0000-0x000000000832ffff]
[ 0.000000] node 0: [mem 0x0000000008330000-0x000000000836ffff]
[ 0.000000] node 0: [mem 0x0000000008370000-0x000000000838ffff]
[ 0.000000] node 0: [mem 0x0000000008390000-0x00000000083a9fff]
[ 0.000000] node 0: [mem 0x00000000083aa000-0x00000000083bbfff]
[ 0.000000] node 0: [mem 0x00000000083bc000-0x00000000083fffff]
[ 0.000000] node 0: [mem 0x0000000008400000-0x000000000841ffff]
[ 0.000000] node 0: [mem 0x0000000008420000-0x000000000843ffff]
[ 0.000000] node 0: [mem 0x0000000008440000-0x000000000865ffff]
[ 0.000000] node 0: [mem 0x0000000008660000-0x000000000869ffff]
[ 0.000000] node 0: [mem 0x00000000086a0000-0x00000000086affff]
[ 0.000000] node 0: [mem 0x00000000086b0000-0x00000000086effff]
[ 0.000000] node 0: [mem 0x00000000086f0000-0x0000000008b6ffff]
[ 0.000000] node 0: [mem 0x0000000008b70000-0x0000000008bbffff]
[ 0.000000] node 0: [mem 0x0000000008bc0000-0x0000000008edffff]
[ 0.000000] node 0: [mem 0x0000000008ee0000-0x0000000008ee0fff]
[ 0.000000] node 0: [mem 0x0000000008ee1000-0x0000000008ee2fff]
[ 0.000000] node 0: [mem 0x0000000008ee3000-0x000000000decffff]
[ 0.000000] node 0: [mem 0x000000000ded0000-0x000000000defffff]
[ 0.000000] node 0: [mem 0x000000000df00000-0x000000000fffffff]
[ 0.000000] node 0: [mem 0x0000000010800000-0x0000000017feffff]
[ 0.000000] node 0: [mem 0x000000001c000000-0x000000001c00ffff]
[ 0.000000] node 0: [mem 0x000000001c010000-0x000000001c7fffff]
[ 0.000000] node 0: [mem 0x000000001c810000-0x000000007efbffff]
[ 0.000000] node 0: [mem 0x000000007efc0000-0x000000007efdffff]
[ 0.000000] node 0: [mem 0x000000007efe0000-0x000000007efeffff]
[ 0.000000] node 0: [mem 0x000000007eff0000-0x000000007effffff]
[ 0.000000] node 0: [mem 0x000000007f000000-0x00000017ffffffff]
[ 0.000000] Initmem setup node 0 [mem 0x0000000000200000-0x00000017ffffffff]
[ 0.000000] On node 0 totalpages: 25145296
[ 0.000000] DMA32 zone: 16376 pages used for memmap
[ 0.000000] DMA32 zone: 0 pages reserved
[ 0.000000] DMA32 zone: 1028048 pages, LIFO batch:31
[ 0.000000] Normal zone: 376832 pages used for memmap
[ 0.000000] Normal zone: 24117248 pages, LIFO batch:31
Signed-off-by: Jia He <[email protected]>
---
mm/memblock.c | 37 +++++++++++++++++++++++++++++--------
1 file changed, 29 insertions(+), 8 deletions(-)
diff --git a/mm/memblock.c b/mm/memblock.c
index ccad225..84f7fa7 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1140,31 +1140,52 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
+static int early_region_idx __init_memblock = -1;
ulong __init_memblock memblock_next_valid_pfn(ulong pfn)
{
struct memblock_type *type = &memblock.memory;
- unsigned int right = type->cnt;
- unsigned int mid, left = 0;
+ struct memblock_region *regions = type->regions;
+ uint right = type->cnt;
+ uint mid, left = 0;
+ ulong start_pfn, end_pfn, next_start_pfn;
phys_addr_t addr = PFN_PHYS(++pfn);
+ /* fast path, return pfn+1 if next pfn is in the same region */
+ if (early_region_idx != -1) {
+ start_pfn = PFN_DOWN(regions[early_region_idx].base);
+ end_pfn = PFN_DOWN(regions[early_region_idx].base +
+ regions[early_region_idx].size);
+
+ if (pfn >= start_pfn && pfn < end_pfn)
+ return pfn;
+
+ early_region_idx++;
+ next_start_pfn = PFN_DOWN(regions[early_region_idx].base);
+
+ if (pfn >= end_pfn && pfn <= next_start_pfn)
+ return next_start_pfn;
+ }
+
+ /* slow path, do the binary searching */
do {
mid = (right + left) / 2;
- if (addr < type->regions[mid].base)
+ if (addr < regions[mid].base)
right = mid;
- else if (addr >= (type->regions[mid].base +
- type->regions[mid].size))
+ else if (addr >= (regions[mid].base + regions[mid].size))
left = mid + 1;
else {
- /* addr is within the region, so pfn is valid */
+ early_region_idx = mid;
return pfn;
}
} while (left < right);
if (right == type->cnt)
return -1UL;
- else
- return PHYS_PFN(type->regions[right].base);
+
+ early_region_idx = right;
+
+ return PHYS_PFN(regions[early_region_idx].base);
}
EXPORT_SYMBOL(memblock_next_valid_pfn);
#endif /*CONFIG_HAVE_MEMBLOCK_PFN_VALID*/
--
1.8.3.1
From: Jia He <[email protected]>
Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
where possible") optimized the loop in memmap_init_zone(). But there is
still some room for improvement.
E.g. in early_pfn_valid(), we can record the last returned memblock region.
If the current pfn and the last pfn are in the same memory region, we need
not repeat the binary search, because memblock_is_nomap() returns the same
result for the whole memory region.
Signed-off-by: Jia He <[email protected]>
---
mm/memblock.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/mm/memblock.c b/mm/memblock.c
index c783b1a..274bd9f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1188,6 +1188,30 @@ ulong __init_memblock memblock_next_valid_pfn(ulong pfn)
return PHYS_PFN(regions[early_region_idx].base);
}
EXPORT_SYMBOL(memblock_next_valid_pfn);
+
+int pfn_valid_region(ulong pfn)
+{
+ ulong start_pfn, end_pfn;
+ struct memblock_type *type = &memblock.memory;
+ struct memblock_region *regions = type->regions;
+
+ if (early_region_idx != -1) {
+ start_pfn = PFN_DOWN(regions[early_region_idx].base);
+ end_pfn = PFN_DOWN(regions[early_region_idx].base +
+ regions[early_region_idx].size);
+
+ if (pfn >= start_pfn && pfn < end_pfn)
+ return !memblock_is_nomap(
+ &regions[early_region_idx]);
+ }
+
+ early_region_idx = memblock_search_pfn_regions(pfn);
+ if (early_region_idx == -1)
+ return false;
+
+ return !memblock_is_nomap(&regions[early_region_idx]);
+}
+EXPORT_SYMBOL(pfn_valid_region);
#endif /*CONFIG_HAVE_MEMBLOCK_PFN_VALID*/
static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
--
1.8.3.1
Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
where possible") optimized the loop in memmap_init_zone(). But there is
still some room for improvement. E.g. in early_pfn_valid(), if pfn and
pfn+1 are in the same memblock region, we can record the last returned
memblock region index and check whether pfn++ is still in the same
region.
Currently it only improves the performance on arm/arm64 and will have no
impact on other arches.
For the performance improvement, after this set, I can see the time
overhead of memmap_init() is reduced from 27956us to 13537us in my
armv8a server(QDF2400 with 96G memory, pagesize 64k).
Signed-off-by: Jia He <[email protected]>
---
include/linux/mmzone.h | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 57cdc42..83b1d11 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1267,11 +1267,16 @@ static inline int pfn_present(unsigned long pfn)
#define pfn_to_nid(pfn) (0)
#endif
-#define early_pfn_valid(pfn) pfn_valid(pfn)
#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
extern ulong memblock_next_valid_pfn(ulong pfn);
#define next_valid_pfn(pfn) memblock_next_valid_pfn(pfn)
-#endif
+
+extern int pfn_valid_region(ulong pfn);
+#define early_pfn_valid(pfn) pfn_valid_region(pfn)
+#else
+#define early_pfn_valid(pfn) pfn_valid(pfn)
+#endif /*CONFIG_HAVE_MEMBLOCK_PFN_VALID*/
+
void sparse_init(void);
#else
#define sparse_init() do {} while (0)
--
1.8.3.1
On Fri, 6 Jul 2018 17:01:11 +0800 Jia He <[email protected]> wrote:
> From: Jia He <[email protected]>
>
> Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
> where possible") optimized the loop in memmap_init_zone(). But it causes
> possible panic bug. So Daniel Vacek reverted it later.
>
> But as suggested by Daniel Vacek, it is fine to using memblock to skip
> gaps and finding next valid frame with CONFIG_HAVE_ARCH_PFN_VALID.
> Daniel said:
> "On arm and arm64, memblock is used by default. But generic version of
> pfn_valid() is based on mem sections and memblock_next_valid_pfn() does
> not always return the next valid one but skips more resulting in some
> valid frames to be skipped (as if they were invalid). And that's why
> kernel was eventually crashing on some !arm machines."
>
> About the performance consideration:
> As said by James in b92df1de5,
> "I have tested this patch on a virtual model of a Samurai CPU
> with a sparse memory map. The kernel boot time drops from 109 to
> 62 seconds."
>
> Thus it would be better if we remain memblock_next_valid_pfn on arm/arm64.
>
We're making a bit of a mess here. mmzone.h:
...
#ifndef CONFIG_HAVE_ARCH_PFN_VALID
...
#define next_valid_pfn(pfn) (pfn + 1)
#endif
...
#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
#define next_valid_pfn(pfn) memblock_next_valid_pfn(pfn)
...
#else
...
#ifndef next_valid_pfn
#define next_valid_pfn(pfn) (pfn + 1)
#endif
I guess it works OK, since CONFIG_HAVE_MEMBLOCK_PFN_VALID depends on
CONFIG_HAVE_ARCH_PFN_VALID. But it could all do with some cleanup and
modernization.
- Perhaps memblock_next_valid_pfn() should just be called
pfn_valid(). So the header file's responsibility is to provide
pfn_valid() and next_valid_pfn().
- CONFIG_HAVE_ARCH_PFN_VALID should go away. The current way of
doing such things is for the arch (or some Kconfig combination) to
define pfn_valid() and next_valid_pfn() in some fashion and to then
ensure that one of them is #defined to something, to indicate that
both of these have been set up. Or something like that.
Secondly, in memmap_init_zone()
> - if (!early_pfn_valid(pfn))
> + if (!early_pfn_valid(pfn)) {
> + pfn = next_valid_pfn(pfn) - 1;
> continue;
> + }
> +
This is weird-looking. next_valid_pfn(pfn) is usually (pfn+1) so it's
a no-op. Sometimes we're calling memblock_next_valid_pfn() and then
backing up one, presumably because the `for' loop ends in `pfn++'. Or
something. Can this please be fully commented or cleaned up?
On Fri, 6 Jul 2018 17:01:09 +0800 Jia He <[email protected]> wrote:
> Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
> where possible") optimized the loop in memmap_init_zone(). But it causes
> possible panic bug. So Daniel Vacek reverted it later.
>
> But as suggested by Daniel Vacek, it is fine to using memblock to skip
> gaps and finding next valid frame with CONFIG_HAVE_ARCH_PFN_VALID.
>
> More from what Daniel said:
> "On arm and arm64, memblock is used by default. But generic version of
> pfn_valid() is based on mem sections and memblock_next_valid_pfn() does
> not always return the next valid one but skips more resulting in some
> valid frames to be skipped (as if they were invalid). And that's why
> kernel was eventually crashing on some !arm machines."
>
> About the performance consideration:
> As said by James in b92df1de5,
> "I have tested this patch on a virtual model of a Samurai CPU with a
> sparse memory map. The kernel boot time drops from 109 to 62 seconds."
> Thus it would be better if we remain memblock_next_valid_pfn on arm/arm64.
>
> Besides we can remain memblock_next_valid_pfn, there is still some room
> for improvement. After this set, I can see the time overhead of memmap_init
> is reduced from 27956us to 13537us in my armv8a server(QDF2400 with 96G
> memory, pagesize 64k). I believe arm server will benefit more if memory is
> larger than TBs
It's a shame that we're at v10, still with very little evidence of
review activity.
Oh well, it's a nice speedup. I'll toss it in and see what happens,
but I'm not very familiar with memblock so we should try to find
reviewers, please.
Hi Andrew
Thanks for the comments
On 7/7/2018 6:37 AM, Andrew Morton Wrote:
> On Fri, 6 Jul 2018 17:01:11 +0800 Jia He <[email protected]> wrote:
>
>> From: Jia He <[email protected]>
>>
>> Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
>> where possible") optimized the loop in memmap_init_zone(). But it causes
>> possible panic bug. So Daniel Vacek reverted it later.
>>
>> But as suggested by Daniel Vacek, it is fine to using memblock to skip
>> gaps and finding next valid frame with CONFIG_HAVE_ARCH_PFN_VALID.
>> Daniel said:
>> "On arm and arm64, memblock is used by default. But generic version of
>> pfn_valid() is based on mem sections and memblock_next_valid_pfn() does
>> not always return the next valid one but skips more resulting in some
>> valid frames to be skipped (as if they were invalid). And that's why
>> kernel was eventually crashing on some !arm machines."
>>
>> About the performance consideration:
>> As said by James in b92df1de5,
>> "I have tested this patch on a virtual model of a Samurai CPU
>> with a sparse memory map. The kernel boot time drops from 109 to
>> 62 seconds."
>>
>> Thus it would be better if we remain memblock_next_valid_pfn on arm/arm64.
>>
>
> We're making a bit of a mess here. mmzone.h:
>
> ...
> #ifndef CONFIG_HAVE_ARCH_PFN_VALID
> ...
> #define next_valid_pfn(pfn) (pfn + 1)
Yes, ^ this line can be removed.
> #endif
> ...
> #ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
> #define next_valid_pfn(pfn) memblock_next_valid_pfn(pfn)
> ...
> #else
> ...
> #ifndef next_valid_pfn
> #define next_valid_pfn(pfn) (pfn + 1)
> #endif
>
> I guess it works OK, since CONFIG_HAVE_MEMBLOCK_PFN_VALID depends on
> CONFIG_HAVE_ARCH_PFN_VALID. But it could all do with some cleanup and
> modernization.
>
> - Perhaps memblock_next_valid_pfn() should just be called
> pfn_valid(). So the header file's responsibility is to provide
> pfn_valid() and next_valid_pfn().
>
> - CONFIG_HAVE_ARCH_PFN_VALID should go away. The current way of
> doing such things is for the arch (or some Kconfig combination) to
> define pfn_valid() and next_valid_pfn() in some fashion and to then
> ensure that one of them is #defined to something, to indicate that
> both of these have been set up. Or something like that.
This is what I did in patch v2, please see [1]. But Daniel opposed it [2].
As he said:
Now, if any other architecture defines CONFIG_HAVE_ARCH_PFN_VALID and
implements its own version of pfn_valid(), there is no guarantee that
it will be based on memblock data or somehow equivalent to the arm
implementation, right?
I think that makes sense, so I introduced the new config
CONFIG_HAVE_MEMBLOCK_PFN_VALID instead of using CONFIG_HAVE_ARCH_PFN_VALID.
What do you think? :-)
[1] https://lkml.org/lkml/2018/3/24/71
[2] https://lkml.org/lkml/2018/3/28/231
>
>
> Secondly, in memmap_init_zone()
>
>> - if (!early_pfn_valid(pfn))
>> + if (!early_pfn_valid(pfn)) {
>> + pfn = next_valid_pfn(pfn) - 1;
>> continue;
>> + }
>> +
>
> This is weird-looking. next_valid_pfn(pfn) is usually (pfn+1) so it's
> a no-op. Sometimes we're calling memblock_next_valid_pfn() and then
> backing up one, presumably because the `for' loop ends in `pfn++'. Or
> something. Can this please be fully commented or cleaned up?
To clean it up, maybe the below is not acceptable to you and other experts?
if (!early_pfn_valid(pfn)) {
#ifndef XXX
continue;
}
#else
pfn = next_valid_pfn(pfn) - 1;
continue;
}
#endif
Another way which was suggested by Ard Biesheuvel
something like:
for (pfn = start_pfn; pfn < end_pfn; pfn = next_valid_pfn(pfn))
...
But it might have an impact on the memmap_init_zone loop.
E.g. when context != MEMMAP_EARLY, pfn is not checked by early_pfn_valid, thus
it would change the mem hotplug logic.
Sure, as you suggested, I can give more comments in all the cases of different
configs/arches for this line.
--
Cheers,
Jia
On Fri, 6 Jul 2018 17:01:09 +0800 Jia He <[email protected]> wrote:
> Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
> where possible") optimized the loop in memmap_init_zone(). But it causes
> possible panic bug. So Daniel Vacek reverted it later.
>
> But as suggested by Daniel Vacek, it is fine to using memblock to skip
> gaps and finding next valid frame with CONFIG_HAVE_ARCH_PFN_VALID.
>
> More from what Daniel said:
> "On arm and arm64, memblock is used by default. But generic version of
> pfn_valid() is based on mem sections and memblock_next_valid_pfn() does
> not always return the next valid one but skips more resulting in some
> valid frames to be skipped (as if they were invalid). And that's why
> kernel was eventually crashing on some !arm machines."
>
> About the performance consideration:
> As said by James in b92df1de5,
> "I have tested this patch on a virtual model of a Samurai CPU with a
> sparse memory map. The kernel boot time drops from 109 to 62 seconds."
> Thus it would be better if we remain memblock_next_valid_pfn on arm/arm64.
>
> Besides we can remain memblock_next_valid_pfn, there is still some room
> for improvement. After this set, I can see the time overhead of memmap_init
> is reduced from 27956us to 13537us in my armv8a server(QDF2400 with 96G
> memory, pagesize 64k). I believe arm server will benefit more if memory is
> larger than TBs
This patchset is basically unreviewed at this stage. Could people
please find some time to check it carefully?
Thanks.
On 18-08-15 15:34:56, Andrew Morton wrote:
> On Fri, 6 Jul 2018 17:01:09 +0800 Jia He <[email protected]> wrote:
>
> > Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
> > where possible") optimized the loop in memmap_init_zone(). But it causes
> > possible panic bug. So Daniel Vacek reverted it later.
> >
> > But as suggested by Daniel Vacek, it is fine to using memblock to skip
> > gaps and finding next valid frame with CONFIG_HAVE_ARCH_PFN_VALID.
> >
> > More from what Daniel said:
> > "On arm and arm64, memblock is used by default. But generic version of
> > pfn_valid() is based on mem sections and memblock_next_valid_pfn() does
> > not always return the next valid one but skips more resulting in some
> > valid frames to be skipped (as if they were invalid). And that's why
> > kernel was eventually crashing on some !arm machines."
> >
> > About the performance consideration:
> > As said by James in b92df1de5,
> > "I have tested this patch on a virtual model of a Samurai CPU with a
> > sparse memory map. The kernel boot time drops from 109 to 62 seconds."
> > Thus it would be better if we remain memblock_next_valid_pfn on arm/arm64.
> >
> > Besides we can remain memblock_next_valid_pfn, there is still some room
> > for improvement. After this set, I can see the time overhead of memmap_init
> > is reduced from 27956us to 13537us in my armv8a server(QDF2400 with 96G
> > memory, pagesize 64k). I believe arm server will benefit more if memory is
> > larger than TBs
>
> This patchset is basically unreviewed at this stage. Could people
> please find some time to check it carefully?
Working on it.
Pavel
On 18-07-06 17:01:11, Jia He wrote:
> From: Jia He <[email protected]>
>
> Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
> where possible") optimized the loop in memmap_init_zone(). But it causes
> possible panic bug. So Daniel Vacek reverted it later.
>
> But as suggested by Daniel Vacek, it is fine to using memblock to skip
> gaps and finding next valid frame with CONFIG_HAVE_ARCH_PFN_VALID.
> Daniel said:
> "On arm and arm64, memblock is used by default. But generic version of
> pfn_valid() is based on mem sections and memblock_next_valid_pfn() does
> not always return the next valid one but skips more resulting in some
> valid frames to be skipped (as if they were invalid). And that's why
> kernel was eventually crashing on some !arm machines."
>
> About the performance consideration:
> As said by James in b92df1de5,
> "I have tested this patch on a virtual model of a Samurai CPU
> with a sparse memory map. The kernel boot time drops from 109 to
> 62 seconds."
>
> Thus it would be better if we remain memblock_next_valid_pfn on arm/arm64.
>
> Suggested-by: Daniel Vacek <[email protected]>
> Signed-off-by: Jia He <[email protected]>
The version of this patch in linux-next has a few fixes; I reviewed that one
and it looks good to me.
Reviewed-by: Pavel Tatashin <[email protected]>
> Signed-off-by: Jia He <[email protected]>
> ---
> mm/memblock.c | 37 +++++++++++++++++++++++++++++--------
> 1 file changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/mm/memblock.c b/mm/memblock.c
> index ccad225..84f7fa7 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1140,31 +1140,52 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
> #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
>
> #ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
> +static int early_region_idx __init_memblock = -1;
One comment:
This should be __initdata, but even better bring it inside the function
as local static variable.
> ulong __init_memblock memblock_next_valid_pfn(ulong pfn)
> {
Otherwise looks good:
Reviewed-by: Pavel Tatashin <[email protected]>
On 8/16/18 9:08 PM, Pavel Tatashin wrote:
>
>> Signed-off-by: Jia He <[email protected]>
>> ---
>> mm/memblock.c | 37 +++++++++++++++++++++++++++++--------
>> 1 file changed, 29 insertions(+), 8 deletions(-)
>>
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index ccad225..84f7fa7 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -1140,31 +1140,52 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
>> #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
>>
>> #ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
>> +static int early_region_idx __init_memblock = -1;
>
> One comment:
>
> This should be __initdata, but even better bring it inside the function
> as local static variable.
Disregard this comment, this global is used in the next commits. So,
everything is OK. No need for __initdata either.
>
>> ulong __init_memblock memblock_next_valid_pfn(ulong pfn)
>> {
>
> Otherwise looks good:
>
> Reviewed-by: Pavel Tatashin <[email protected]>
>
>
On 7/6/18 5:01 AM, Jia He wrote:
> Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
> where possible") optimized the loop in memmap_init_zone(). But there is
> still some room for improvement. E.g. in early_pfn_valid(), if pfn and
> pfn+1 are in the same memblock region, we can record the last returned
> memblock region index and check whether pfn++ is still in the same
> region.
>
> Currently it only improve the performance on arm/arm64 and will have no
> impact on other arches.
>
> For the performance improvement, after this set, I can see the time
> overhead of memmap_init() is reduced from 27956us to 13537us in my
> armv8a server(QDF2400 with 96G memory, pagesize 64k).
This series would be a lot simpler if patches 4, 5, and 6 were dropped.
The extra complexity does not make sense to save 0.0001s/T during not.
Patches 1-3, look OK, but without patches 4-5 __init_memblock should be
made local static as I suggested earlier.
So, I think Jia should re-spin this series with only 3 patches. Or,
Andrew could remove them from linux-next before merge.
Thank you,
Pavel
On 8/16/18 9:35 PM, Pasha Tatashin wrote:
>
>
> On 7/6/18 5:01 AM, Jia He wrote:
>> Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
>> where possible") optimized the loop in memmap_init_zone(). But there is
>> still some room for improvement. E.g. in early_pfn_valid(), if pfn and
>> pfn+1 are in the same memblock region, we can record the last returned
>> memblock region index and check whether pfn++ is still in the same
>> region.
>>
>> Currently it only improve the performance on arm/arm64 and will have no
>> impact on other arches.
>>
>> For the performance improvement, after this set, I can see the time
>> overhead of memmap_init() is reduced from 27956us to 13537us in my
>> armv8a server(QDF2400 with 96G memory, pagesize 64k).
>
> This series would be a lot simpler if patches 4, 5, and 6 were dropped.
> The extra complexity does not make sense to save 0.0001s/T during not.
s/not/boot
>
> Patches 1-3, look OK, but without patches 4-5 __init_memblock should be
> made local static as I suggested earlier.
s/__init_memblock/early_region_idx
Hi Pasha
Thanks for the comments
On 8/17/2018 9:35 AM, Pasha Tatashin Wrote:
>
>
> On 7/6/18 5:01 AM, Jia He wrote:
>> Commit b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns
>> where possible") optimized the loop in memmap_init_zone(). But there is
>> still some room for improvement. E.g. in early_pfn_valid(), if pfn and
>> pfn+1 are in the same memblock region, we can record the last returned
>> memblock region index and check whether pfn++ is still in the same
>> region.
>>
>> Currently it only improve the performance on arm/arm64 and will have no
>> impact on other arches.
>>
>> For the performance improvement, after this set, I can see the time
>> overhead of memmap_init() is reduced from 27956us to 13537us in my
>> armv8a server(QDF2400 with 96G memory, pagesize 64k).
>
> This series would be a lot simpler if patches 4, 5, and 6 were dropped.
> The extra complexity does not make sense to save 0.0001s/T during not.
>
> Patches 1-3, look OK, but without patches 4-5 __init_memblock should be
> made local static as I suggested earlier.
>
> So, I think Jia should re-spin this series with only 3 patches. Or,
> Andrew could remove the from linux-next before merge.
>
I will respin it with patches #1-#3 if there are no more comments
Cheers,
Jia
> Thank you,
> Pavel
>
On Fri, Jul 06, 2018 at 05:01:10PM +0800, Jia He wrote:
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 42c090c..26d75f4 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -778,6 +778,10 @@ config ARCH_SELECT_MEMORY_MODEL
> config HAVE_ARCH_PFN_VALID
> def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
>
> +config HAVE_MEMBLOCK_PFN_VALID
> + def_bool y
> + depends on HAVE_ARCH_PFN_VALID
> +
> config HW_PERF_EVENTS
> def_bool y
> depends on ARM_PMU
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 94af022..28fcf54 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -137,6 +137,9 @@ config HAVE_MEMBLOCK_NODE_MAP
> config HAVE_MEMBLOCK_PHYS_MAP
> bool
>
> +config HAVE_MEMBLOCK_PFN_VALID
> + bool
Since you defined HAVE_MEMBLOCK_PFN_VALID here, do we need to define it
in the arch code as well? If you kept it in mm/Kconfig only, you could
just select it in the arch HAVE_ARCH_PFN_VALID entry:
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d0a53cc6293a..cd230c77e122 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -787,6 +787,7 @@ config ARCH_FLATMEM_ENABLE
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
+ select HAVE_MEMBLOCK_PFN_VALID
config HW_PERF_EVENTS
def_bool y
(similarly for arch/arm)
--
Catalin
On 8/17/2018 10:50 PM, Catalin Marinas Wrote:
> On Fri, Jul 06, 2018 at 05:01:10PM +0800, Jia He wrote:
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 42c090c..26d75f4 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -778,6 +778,10 @@ config ARCH_SELECT_MEMORY_MODEL
>> config HAVE_ARCH_PFN_VALID
>> def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
>>
>> +config HAVE_MEMBLOCK_PFN_VALID
>> + def_bool y
>> + depends on HAVE_ARCH_PFN_VALID
>> +
>> config HW_PERF_EVENTS
>> def_bool y
>> depends on ARM_PMU
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index 94af022..28fcf54 100644
>> --- a/mm/Kconfig
>> +++ b/mm/Kconfig
>> @@ -137,6 +137,9 @@ config HAVE_MEMBLOCK_NODE_MAP
>> config HAVE_MEMBLOCK_PHYS_MAP
>> bool
>>
>> +config HAVE_MEMBLOCK_PFN_VALID
>> + bool
>
> Since you defined HAVE_MEMBLOCK_PFN_VALID here, do we need to define it
> in the arch code as well? If kept it in the mm/Kconfig only, you could
> just select it in the arch HAVE_ARCH_PFN_VALID entry:
>
Ok, thanks for the comments
It makes it cleaner.
--
Cheers,
Jia
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index d0a53cc6293a..cd230c77e122 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -787,6 +787,7 @@ config ARCH_FLATMEM_ENABLE
>
> config HAVE_ARCH_PFN_VALID
> def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM
> + select HAVE_MEMBLOCK_PFN_VALID
>
> config HW_PERF_EVENTS
> def_bool y
>
> (similarly for arch/arm)
>
Hi Pasha
On 8/17/2018 9:08 AM, Pasha Tatashin Wrote:
>
>> Signed-off-by: Jia He <[email protected]>
>> ---
>> mm/memblock.c | 37 +++++++++++++++++++++++++++++--------
>> 1 file changed, 29 insertions(+), 8 deletions(-)
>>
>> diff --git a/mm/memblock.c b/mm/memblock.c
>> index ccad225..84f7fa7 100644
>> --- a/mm/memblock.c
>> +++ b/mm/memblock.c
>> @@ -1140,31 +1140,52 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
>> #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
>>
>> #ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
>> +static int early_region_idx __init_memblock = -1;
>
> One comment:
>
> This should be __initdata, but even better bring it inside the function
> as local static variable.
>
Seems it should be __initdata_memblock instead of __initdata?
--
Cheers,
Jia
>> ulong __init_memblock memblock_next_valid_pfn(ulong pfn)
>> {
>
> Otherwise looks good:
>
> Reviewed-by: Pavel Tatashin <[email protected]>
>
On Tue, 21 Aug 2018 14:14:30 +0800 Jia He <[email protected]> wrote:
> Hi Pasha
>
> On 8/17/2018 9:08 AM, Pasha Tatashin Wrote:
> >
> >> Signed-off-by: Jia He <[email protected]>
> >> ---
> >> mm/memblock.c | 37 +++++++++++++++++++++++++++++--------
> >> 1 file changed, 29 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/mm/memblock.c b/mm/memblock.c
> >> index ccad225..84f7fa7 100644
> >> --- a/mm/memblock.c
> >> +++ b/mm/memblock.c
> >> @@ -1140,31 +1140,52 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
> >> #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
> >>
> >> #ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
> >> +static int early_region_idx __init_memblock = -1;
> >
> > One comment:
> >
> > This should be __initdata, but even better bring it inside the function
> > as local static variable.
> >
> Seems it should be __initdata_memblock instead of __initdata?
>
Eh, it's 4 bytes.
It should however be local to the sole function which uses it.
And what's this "ulong" thing? mm/ uses unsigned long.
--- a/mm/memblock.c~mm-page_alloc-reduce-unnecessary-binary-search-in-memblock_next_valid_pfn-fix
+++ a/mm/memblock.c
@@ -1232,15 +1232,15 @@ int __init_memblock memblock_set_node(ph
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
-static int early_region_idx __init_memblock = -1;
-ulong __init_memblock memblock_next_valid_pfn(ulong pfn)
+unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn)
{
struct memblock_type *type = &memblock.memory;
struct memblock_region *regions = type->regions;
uint right = type->cnt;
uint mid, left = 0;
- ulong start_pfn, end_pfn, next_start_pfn;
+ unsigned long start_pfn, end_pfn, next_start_pfn;
phys_addr_t addr = PFN_PHYS(++pfn);
+ static int early_region_idx __initdata_memblock = -1;
/* fast path, return pfn+1 if next pfn is in the same region */
if (early_region_idx != -1) {
--- a/include/linux/mmzone.h~mm-page_alloc-reduce-unnecessary-binary-search-in-memblock_next_valid_pfn-fix
+++ a/include/linux/mmzone.h
@@ -1269,7 +1269,7 @@ static inline int pfn_present(unsigned l
#define early_pfn_valid(pfn) pfn_valid(pfn)
#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
-extern ulong memblock_next_valid_pfn(ulong pfn);
+extern unsigned long memblock_next_valid_pfn(unsigned long pfn);
#define next_valid_pfn(pfn) memblock_next_valid_pfn(pfn)
#endif
void sparse_init(void);
_
Hi Andrew
On 8/22/2018 5:08 AM, Andrew Morton Wrote:
> On Tue, 21 Aug 2018 14:14:30 +0800 Jia He <[email protected]> wrote:
>
>> Hi Pasha
>>
>> On 8/17/2018 9:08 AM, Pasha Tatashin Wrote:
>>>
>>>> Signed-off-by: Jia He <[email protected]>
>>>> ---
>>>> mm/memblock.c | 37 +++++++++++++++++++++++++++++--------
>>>> 1 file changed, 29 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/mm/memblock.c b/mm/memblock.c
>>>> index ccad225..84f7fa7 100644
>>>> --- a/mm/memblock.c
>>>> +++ b/mm/memblock.c
>>>> @@ -1140,31 +1140,52 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
>>>> #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
>>>>
>>>> #ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
>>>> +static int early_region_idx __init_memblock = -1;
>>>
>>> One comment:
>>>
>>> This should be __initdata, but even better bring it inside the function
>>> as local static variable.
>>>
>> Seems it should be __initdata_memblock instead of __initdata?
>>
>
> Eh, it's 4 bytes.
>
> It should however be local to the sole function which uses it.
Sorry, I am not clear about this comment.
early_region_idx records the *last* valid region idx found by the previous
memblock_next_valid_pfn() call. So it should be static rather than a plain
local variable?
>
> And what's this "ulong" thing? mm/ uses unsigned long.
ok, will change it
--
Cheers,
Jia
>
> --- a/mm/memblock.c~mm-page_alloc-reduce-unnecessary-binary-search-in-memblock_next_valid_pfn-fix
> +++ a/mm/memblock.c
> @@ -1232,15 +1232,15 @@ int __init_memblock memblock_set_node(ph
> #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
>
> #ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
> -static int early_region_idx __init_memblock = -1;
> -ulong __init_memblock memblock_next_valid_pfn(ulong pfn)
> +unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn)
> {
> struct memblock_type *type = &memblock.memory;
> struct memblock_region *regions = type->regions;
> uint right = type->cnt;
> uint mid, left = 0;
> - ulong start_pfn, end_pfn, next_start_pfn;
> + unsigned long start_pfn, end_pfn, next_start_pfn;
> phys_addr_t addr = PFN_PHYS(++pfn);
> + static int early_region_idx __initdata_memblock = -1;
>
> /* fast path, return pfn+1 if next pfn is in the same region */
> if (early_region_idx != -1) {
> --- a/include/linux/mmzone.h~mm-page_alloc-reduce-unnecessary-binary-search-in-memblock_next_valid_pfn-fix
> +++ a/include/linux/mmzone.h
> @@ -1269,7 +1269,7 @@ static inline int pfn_present(unsigned l
>
> #define early_pfn_valid(pfn) pfn_valid(pfn)
> #ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID
> -extern ulong memblock_next_valid_pfn(ulong pfn);
> +extern unsigned long memblock_next_valid_pfn(unsigned long pfn);
> #define next_valid_pfn(pfn) memblock_next_valid_pfn(pfn)
> #endif
> void sparse_init(void);
> _
>
>