VMware reported a performance regression during memmap_init() invocation.
And they bisected to commit 73a6e474cb376 ("mm: memmap_init: iterate over
memblock regions rather that check each PFN") causing it.
After investigation, it's caused by incorrect memmap init defer handling
in memmap_init_zone() after commit 73a6e474cb376. The current
memmap_init_zone() only handles one memory region of one zone, while
memmap_init() iterates over all its memory regions and passes them one by
one into memmap_init_zone() to handle.
So in this patchset, patch 1/2 fixes the bug observed by VMware. Patch
2/2 cleans up the inappropriate names of memmap_init() and memmap_init_zone()
accordingly.
VMware helped do the testing on their VMware ESXi platform. This patchset
is based on 5.10.0-rc7+, master branch of Linus's tree.
Baoquan He (2):
mm: memmap defer init doesn't work as expected
mm: rename memmap_init() and memmap_init_zone()
arch/ia64/mm/init.c | 8 ++++----
include/linux/mm.h | 5 +++--
mm/memory_hotplug.c | 2 +-
mm/page_alloc.c | 22 ++++++++++++----------
4 files changed, 20 insertions(+), 17 deletions(-)
--
2.17.2
The current memmap_init_zone() only handles a memory region inside one zone.
Actually memmap_init() does the memmap init of one zone. So rename both of
them accordingly.
And also rename the function parameter 'range_start_pfn' and local variable
'range_end_pfn' to zone_start_pfn/zone_end_pfn.
Signed-off-by: Baoquan He <[email protected]>
---
arch/ia64/mm/init.c | 6 +++---
include/linux/mm.h | 2 +-
mm/memory_hotplug.c | 2 +-
mm/page_alloc.c | 16 ++++++++--------
4 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 27ca549ff47e..af678197ac2d 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -535,18 +535,18 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
/ sizeof(struct page));
if (map_start < map_end)
- memmap_init_zone((unsigned long)(map_end - map_start),
+ memmap_init_range((unsigned long)(map_end - map_start),
args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
return 0;
}
void __meminit
-memmap_init (unsigned long size, int nid, unsigned long zone,
+memmap_init_zone (unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn)
{
if (!vmem_map) {
- memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
+ memmap_init_range(size, nid, zone, start_pfn, start_pfn + size,
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
} else {
struct page *start;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cd5c313729ea..3d81ebbbef89 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2439,7 +2439,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long,
+extern void memmap_init_range(unsigned long, int, unsigned long,
unsigned long, unsigned long, enum meminit_context,
struct vmem_altmap *, int migratetype);
extern void setup_per_zone_wmarks(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 47b75da63f01..579762e4f8d8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -714,7 +714,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
* expects the zone spans the pfn range. All the pages in the range
* are reserved so nobody should be touching them so we should be safe
*/
- memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
+ memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
MEMINIT_HOTPLUG, altmap, migratetype);
set_zone_contiguous(zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 315c22974f0d..fac599deba56 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6050,7 +6050,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
* (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
* zone stats (e.g., nr_isolate_pageblock) are touched.
*/
-void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn, unsigned long zone_end_pfn,
enum meminit_context context,
struct vmem_altmap *altmap, int migratetype)
@@ -6187,21 +6187,21 @@ static void __meminit zone_init_free_lists(struct zone *zone)
}
}
-void __meminit __weak memmap_init(unsigned long size, int nid,
+void __meminit __weak memmap_init_zone(unsigned long size, int nid,
unsigned long zone,
- unsigned long range_start_pfn)
+ unsigned long zone_start_pfn)
{
unsigned long start_pfn, end_pfn;
- unsigned long range_end_pfn = range_start_pfn + size;
+ unsigned long zone_end_pfn = zone_start_pfn + size;
int i;
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
- start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
- end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
+ start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
+ end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
if (end_pfn > start_pfn) {
size = end_pfn - start_pfn;
- memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
+ memmap_init_range(size, nid, zone, start_pfn, zone_end_pfn,
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
}
}
@@ -6903,7 +6903,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
set_pageblock_order();
setup_usemap(pgdat, zone, zone_start_pfn, size);
init_currently_empty_zone(zone, zone_start_pfn, size);
- memmap_init(size, nid, j, zone_start_pfn);
+ memmap_init_zone(size, nid, j, zone_start_pfn);
}
}
--
2.17.2
On Mon, Dec 14, 2020 at 11:00:07AM +0100, David Hildenbrand wrote:
> On 13.12.20 16:09, Baoquan He wrote:
> > The current memmap_init_zone() only handles memory region inside one zone.
> > Actually memmap_init() does the memmap init of one zone. So rename both of
> > them accordingly.
> >
> > And also rename the function parameter 'range_start_pfn' and local variable
> > 'range_end_pfn' to zone_start_pfn/zone_end_pfn.
> >
> > Signed-off-by: Baoquan He <[email protected]>
> > ---
> > arch/ia64/mm/init.c | 6 +++---
> > include/linux/mm.h | 2 +-
> > mm/memory_hotplug.c | 2 +-
> > mm/page_alloc.c | 16 ++++++++--------
> > 4 files changed, 13 insertions(+), 13 deletions(-)
> >
> > diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> > index 27ca549ff47e..af678197ac2d 100644
> > --- a/arch/ia64/mm/init.c
> > +++ b/arch/ia64/mm/init.c
> > @@ -535,18 +535,18 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
> > / sizeof(struct page));
> >
> > if (map_start < map_end)
> > - memmap_init_zone((unsigned long)(map_end - map_start),
> > + memmap_init_range((unsigned long)(map_end - map_start),
> > args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
> > MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> > return 0;
> > }
> >
> > void __meminit
> > -memmap_init (unsigned long size, int nid, unsigned long zone,
> > +memmap_init_zone (unsigned long size, int nid, unsigned long zone,
> > unsigned long start_pfn)
>
> While at it s/zone /zone/ please. :)
>
> > {
> > if (!vmem_map) {
> > - memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
> > + memmap_init_range(size, nid, zone, start_pfn, start_pfn + size,
> > MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> > } else {
> > struct page *start;
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index cd5c313729ea..3d81ebbbef89 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -2439,7 +2439,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
> > #endif
> >
> > extern void set_dma_reserve(unsigned long new_dma_reserve);
> > -extern void memmap_init_zone(unsigned long, int, unsigned long,
> > +extern void memmap_init_range(unsigned long, int, unsigned long,
> > unsigned long, unsigned long, enum meminit_context,
> > struct vmem_altmap *, int migratetype);
> > extern void setup_per_zone_wmarks(void);
> > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> > index 47b75da63f01..579762e4f8d8 100644
> > --- a/mm/memory_hotplug.c
> > +++ b/mm/memory_hotplug.c
> > @@ -714,7 +714,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
> > * expects the zone spans the pfn range. All the pages in the range
> > * are reserved so nobody should be touching them so we should be safe
> > */
> > - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
> > + memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
> > MEMINIT_HOTPLUG, altmap, migratetype);
> >
> > set_zone_contiguous(zone);
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 315c22974f0d..fac599deba56 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -6050,7 +6050,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> > * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
> > * zone stats (e.g., nr_isolate_pageblock) are touched.
> > */
> > -void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
> > +void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
> > unsigned long start_pfn, unsigned long zone_end_pfn,
> > enum meminit_context context,
> > struct vmem_altmap *altmap, int migratetype)
> > @@ -6187,21 +6187,21 @@ static void __meminit zone_init_free_lists(struct zone *zone)
> > }
> > }
> >
> > -void __meminit __weak memmap_init(unsigned long size, int nid,
> > +void __meminit __weak memmap_init_zone(unsigned long size, int nid,
> > unsigned long zone,
> > - unsigned long range_start_pfn)
> > + unsigned long zone_start_pfn)
>
> Why are we not simply passing "struct zone" like
>
> void __meminit __weak memmap_init_zone(struct zone *zone)
>
> from which we can derive
> - nid
> - zone idx
> - zone_start_pfn
> - spanned_pages / zone_end_pfn
>
> At least when called from free_area_init_core() this should work just
> fine I think.
There is also a custom memmap init in ia64 which at least should be
tested ;-)
More broadly, while Baoquan's fix looks Ok to me, I think we can
calculate node->first_deferred_pfn earlier in, say,
free_area_init_node() rather than do defer_init() check for each pfn.
> > {
> > unsigned long start_pfn, end_pfn;
> > - unsigned long range_end_pfn = range_start_pfn + size;
> > + unsigned long zone_end_pfn = zone_start_pfn + size;
> > int i;
> >
> > for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
> > - start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
> > - end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
> > + start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
> > + end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
> >
> > if (end_pfn > start_pfn) {
> > size = end_pfn - start_pfn;
> > - memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
> > + memmap_init_range(size, nid, zone, start_pfn, zone_end_pfn,
> > MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> > }
> > }
> > @@ -6903,7 +6903,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
> > set_pageblock_order();
> > setup_usemap(pgdat, zone, zone_start_pfn, size);
> > init_currently_empty_zone(zone, zone_start_pfn, size);
> > - memmap_init(size, nid, j, zone_start_pfn);
> > + memmap_init_zone(size, nid, j, zone_start_pfn);
> > }
> > }
> >
> >
>
>
> --
> Thanks,
>
> David / dhildenb
>
--
Sincerely yours,
Mike.
VMware observed a performance regression during memmap init on their platform,
and bisected it to commit 73a6e474cb376 ("mm: memmap_init: iterate over memblock
regions rather that check each PFN") as the cause.
Before the commit:
[0.033176] Normal zone: 1445888 pages used for memmap
[0.033176] Normal zone: 89391104 pages, LIFO batch:63
[0.035851] ACPI: PM-Timer IO Port: 0x448
With commit
[0.026874] Normal zone: 1445888 pages used for memmap
[0.026875] Normal zone: 89391104 pages, LIFO batch:63
[2.028450] ACPI: PM-Timer IO Port: 0x448
The root cause is that the current memmap defer init doesn't work as expected.
Before, memmap_init_zone() was used to do memmap init of one whole zone, to
initialize all low zones of one numa node, but defer memmap init of the
last zone in that numa node. However, since commit 73a6e474cb376, function
memmap_init() is adapted to iterate over memblock regions inside one zone,
then call memmap_init_zone() to do memmap init for each region.
E.g., on VMware's system, the memory layout is as below; there are two memory
regions in node 2. The current code will mistakenly initialize the whole 1st
region [mem 0xab00000000-0xfcffffffff], then defer memmap init after initializing
only one memory section of the 2nd region [mem 0x10000000000-0x1033fffffff].
In fact, we expect only one memory section's memmap to be
initialized. That's why more time is spent at this time.
[ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x0009ffff]
[ 0.008842] ACPI: SRAT: Node 0 PXM 0 [mem 0x00100000-0xbfffffff]
[ 0.008843] ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x55ffffffff]
[ 0.008844] ACPI: SRAT: Node 1 PXM 1 [mem 0x5600000000-0xaaffffffff]
[ 0.008844] ACPI: SRAT: Node 2 PXM 2 [mem 0xab00000000-0xfcffffffff]
[ 0.008845] ACPI: SRAT: Node 2 PXM 2 [mem 0x10000000000-0x1033fffffff]
Now, let's add a parameter 'zone_end_pfn' to memmap_init_zone() to pass
down the real zone end pfn so that defer_init() can use it to judge whether
deferral needs to be applied zone-wide.
Fixes: 73a6e474cb376 ("mm: memmap_init: iterate over memblock regions rather that check each PFN")
Signed-off-by: Baoquan He <[email protected]>
Cc: [email protected]
---
arch/ia64/mm/init.c | 4 ++--
include/linux/mm.h | 5 +++--
mm/memory_hotplug.c | 2 +-
mm/page_alloc.c | 8 +++++---
4 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ef12e097f318..27ca549ff47e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
if (map_start < map_end)
memmap_init_zone((unsigned long)(map_end - map_start),
- args->nid, args->zone, page_to_pfn(map_start),
+ args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
return 0;
}
@@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn)
{
if (!vmem_map) {
- memmap_init_zone(size, nid, zone, start_pfn,
+ memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
} else {
struct page *start;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index db6ae4d3fb4e..cd5c313729ea 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2439,8 +2439,9 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
- enum meminit_context, struct vmem_altmap *, int migratetype);
+extern void memmap_init_zone(unsigned long, int, unsigned long,
+ unsigned long, unsigned long, enum meminit_context,
+ struct vmem_altmap *, int migratetype);
extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 63b2e46b6555..47b75da63f01 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -714,7 +714,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
* expects the zone spans the pfn range. All the pages in the range
* are reserved so nobody should be touching them so we should be safe
*/
- memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+ memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
MEMINIT_HOTPLUG, altmap, migratetype);
set_zone_contiguous(zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eaa227a479e4..315c22974f0d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -448,6 +448,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
return false;
+ if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
+ return true;
/*
* We start only with one section of pages, more pages are added as
* needed until the rest of deferred pages are initialized.
@@ -6049,7 +6051,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
* zone stats (e.g., nr_isolate_pageblock) are touched.
*/
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn,
+ unsigned long start_pfn, unsigned long zone_end_pfn,
enum meminit_context context,
struct vmem_altmap *altmap, int migratetype)
{
@@ -6085,7 +6087,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (context == MEMINIT_EARLY) {
if (overlap_memmap_init(zone, &pfn))
continue;
- if (defer_init(nid, pfn, end_pfn))
+ if (defer_init(nid, pfn, zone_end_pfn))
break;
}
@@ -6199,7 +6201,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
if (end_pfn > start_pfn) {
size = end_pfn - start_pfn;
- memmap_init_zone(size, nid, zone, start_pfn,
+ memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
}
}
--
2.17.2
On 13.12.20 16:09, Baoquan He wrote:
> The current memmap_init_zone() only handles memory region inside one zone.
> Actually memmap_init() does the memmap init of one zone. So rename both of
> them accordingly.
>
> And also rename the function parameter 'range_start_pfn' and local variable
> 'range_end_pfn' to zone_start_pfn/zone_end_pfn.
>
> Signed-off-by: Baoquan He <[email protected]>
> ---
> arch/ia64/mm/init.c | 6 +++---
> include/linux/mm.h | 2 +-
> mm/memory_hotplug.c | 2 +-
> mm/page_alloc.c | 16 ++++++++--------
> 4 files changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> index 27ca549ff47e..af678197ac2d 100644
> --- a/arch/ia64/mm/init.c
> +++ b/arch/ia64/mm/init.c
> @@ -535,18 +535,18 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
> / sizeof(struct page));
>
> if (map_start < map_end)
> - memmap_init_zone((unsigned long)(map_end - map_start),
> + memmap_init_range((unsigned long)(map_end - map_start),
> args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
> MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> return 0;
> }
>
> void __meminit
> -memmap_init (unsigned long size, int nid, unsigned long zone,
> +memmap_init_zone (unsigned long size, int nid, unsigned long zone,
> unsigned long start_pfn)
While at it s/zone /zone/ please. :)
> {
> if (!vmem_map) {
> - memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
> + memmap_init_range(size, nid, zone, start_pfn, start_pfn + size,
> MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> } else {
> struct page *start;
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index cd5c313729ea..3d81ebbbef89 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2439,7 +2439,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
> #endif
>
> extern void set_dma_reserve(unsigned long new_dma_reserve);
> -extern void memmap_init_zone(unsigned long, int, unsigned long,
> +extern void memmap_init_range(unsigned long, int, unsigned long,
> unsigned long, unsigned long, enum meminit_context,
> struct vmem_altmap *, int migratetype);
> extern void setup_per_zone_wmarks(void);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 47b75da63f01..579762e4f8d8 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -714,7 +714,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
> * expects the zone spans the pfn range. All the pages in the range
> * are reserved so nobody should be touching them so we should be safe
> */
> - memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
> + memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
> MEMINIT_HOTPLUG, altmap, migratetype);
>
> set_zone_contiguous(zone);
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 315c22974f0d..fac599deba56 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6050,7 +6050,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
> * zone stats (e.g., nr_isolate_pageblock) are touched.
> */
> -void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
> +void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
> unsigned long start_pfn, unsigned long zone_end_pfn,
> enum meminit_context context,
> struct vmem_altmap *altmap, int migratetype)
> @@ -6187,21 +6187,21 @@ static void __meminit zone_init_free_lists(struct zone *zone)
> }
> }
>
> -void __meminit __weak memmap_init(unsigned long size, int nid,
> +void __meminit __weak memmap_init_zone(unsigned long size, int nid,
> unsigned long zone,
> - unsigned long range_start_pfn)
> + unsigned long zone_start_pfn)
Why are we not simply passing "struct zone" like
void __meminit __weak memmap_init_zone(struct zone *zone)
from which we can derive
- nid
- zone idx
- zone_start_pfn
- spanned_pages / zone_end_pfn
At least when called from free_area_init_core() this should work just
fine I think.
> {
> unsigned long start_pfn, end_pfn;
> - unsigned long range_end_pfn = range_start_pfn + size;
> + unsigned long zone_end_pfn = zone_start_pfn + size;
> int i;
>
> for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
> - start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
> - end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
> + start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
> + end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
>
> if (end_pfn > start_pfn) {
> size = end_pfn - start_pfn;
> - memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
> + memmap_init_range(size, nid, zone, start_pfn, zone_end_pfn,
> MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> }
> }
> @@ -6903,7 +6903,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
> set_pageblock_order();
> setup_usemap(pgdat, zone, zone_start_pfn, size);
> init_currently_empty_zone(zone, zone_start_pfn, size);
> - memmap_init(size, nid, j, zone_start_pfn);
> + memmap_init_zone(size, nid, j, zone_start_pfn);
> }
> }
>
>
--
Thanks,
David / dhildenb
On 12/14/20 at 11:00am, David Hildenbrand wrote:
> On 13.12.20 16:09, Baoquan He wrote:
> > The current memmap_init_zone() only handles memory region inside one zone.
> > Actually memmap_init() does the memmap init of one zone. So rename both of
> > them accordingly.
> >
> > And also rename the function parameter 'range_start_pfn' and local variable
> > 'range_end_pfn' to zone_start_pfn/zone_end_pfn.
> >
> > Signed-off-by: Baoquan He <[email protected]>
> > ---
> > arch/ia64/mm/init.c | 6 +++---
> > include/linux/mm.h | 2 +-
> > mm/memory_hotplug.c | 2 +-
> > mm/page_alloc.c | 16 ++++++++--------
> > 4 files changed, 13 insertions(+), 13 deletions(-)
> >
> > diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> > index 27ca549ff47e..af678197ac2d 100644
> > --- a/arch/ia64/mm/init.c
> > +++ b/arch/ia64/mm/init.c
> > @@ -535,18 +535,18 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
> > / sizeof(struct page));
> >
> > if (map_start < map_end)
> > - memmap_init_zone((unsigned long)(map_end - map_start),
> > + memmap_init_range((unsigned long)(map_end - map_start),
> > args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
> > MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> > return 0;
> > }
> >
> > void __meminit
> > -memmap_init (unsigned long size, int nid, unsigned long zone,
> > +memmap_init_zone (unsigned long size, int nid, unsigned long zone,
> > unsigned long start_pfn)
>
> While at it s/zone /zone/ please. :)
Yeah, when I ran git grep 'memmap_init(', I didn't find the one in ia64,
and didn't adjust it since I saw so many functions have a space between
the name and the parenthesis in arch/ia64/mm/. I will clean up this one anyway.
>
> > {
> > if (!vmem_map) {
> > - memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
> > + memmap_init_range(size, nid, zone, start_pfn, start_pfn + size,
> > MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> > } else {
> > struct page *start;
...
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 315c22974f0d..fac599deba56 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -6050,7 +6050,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> > * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
> > * zone stats (e.g., nr_isolate_pageblock) are touched.
> > */
> > -void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
> > +void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
> > unsigned long start_pfn, unsigned long zone_end_pfn,
> > enum meminit_context context,
> > struct vmem_altmap *altmap, int migratetype)
> > @@ -6187,21 +6187,21 @@ static void __meminit zone_init_free_lists(struct zone *zone)
> > }
> > }
> >
> > -void __meminit __weak memmap_init(unsigned long size, int nid,
> > +void __meminit __weak memmap_init_zone(unsigned long size, int nid,
> > unsigned long zone,
> > - unsigned long range_start_pfn)
> > + unsigned long zone_start_pfn)
>
> Why are we not simply passing "struct zone" like
>
> void __meminit __weak memmap_init_zone(struct zone *zone)
>
> from which we can derive
> - nid
> - zone idx
> - zone_start_pfn
> - spanned_pages / zone_end_pfn
>
> At least when called from free_area_init_core() this should work just
> fine I think.
Yes, passing 'struct zone *zone' looks much better, I will append a patch to
do this. Thanks.
>
>
>
> > {
> > unsigned long start_pfn, end_pfn;
> > - unsigned long range_end_pfn = range_start_pfn + size;
> > + unsigned long zone_end_pfn = zone_start_pfn + size;
> > int i;
> >
> > for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
> > - start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
> > - end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
> > + start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
> > + end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
> >
> > if (end_pfn > start_pfn) {
> > size = end_pfn - start_pfn;
> > - memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
> > + memmap_init_range(size, nid, zone, start_pfn, zone_end_pfn,
> > MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> > }
> > }
> > @@ -6903,7 +6903,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
> > set_pageblock_order();
> > setup_usemap(pgdat, zone, zone_start_pfn, size);
> > init_currently_empty_zone(zone, zone_start_pfn, size);
> > - memmap_init(size, nid, j, zone_start_pfn);
> > + memmap_init_zone(size, nid, j, zone_start_pfn);
> > }
> > }
> >
> >
>
>
> --
> Thanks,
>
> David / dhildenb
>
>
On 12/14/20 at 01:04pm, Mike Rapoport wrote:
> On Mon, Dec 14, 2020 at 11:00:07AM +0100, David Hildenbrand wrote:
> > On 13.12.20 16:09, Baoquan He wrote:
> > > The current memmap_init_zone() only handles memory region inside one zone.
> > > Actually memmap_init() does the memmap init of one zone. So rename both of
> > > them accordingly.
> > >
> > > And also rename the function parameter 'range_start_pfn' and local variable
> > > 'range_end_pfn' to zone_start_pfn/zone_end_pfn.
> > >
> > > Signed-off-by: Baoquan He <[email protected]>
> > > ---
......
> > > set_zone_contiguous(zone);
> > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > > index 315c22974f0d..fac599deba56 100644
> > > --- a/mm/page_alloc.c
> > > +++ b/mm/page_alloc.c
> > > @@ -6050,7 +6050,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
> > > * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
> > > * zone stats (e.g., nr_isolate_pageblock) are touched.
> > > */
> > > -void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
> > > +void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
> > > unsigned long start_pfn, unsigned long zone_end_pfn,
> > > enum meminit_context context,
> > > struct vmem_altmap *altmap, int migratetype)
> > > @@ -6187,21 +6187,21 @@ static void __meminit zone_init_free_lists(struct zone *zone)
> > > }
> > > }
> > >
> > > -void __meminit __weak memmap_init(unsigned long size, int nid,
> > > +void __meminit __weak memmap_init_zone(unsigned long size, int nid,
> > > unsigned long zone,
> > > - unsigned long range_start_pfn)
> > > + unsigned long zone_start_pfn)
> >
> > Why are we not simply passing "struct zone" like
> >
> > void __meminit __weak memmap_init_zone(struct zone *zone)
> >
> > from which we can derive
> > - nid
> > - zone idx
> > - zone_start_pfn
> > - spanned_pages / zone_end_pfn
> >
> > At least when called from free_area_init_core() this should work just
> > fine I think.
>
> There is also a custom memmap init in ia64 which at least should be
> tested ;-)
Right. Tried in arch/ia64/mm/init.c, the change is as below. Looks
simple, compiling passed on ia64 should be OK.
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index af678197ac2d..4fa49a762d58 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -541,12 +541,14 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
return 0;
}
-void __meminit
-memmap_init_zone (unsigned long size, int nid, unsigned long zone,
- unsigned long start_pfn)
+void __meminit memmap_init_zone (struct zone *zone)
{
+ unsigned long size = zone->spanned_size;
+ int nid = zone_to_nid(zone), zone_id = zone_idx(zone);
+ unsigned long start_pfn = zone->zone_start_pfn;
+
if (!vmem_map) {
- memmap_init_range(size, nid, zone, start_pfn, start_pfn + size,
+ memmap_init_range(size, nid, zone_id, start_pfn, start_pfn + size,
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
} else {
struct page *start;
@@ -556,7 +558,7 @@ memmap_init_zone (unsigned long size, int nid, unsigned long zone,
args.start = start;
args.end = start + size;
args.nid = nid;
- args.zone = zone;
+ args.zone = zone_id;
efi_memmap_walk(virtual_memmap_init, &args);
}
>
> More broadly, while Baoquan's fix looks Ok to me, I think we can
> calculate node->first_deferred_pfn earlier in, say,
> free_area_init_node() rather than do defer_init() check for each pfn.
I remember I once tried to move the defer init up one level into memmap_init()
when making the draft patch in the first place. I finally ended up with this
because there's overlap_memmap_init().
>
> > > {
> > > unsigned long start_pfn, end_pfn;
> > > - unsigned long range_end_pfn = range_start_pfn + size;
> > > + unsigned long zone_end_pfn = zone_start_pfn + size;
> > > int i;
> > >
> > > for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
> > > - start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
> > > - end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
> > > + start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
> > > + end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
> > >
> > > if (end_pfn > start_pfn) {
> > > size = end_pfn - start_pfn;
> > > - memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
> > > + memmap_init_range(size, nid, zone, start_pfn, zone_end_pfn,
> > > MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> > > }
> > > }
> > > @@ -6903,7 +6903,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
> > > set_pageblock_order();
> > > setup_usemap(pgdat, zone, zone_start_pfn, size);
> > > init_currently_empty_zone(zone, zone_start_pfn, size);
> > > - memmap_init(size, nid, j, zone_start_pfn);
> > > + memmap_init_zone(size, nid, j, zone_start_pfn);
> > > }
> > > }
> > >
> > >
> >
> >
> > --
> > Thanks,
> >
> > David / dhildenb
> >
>
> --
> Sincerely yours,
> Mike.
>