2018-07-17 10:58:38

by Oscar Salvador

Subject: [RFC PATCH 0/3] Cleanup for free_area_init_node / free_area_init_core

From: Oscar Salvador <[email protected]>

While trying to clean up the memhotplug code, I found it quite difficult to follow
free_area_init_node / free_area_init_core wrt which functions get called
from the memhotplug path.

This is an effort to refactor / clean up those two functions a bit,
to make them easier to read.

It compiles, but I did not test it.
I would like to get some feedback to see whether it is worth pursuing or not.

Signed-off-by: Oscar Salvador <[email protected]>

Oscar Salvador (3):
mm: Make free_area_init_core more readable by moving the ifdefs
mm: Refactor free_area_init_core
mm: Make free_area_init_node call certain functions only when booting

mm/page_alloc.c | 193 ++++++++++++++++++++++++++++++++++----------------------
1 file changed, 116 insertions(+), 77 deletions(-)

--
2.13.6



2018-07-17 10:57:54

by Oscar Salvador

Subject: [RFC PATCH 3/3] mm: Make free_area_init_node call certain functions only when booting

From: Oscar Salvador <[email protected]>

If free_area_init_node gets called from the memhotplug code, we do not need
to call calculate_node_totalpages(), as the node has no pages yet.

We do not need to set the range for the deferred initialization either,
as memmap_init_zone skips that when the context is MEMMAP_HOTPLUG.
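
For context, the memhotplug path reaches free_area_init_node() through
hotadd_new_pgdat() with all zone sizes zeroed, which is why the
totalpages calculation is redundant there. Roughly (a paraphrased
sketch of that caller for illustration only, not part of this patch):

	static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
	{
		struct pglist_data *pgdat;
		unsigned long zones_size[MAX_NR_ZONES] = {0};
		unsigned long zholes_size[MAX_NR_ZONES] = {0};
		unsigned long start_pfn = PFN_DOWN(start);
		...
		/*
		 * The node has no present pages at this point; they are only
		 * accounted later, when the memory gets onlined.
		 */
		free_area_init_node(nid, zones_size, start_pfn, zholes_size);
		...
	}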

Signed-off-by: Oscar Salvador <[email protected]>
---
mm/page_alloc.c | 37 ++++++++++++++++++++++---------------
1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3bf939393ca1..d2562751dbfd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6299,8 +6299,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
spin_lock_init(&pgdat->lru_lock);
lruvec_init(node_lruvec(pgdat));

- pgdat->per_cpu_nodestats = &boot_nodestats;
-
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;

@@ -6386,6 +6384,21 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
#endif /* CONFIG_FLAT_NODE_MEM_MAP */

+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static void pgdat_set_deferred_range(pg_data_t *pgdat)
+{
+ /*
+ * We start only with one section of pages, more pages are added as
+ * needed until the rest of deferred pages are initialized.
+ */
+ pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+ pgdat->node_spanned_pages);
+ pgdat->first_deferred_pfn = ULONG_MAX;
+}
+#else
+static void pgdat_set_deferred_range(pg_data_t *pgdat) {}
+#endif
+
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
unsigned long node_start_pfn, unsigned long *zholes_size)
{
@@ -6407,20 +6420,14 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
#else
start_pfn = node_start_pfn;
#endif
- calculate_node_totalpages(pgdat, start_pfn, end_pfn,
- zones_size, zholes_size);
-
- alloc_node_mem_map(pgdat);
+ if (system_state == SYSTEM_BOOTING) {
+ calculate_node_totalpages(pgdat, start_pfn, end_pfn,
+ zones_size, zholes_size);
+ alloc_node_mem_map(pgdat);
+ pgdat_set_deferred_range(pgdat);
+ pgdat->per_cpu_nodestats = &boot_nodestats;
+ }

-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
- /*
- * We start only with one section of pages, more pages are added as
- * needed until the rest of deferred pages are initialized.
- */
- pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
- pgdat->node_spanned_pages);
- pgdat->first_deferred_pfn = ULONG_MAX;
-#endif
free_area_init_core(pgdat);
}

--
2.13.6


2018-07-17 10:58:57

by Oscar Salvador

Subject: [RFC PATCH 1/3] mm: Make free_area_init_core more readable by moving the ifdefs

From: Oscar Salvador <[email protected]>

Moving the #ifdefery out of the function makes it easier to follow.
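
The helpers below follow the usual kernel idiom: the real body is compiled
only when the config option is enabled, and an empty static stub is provided
otherwise, so the caller needs no #ifdef at all. As a generic sketch of the
idiom (CONFIG_FOO / pgdat_init_foo are illustrative names, not from this
patch):

	#ifdef CONFIG_FOO
	static void pgdat_init_foo(struct pglist_data *pgdat)
	{
		/* real initialisation, built only with CONFIG_FOO=y */
	}
	#else
	static void pgdat_init_foo(struct pglist_data *pgdat) {}
	#endif

	/* the caller stays ifdef-free; the empty stub compiles away */
	pgdat_init_foo(pgdat);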

Signed-off-by: Oscar Salvador <[email protected]>
---
mm/page_alloc.c | 50 +++++++++++++++++++++++++++++++++++++-------------
1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e357189cd24a..8a73305f7c55 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6206,6 +6206,37 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
}

+#ifdef CONFIG_NUMA_BALANCING
+static void pgdat_init_numabalancing(struct pglist_data *pgdat)
+{
+ spin_lock_init(&pgdat->numabalancing_migrate_lock);
+ pgdat->numabalancing_migrate_nr_pages = 0;
+ pgdat->numabalancing_migrate_next_window = jiffies;
+}
+#else
+static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pgdat_init_split_queue(struct pglist_data *pgdat)
+{
+ spin_lock_init(&pgdat->split_queue_lock);
+ INIT_LIST_HEAD(&pgdat->split_queue);
+ pgdat->split_queue_len = 0;
+}
+#else
+static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
+#endif
+
+#ifdef CONFIG_COMPACTION
+static void pgdat_init_kcompactd(struct pglist_data *pgdat)
+{
+ init_waitqueue_head(&pgdat->kcompactd_wait);
+}
+#else
+static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
+#endif
+
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -6220,21 +6251,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
int nid = pgdat->node_id;

pgdat_resize_init(pgdat);
-#ifdef CONFIG_NUMA_BALANCING
- spin_lock_init(&pgdat->numabalancing_migrate_lock);
- pgdat->numabalancing_migrate_nr_pages = 0;
- pgdat->numabalancing_migrate_next_window = jiffies;
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- spin_lock_init(&pgdat->split_queue_lock);
- INIT_LIST_HEAD(&pgdat->split_queue);
- pgdat->split_queue_len = 0;
-#endif
+
+ pgdat_init_numabalancing(pgdat);
+ pgdat_init_split_queue(pgdat);
+ pgdat_init_kcompactd(pgdat);
+
init_waitqueue_head(&pgdat->kswapd_wait);
init_waitqueue_head(&pgdat->pfmemalloc_wait);
-#ifdef CONFIG_COMPACTION
- init_waitqueue_head(&pgdat->kcompactd_wait);
-#endif
+
pgdat_page_ext_init(pgdat);
spin_lock_init(&pgdat->lru_lock);
lruvec_init(node_lruvec(pgdat));
--
2.13.6


2018-07-17 10:59:19

by Oscar Salvador

Subject: [RFC PATCH 2/3] mm: Refactor free_area_init_core

From: Oscar Salvador <[email protected]>

When free_area_init_core gets called from the memhotplug code,
we do not really need to go through all the memmap calculations.

This also structures the code a bit better.
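
Skipping those calculations should be safe because, in the hotplug case, all
zone sizes are still zero at this point; the spans and the struct pages are
set up later, when the range is onlined. Roughly (a paraphrased sketch of the
hotplug path of this era, for context only, not part of this patch):

	void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
			unsigned long nr_pages, struct vmem_altmap *altmap)
	{
		struct pglist_data *pgdat = zone->zone_pgdat;
		...
		/* grow the zone / node spans to cover the hot-added range */
		resize_zone_range(zone, start_pfn, nr_pages);
		resize_pgdat_range(pgdat, start_pfn, nr_pages);
		...
		/* struct pages are initialized here, with MEMMAP_HOTPLUG context */
		memmap_init_zone(nr_pages, pgdat->node_id, zone_idx(zone), start_pfn,
				 MEMMAP_HOTPLUG, altmap);
	}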

Signed-off-by: Oscar Salvador <[email protected]>
---
mm/page_alloc.c | 106 ++++++++++++++++++++++++++++++--------------------------
1 file changed, 57 insertions(+), 49 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8a73305f7c55..3bf939393ca1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6237,6 +6237,42 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat)
static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
#endif

+static void calculate_pages(enum zone_type type, unsigned long *freesize,
+ unsigned long size)
+{
+ /*
+ * Adjust freesize so that it accounts for how much memory
+ * is used by this zone for memmap. This affects the watermark
+ * and per-cpu initialisations
+ */
+ unsigned long memmap_pages = calc_memmap_size(size, *freesize);
+
+ if (!is_highmem_idx(type)) {
+ if (*freesize >= memmap_pages) {
+ *freesize -= memmap_pages;
+ if (memmap_pages)
+ printk(KERN_DEBUG
+ " %s zone: %lu pages used for memmap\n",
+ zone_names[type], memmap_pages);
+ } else
+ pr_warn(" %s zone: %lu pages exceeds freesize %lu\n",
+ zone_names[type], memmap_pages, *freesize);
+ }
+
+ if (type == 0 && *freesize > dma_reserve) {
+ *freesize -= dma_reserve;
+ printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
+ zone_names[0], dma_reserve);
+ }
+
+ if (!is_highmem_idx(type))
+ nr_kernel_pages += *freesize;
+ /* Charge for highmem memmap if there are enough kernel pages */
+ else if (nr_kernel_pages > memmap_pages * 2)
+ nr_kernel_pages -= memmap_pages;
+ nr_all_pages += *freesize;
+}
+
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -6267,50 +6303,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)

for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
- unsigned long size, freesize, memmap_pages;
- unsigned long zone_start_pfn = zone->zone_start_pfn;
-
- size = zone->spanned_pages;
- freesize = zone->present_pages;
-
- /*
- * Adjust freesize so that it accounts for how much memory
- * is used by this zone for memmap. This affects the watermark
- * and per-cpu initialisations
- */
- memmap_pages = calc_memmap_size(size, freesize);
- if (!is_highmem_idx(j)) {
- if (freesize >= memmap_pages) {
- freesize -= memmap_pages;
- if (memmap_pages)
- printk(KERN_DEBUG
- " %s zone: %lu pages used for memmap\n",
- zone_names[j], memmap_pages);
- } else
- pr_warn(" %s zone: %lu pages exceeds freesize %lu\n",
- zone_names[j], memmap_pages, freesize);
- }

- /* Account for reserved pages */
- if (j == 0 && freesize > dma_reserve) {
- freesize -= dma_reserve;
- printk(KERN_DEBUG " %s zone: %lu pages reserved\n",
- zone_names[0], dma_reserve);
- }
-
- if (!is_highmem_idx(j))
- nr_kernel_pages += freesize;
- /* Charge for highmem memmap if there are enough kernel pages */
- else if (nr_kernel_pages > memmap_pages * 2)
- nr_kernel_pages -= memmap_pages;
- nr_all_pages += freesize;
-
- /*
- * Set an approximate value for lowmem here, it will be adjusted
- * when the bootmem allocator frees pages into the buddy system.
- * And all highmem pages will be managed by the buddy system.
- */
- zone->managed_pages = freesize;
#ifdef CONFIG_NUMA
zone->node = nid;
#endif
@@ -6320,13 +6313,28 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
zone_seqlock_init(zone);
zone_pcp_init(zone);

- if (!size)
- continue;
+ if (system_state == SYSTEM_BOOTING) {
+ unsigned long size, freesize;
+ unsigned long zone_start_pfn = zone->zone_start_pfn;

- set_pageblock_order();
- setup_usemap(pgdat, zone, zone_start_pfn, size);
- init_currently_empty_zone(zone, zone_start_pfn, size);
- memmap_init(size, nid, j, zone_start_pfn);
+ size = zone->spanned_pages;
+ freesize = zone->present_pages;
+ calculate_pages(j, &freesize, size);
+
+ /*
+ * Set an approximate value for lowmem here, it will be adjusted
+ * when the bootmem allocator frees pages into the buddy system.
+ * And all highmem pages will be managed by the buddy system.
+ */
+ zone->managed_pages = freesize;
+ if (!size)
+ continue;
+
+ set_pageblock_order();
+ setup_usemap(pgdat, zone, zone_start_pfn, size);
+ init_currently_empty_zone(zone, zone_start_pfn, size);
+ memmap_init(size, nid, j, zone_start_pfn);
+ }
}
}

--
2.13.6