2021-05-25 08:44:51

by Mel Gorman

[permalink] [raw]
Subject: [PATCH 3/6] mm/page_alloc: Adjust pcp->high after CPU hotplug events

The PCP high watermark is based on the number of online CPUs so the
watermarks must be adjusted during CPU hotplug. At the time of
hot-remove, the number of online CPUs is already adjusted but during
hot-add, a delta needs to be applied to update PCP to the correct
value. After this patch is applied, the high watermarks are adjusted
correctly.

# grep high: /proc/zoneinfo | tail -1
high: 649
# echo 0 > /sys/devices/system/cpu/cpu4/online
# grep high: /proc/zoneinfo | tail -1
high: 664
# echo 1 > /sys/devices/system/cpu/cpu4/online
# grep high: /proc/zoneinfo | tail -1
high: 649

Signed-off-by: Mel Gorman <[email protected]>
---
include/linux/cpuhotplug.h | 2 +-
mm/internal.h | 2 +-
mm/memory_hotplug.c | 4 ++--
mm/page_alloc.c | 38 +++++++++++++++++++++++++++-----------
4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4a62b3980642..47e13582d9fc 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -54,7 +54,7 @@ enum cpuhp_state {
CPUHP_MM_MEMCQ_DEAD,
CPUHP_PERCPU_CNT_DEAD,
CPUHP_RADIX_DEAD,
- CPUHP_PAGE_ALLOC_DEAD,
+ CPUHP_PAGE_ALLOC,
CPUHP_NET_DEV_DEAD,
CPUHP_PCI_XGENE_DEAD,
CPUHP_IOMMU_IOVA_DEAD,
diff --git a/mm/internal.h b/mm/internal.h
index 54bd0dc2c23c..651250e59ef5 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -221,7 +221,7 @@ extern int user_min_free_kbytes;
extern void free_unref_page(struct page *page);
extern void free_unref_page_list(struct list_head *list);

-extern void zone_pcp_update(struct zone *zone);
+extern void zone_pcp_update(struct zone *zone, int cpu_online);
extern void zone_pcp_reset(struct zone *zone);
extern void zone_pcp_disable(struct zone *zone);
extern void zone_pcp_enable(struct zone *zone);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 70620d0dd923..bebb3cead810 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -961,7 +961,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *z
node_states_set_node(nid, &arg);
if (need_zonelists_rebuild)
build_all_zonelists(NULL);
- zone_pcp_update(zone);
+ zone_pcp_update(zone, 0);

/* Basic onlining is complete, allow allocation of onlined pages. */
undo_isolate_page_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE);
@@ -1835,7 +1835,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
zone_pcp_reset(zone);
build_all_zonelists(NULL);
} else
- zone_pcp_update(zone);
+ zone_pcp_update(zone, 0);

node_states_clear_node(node, &arg);
if (arg.status_change_nid >= 0) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c0536e5d088a..dc4ac309bc21 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6628,7 +6628,7 @@ static int zone_batchsize(struct zone *zone)
#endif
}

-static int zone_highsize(struct zone *zone, int batch)
+static int zone_highsize(struct zone *zone, int batch, int cpu_online)
{
#ifdef CONFIG_MMU
int high;
@@ -6639,9 +6639,10 @@ static int zone_highsize(struct zone *zone, int batch)
* so that if they are full then background reclaim will not be
* started prematurely. The value is split across all online CPUs
* local to the zone. Note that early in boot that CPUs may not be
- * online yet.
+ * online yet and that during CPU hotplug that the cpumask is not
+ * yet updated when a CPU is being onlined.
*/
- nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone))));
+ nr_local_cpus = max(1U, cpumask_weight(cpumask_of_node(zone_to_nid(zone)))) + cpu_online;
high = low_wmark_pages(zone) / nr_local_cpus;

/*
@@ -6715,12 +6716,12 @@ static void __zone_set_pageset_high_and_batch(struct zone *zone, unsigned long h
* Calculate and set new high and batch values for all per-cpu pagesets of a
* zone based on the zone's size.
*/
-static void zone_set_pageset_high_and_batch(struct zone *zone)
+static void zone_set_pageset_high_and_batch(struct zone *zone, int cpu_online)
{
int new_high, new_batch;

new_batch = max(1, zone_batchsize(zone));
- new_high = zone_highsize(zone, new_batch);
+ new_high = zone_highsize(zone, new_batch, cpu_online);

if (zone->pageset_high == new_high &&
zone->pageset_batch == new_batch)
@@ -6750,7 +6751,7 @@ void __meminit setup_zone_pageset(struct zone *zone)
per_cpu_pages_init(pcp, pzstats);
}

- zone_set_pageset_high_and_batch(zone);
+ zone_set_pageset_high_and_batch(zone, 0);
}

/*
@@ -8008,6 +8009,7 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)

static int page_alloc_cpu_dead(unsigned int cpu)
{
+ struct zone *zone;

lru_add_drain_cpu(cpu);
drain_pages(cpu);
@@ -8028,6 +8030,19 @@ static int page_alloc_cpu_dead(unsigned int cpu)
* race with what we are doing.
*/
cpu_vm_stats_fold(cpu);
+
+ for_each_populated_zone(zone)
+ zone_pcp_update(zone, 0);
+
+ return 0;
+}
+
+static int page_alloc_cpu_online(unsigned int cpu)
+{
+ struct zone *zone;
+
+ for_each_populated_zone(zone)
+ zone_pcp_update(zone, 1);
return 0;
}

@@ -8053,8 +8068,9 @@ void __init page_alloc_init(void)
hashdist = 0;
#endif

- ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC_DEAD,
- "mm/page_alloc:dead", NULL,
+ ret = cpuhp_setup_state_nocalls(CPUHP_PAGE_ALLOC,
+ "mm/page_alloc:pcp",
+ page_alloc_cpu_online,
page_alloc_cpu_dead);
WARN_ON(ret < 0);
}
@@ -8192,7 +8208,7 @@ static void __setup_per_zone_wmarks(void)
* The watermark size have changed so update the pcpu batch
* and high limits or the limits may be inappropriate.
*/
- zone_set_pageset_high_and_batch(zone);
+ zone_set_pageset_high_and_batch(zone, 0);

spin_unlock_irqrestore(&zone->lock, flags);
}
@@ -9014,10 +9030,10 @@ EXPORT_SYMBOL(free_contig_range);
* The zone indicated has a new number of managed_pages; batch sizes and percpu
* page high values need to be recalculated.
*/
-void __meminit zone_pcp_update(struct zone *zone)
+void zone_pcp_update(struct zone *zone, int cpu_online)
{
mutex_lock(&pcp_batch_high_lock);
- zone_set_pageset_high_and_batch(zone);
+ zone_set_pageset_high_and_batch(zone, cpu_online);
mutex_unlock(&pcp_batch_high_lock);
}

--
2.26.2


2021-05-28 13:33:12

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH 3/6] mm/page_alloc: Adjust pcp->high after CPU hotplug events

On 5/25/21 10:01 AM, Mel Gorman wrote:
> The PCP high watermark is based on the number of online CPUs so the
> watermarks must be adjusted during CPU hotplug. At the time of
> hot-remove, the number of online CPUs is already adjusted but during
> hot-add, a delta needs to be applied to update PCP to the correct
> value. After this patch is applied, the high watermarks are adjusted
> correctly.
>
> # grep high: /proc/zoneinfo | tail -1
> high: 649
> # echo 0 > /sys/devices/system/cpu/cpu4/online
> # grep high: /proc/zoneinfo | tail -1
> high: 664
> # echo 1 > /sys/devices/system/cpu/cpu4/online
> # grep high: /proc/zoneinfo | tail -1
> high: 649
>
> Signed-off-by: Mel Gorman <[email protected]>

Acked-by: Vlastimil Babka <[email protected]>