2022-06-06 04:48:17

by Huanpeng Xin

[permalink] [raw]
Subject: [PATCH] mm: add a new emergency page migratetype.

From: xinhuanpeng <[email protected]>

Add a new page migratetype, MIGRATE_EMERGENCY, which reserves a small
amount of memory per node that is used as a last resort to satisfy
non-costly, non-NOWARN page allocations that would otherwise fail.

Signed-off-by: xinhuanpeng <[email protected]>
---
include/linux/gfp.h | 4 ++
include/linux/mmzone.h | 16 ++++++
init/main.c | 3 ++
mm/Kconfig | 11 +++++
mm/page_alloc.c | 107 +++++++++++++++++++++++++++++++++++++++++
5 files changed, 141 insertions(+)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2d2ccae933c2..fcc758a34e11 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -716,4 +716,8 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages);
extern void init_cma_reserved_pageblock(struct page *page);
#endif

+#ifdef CONFIG_EMERGENCY_MEMORY
+extern void emergency_mm_init(void);
+#endif
+
#endif /* __LINUX_GFP_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index aab70355d64f..5d3962d7cdaf 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -60,6 +60,13 @@ enum migratetype {
#endif
#ifdef CONFIG_MEMORY_ISOLATION
MIGRATE_ISOLATE, /* can't allocate from here */
+#endif
+#ifdef CONFIG_EMERGENCY_MEMORY
+ /*
+ * MIGRATE_EMERGENCY migration type is designed to save
+ * non-costly non-NOWARN page allocation failure.
+ */
+ MIGRATE_EMERGENCY,
#endif
MIGRATE_TYPES
};
@@ -413,6 +420,11 @@ struct per_cpu_nodestat {

#endif /* !__GENERATING_BOUNDS.H */

+#ifdef CONFIG_EMERGENCY_MEMORY
+/* The maximum number of pages in MIGRATE_EMERGENCY migration type */
+#define MAX_MANAGED_EMERGENCY 2048
+#endif
+
enum zone_type {
/*
* ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able
@@ -517,6 +529,10 @@ struct zone {

unsigned long nr_reserved_highatomic;

+#ifdef CONFIG_EMERGENCY_MEMORY
+ /* The actual number of pages in MIGRATE_EMERGENCY migration type */
+ unsigned long nr_reserved_emergency;
+#endif
/*
* We don't know if the memory that we're going to allocate will be
* freeable or/and it will be released eventually, so to avoid totally
diff --git a/init/main.c b/init/main.c
index 02eb533018f6..7ff2e7761198 100644
--- a/init/main.c
+++ b/init/main.c
@@ -852,6 +852,9 @@ static void __init mm_init(void)
init_espfix_bsp();
/* Should be run after espfix64 is set up. */
pti_init();
+#ifdef CONFIG_EMERGENCY_MEMORY
+ emergency_mm_init();
+#endif
}

#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
diff --git a/mm/Kconfig b/mm/Kconfig
index 169e64192e48..a13559cf13cf 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1132,4 +1132,15 @@ config PTE_MARKER_UFFD_WP

source "mm/damon/Kconfig"

+config EMERGENCY_MEMORY
+ bool "Enable emergency memory"
+ default n
+ help
+
+ Add a new emergency page migratetype.
+
+ This enables the migration type MIGRATE_EMERGENCY, which reserves
+ a small amount of memory used as a last resort for non-costly,
+ non-NOWARN page allocations that would otherwise fail.
+
endmenu
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e008a3df0485..a149fb7fc2f8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -324,6 +324,9 @@ const char * const migratetype_names[MIGRATE_TYPES] = {
#ifdef CONFIG_CMA
"CMA",
#endif
+#ifdef CONFIG_EMERGENCY_MEMORY
+ "Emergency",
+#endif
#ifdef CONFIG_MEMORY_ISOLATION
"Isolate",
#endif
@@ -2908,6 +2911,43 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
return false;
}

+#ifdef CONFIG_EMERGENCY_MEMORY
+/*
+ * Initialization of the MIGRATE_EMERGENCY migration type: reserve up to
+ * MAX_MANAGED_EMERGENCY pages per node by allocating whole pageblocks,
+ * retagging them MIGRATE_EMERGENCY and returning them to the buddy system.
+ */
+void __init emergency_mm_init(void)
+{
+	/*
+	 * If pageblock_order < MAX_ORDER - 1, then allocating a few pageblocks
+	 * may cause the buddy system to merge two pageblocks of different
+	 * migration types, for example, MIGRATE_EMERGENCY and MIGRATE_MOVABLE.
+	 */
+	if (pageblock_order == MAX_ORDER - 1) {
+		int nid = 0;
+
+		pr_info("start to setup MIGRATE_EMERGENCY reserved memory.\n");
+		for_each_online_node(nid) {
+			pg_data_t *pgdat = NODE_DATA(nid);
+			struct zone *zone = &pgdat->node_zones[ZONE_NORMAL];
+
+			while (zone->nr_reserved_emergency < MAX_MANAGED_EMERGENCY) {
+				/*
+				 * __GFP_THISNODE keeps the reservation on the
+				 * node being accounted; __GFP_NOWARN because
+				 * failure is handled right below.
+				 * NOTE(review): this assumes the pageblock
+				 * lands in ZONE_NORMAL of this node, which is
+				 * where nr_reserved_emergency is accounted --
+				 * confirm.
+				 */
+				struct page *page = alloc_pages_node(nid,
+						GFP_KERNEL | __GFP_THISNODE |
+						__GFP_MOVABLE | __GFP_NOWARN,
+						pageblock_order);
+
+				if (page == NULL) {
+					/* %lu: nr_reserved_emergency is unsigned long */
+					pr_warn("node id %d MIGRATE_EMERGENCY reserved pages failed, reserved %lu pages.\n",
+						nid, zone->nr_reserved_emergency);
+					break;
+				}
+				set_pageblock_migratetype(page, MIGRATE_EMERGENCY);
+				__free_pages(page, pageblock_order);
+				zone->nr_reserved_emergency += pageblock_nr_pages;
+			}
+			pr_info("node id %d MIGRATE_EMERGENCY reserved %lu pages.\n",
+				nid, zone->nr_reserved_emergency);
+		}
+	}
+}
+#endif
+
/*
* Try finding a free buddy page on the fallback list and put it on the free
* list of requested migratetype, possibly along with other pages from the same
@@ -3861,6 +3901,14 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
const bool alloc_harder = (alloc_flags & (ALLOC_HARDER|ALLOC_OOM));
long unusable_free = (1 << order) - 1;

+#ifdef CONFIG_EMERGENCY_MEMORY
+ /*
+ * If the migration type MIGRATE_EMERGENCY is enabled, then subtract the
+ * reserved pages.
+ */
+ unusable_free -= z->nr_reserved_emergency;
+#endif
+
/*
* If the caller does not have rights to ALLOC_HARDER then subtract
* the high-atomic reserves. This will over-estimate the size of the
@@ -4073,6 +4121,50 @@ static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
return alloc_flags;
}

+#ifdef CONFIG_EMERGENCY_MEMORY
+/*
+ * get_emergency_page_from_freelist - allocate from the memory reserved
+ * in the MIGRATE_EMERGENCY migration type.
+ *
+ * Only ZONE_NORMAL of the preferred node carries the emergency reserve
+ * (see emergency_mm_init()), so requests that cannot use ZONE_NORMAL
+ * are rejected up front.  Returns a prepped page, or NULL if the
+ * reserve is exhausted or the cpuset forbids this zone.
+ */
+static struct page *get_emergency_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+		int alloc_flags, const struct alloc_context *ac, int migratetype)
+{
+	struct page *page = NULL;
+
+	/* The field is named highest_zoneidx (high_zoneidx no longer exists). */
+	if (ac->highest_zoneidx >= ZONE_NORMAL) {
+		struct zoneref *z = ac->preferred_zoneref;
+		struct pglist_data *pgdat = NODE_DATA(zonelist_node_idx(z));
+		struct zone *zone = &pgdat->node_zones[ZONE_NORMAL];
+		unsigned long flags;
+
+		if (cpusets_enabled() &&
+		    (alloc_flags & ALLOC_CPUSET) &&
+		    !__cpuset_zone_allowed(zone, gfp_mask))
+			return NULL;
+
+		spin_lock_irqsave(&zone->lock, flags);
+		do {
+			/* Skip pages that fail the check, as rmqueue() does. */
+			page = __rmqueue_smallest(zone, order, migratetype);
+		} while (page && check_new_pages(page, order));
+
+		/* Drop the zone lock but keep IRQs disabled for accounting. */
+		spin_unlock(&zone->lock);
+
+		if (page) {
+			__mod_zone_freepage_state(zone, -(1 << order),
+					get_pcppage_migratetype(page));
+
+			__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
+			/* zone_statistics() takes the number of accounted allocations. */
+			zone_statistics(z->zone, zone, 1);
+			prep_new_page(page, order, gfp_mask, alloc_flags);
+		}
+		local_irq_restore(flags);
+	}
+
+	return page;
+}
+#endif
+
/*
* get_page_from_freelist goes through the zonelist trying to allocate
* a page.
@@ -5169,6 +5261,18 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto retry;
}
fail:
+#ifdef CONFIG_EMERGENCY_MEMORY
+ if (!(gfp_mask & __GFP_NOWARN) && !costly_order) {
+ /*
+ * If this is a non-costly, non-NOWARN allocation, fall back to the
+ * memory reserved in the MIGRATE_EMERGENCY migration type.
+ */
+ page = get_emergency_page_from_freelist(gfp_mask, order, alloc_flags, ac,
+ MIGRATE_EMERGENCY);
+ if (page)
+ goto got_pg;
+ }
+#endif
warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
got_pg:
@@ -5905,6 +6009,9 @@ static void show_migration_types(unsigned char type)
#ifdef CONFIG_CMA
[MIGRATE_CMA] = 'C',
#endif
+#ifdef CONFIG_EMERGENCY_MEMORY
+ [MIGRATE_EMERGENCY] = 'G',
+#endif
#ifdef CONFIG_MEMORY_ISOLATION
[MIGRATE_ISOLATE] = 'I',
#endif
--
2.36.1


2022-06-07 16:39:52

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH] mm: add a new emergency page migratetype.

On 06.06.22 05:27, Huanpeng Xin wrote:
> From: xinhuanpeng <[email protected]>
>
> add a new page migratetype reserved for
> non-costly non-NOWARN page allocation failure.

Sorry to say, but this patch description is not expressive enough. I
have absolutely no clue what you are trying to achieve and why we should
care.

Especially, why do we care about locally grouping these allocations
(that's what pageblock flags are for after all)?

Your Kconfig option is also not particularly user friendly to read either:

"This enables the migration type MIGRATE_EMERGENCY,which reserves
a small amount of memory for non-costly non-NOWARN page allocation
failure."

Usually we reserve memory via different mechanisms, like atomic
reserves? Why can't something like that be used.

On first sight, defining a new pageblock migratype feels wrong to me.
But then, I have no clue what you are actually trying to achieve.

--
Thanks,

David / dhildenb

2022-06-12 16:15:50

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] mm: add a new emergency page migratetype.

Hi Huanpeng,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]

url: https://github.com/intel-lab-lkp/linux/commits/Huanpeng-Xin/mm-add-a-new-emergency-page-migratetype/20220606-113050
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: hexagon-randconfig-r026-20220612 (https://download.01.org/0day-ci/archive/20220612/[email protected]/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project ff4abe755279a3a47cc416ef80dbc900d9a98a19)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/c3dbdae8f54c1335f2cb92b379a6eab1253500c8
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Huanpeng-Xin/mm-add-a-new-emergency-page-migratetype/20220606-113050
git checkout c3dbdae8f54c1335f2cb92b379a6eab1253500c8
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All error/warnings (new ones prefixed by >>):

>> mm/page_alloc.c:2940:12: warning: format specifies type 'int' but the argument has type 'unsigned long' [-Wformat]
nid, zone->nr_reserved_emergency);
^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/printk.h:528:37: note: expanded from macro 'pr_warn'
printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
~~~ ^~~~~~~~~~~
include/linux/printk.h:475:60: note: expanded from macro 'printk'
#define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
~~~ ^~~~~~~~~~~
include/linux/printk.h:447:19: note: expanded from macro 'printk_index_wrap'
_p_func(_fmt, ##__VA_ARGS__); \
~~~~ ^~~~~~~~~~~
mm/page_alloc.c:2948:10: warning: format specifies type 'int' but the argument has type 'unsigned long' [-Wformat]
nid, zone->nr_reserved_emergency);
^~~~~~~~~~~~~~~~~~~~~~~~~~~
include/linux/printk.h:548:34: note: expanded from macro 'pr_info'
printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
~~~ ^~~~~~~~~~~
include/linux/printk.h:475:60: note: expanded from macro 'printk'
#define printk(fmt, ...) printk_index_wrap(_printk, fmt, ##__VA_ARGS__)
~~~ ^~~~~~~~~~~
include/linux/printk.h:447:19: note: expanded from macro 'printk_index_wrap'
_p_func(_fmt, ##__VA_ARGS__); \
~~~~ ^~~~~~~~~~~
>> mm/page_alloc.c:4215:10: error: no member named 'high_zoneidx' in 'struct alloc_context'; did you mean 'highest_zoneidx'?
if (ac->high_zoneidx >= ZONE_NORMAL) {
^~~~~~~~~~~~
highest_zoneidx
mm/internal.h:226:17: note: 'highest_zoneidx' declared here
enum zone_type highest_zoneidx;
^
>> mm/page_alloc.c:4238:33: error: too few arguments to function call, expected 3, have 2
zone_statistics(z->zone, zone);
~~~~~~~~~~~~~~~ ^
mm/page_alloc.c:3697:20: note: 'zone_statistics' declared here
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
^
2 warnings and 2 errors generated.


vim +4215 mm/page_alloc.c

4204
4205 #ifdef CONFIG_EMERGENCY_MEMORY
4206 /*
4207 * get_emergency_page_from_freelist allocates pages in reserved memory
4208 * in the migration type MIGRATE_EMERGENCY.
4209 */
4210 static struct page *get_emergency_page_from_freelist(gfp_t gfp_mask, unsigned int order,
4211 int alloc_flags, const struct alloc_context *ac, int migratetype)
4212 {
4213 struct page *page = NULL;
4214
> 4215 if (ac->high_zoneidx >= ZONE_NORMAL) {
4216 struct zoneref *z = ac->preferred_zoneref;
4217 struct pglist_data *pgdat = NODE_DATA(zonelist_node_idx(z));
4218 struct zone *zone = &pgdat->node_zones[ZONE_NORMAL];
4219 unsigned long flags;
4220
4221 if (cpusets_enabled() &&
4222 (alloc_flags & ALLOC_CPUSET) &&
4223 !__cpuset_zone_allowed(zone, gfp_mask))
4224 return NULL;
4225
4226 spin_lock_irqsave(&zone->lock, flags);
4227 do {
4228 page = __rmqueue_smallest(zone, order, migratetype);
4229 } while (page && check_new_pages(page, order));
4230
4231 spin_unlock(&zone->lock);
4232
4233 if (page) {
4234 __mod_zone_freepage_state(zone, -(1 << order),
4235 get_pcppage_migratetype(page));
4236
4237 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
> 4238 zone_statistics(z->zone, zone);
4239 prep_new_page(page, order, gfp_mask, alloc_flags);
4240 }
4241 local_irq_restore(flags);
4242 }
4243
4244 return page;
4245

--
0-DAY CI Kernel Test Service
https://01.org/lkp