2020-11-02 14:42:11

by Chris Goldsworthy

[permalink] [raw]
Subject: [PATCH 1/2] cma: redirect page allocation to CMA

From: Heesub Shin <[email protected]>

CMA pages are designed to be used as fallback for movable allocations
and cannot be used for non-movable allocations. If CMA pages are
utilized poorly, non-movable allocations may end up getting starved if
all regular movable pages are allocated and the only pages left are
CMA. Always using CMA pages first creates unacceptable performance
problems. As a midway alternative, use CMA pages for certain
userspace allocations. The userspace pages can be migrated or dropped
quickly, which gives decent utilization.

Signed-off-by: Kyungmin Park <[email protected]>
Signed-off-by: Heesub Shin <[email protected]>
Signed-off-by: Vinayak Menon <[email protected]>
[[email protected]: Place in bugfixes]
Signed-off-by: Chris Goldsworthy <[email protected]>
---
include/linux/gfp.h | 15 +++++++++
include/linux/highmem.h | 4 ++-
include/linux/mmzone.h | 4 +++
mm/page_alloc.c | 83 +++++++++++++++++++++++++++++++------------------
4 files changed, 74 insertions(+), 32 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c603237..e80b7d2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -39,11 +39,21 @@ struct vm_area_struct;
#define ___GFP_HARDWALL 0x100000u
#define ___GFP_THISNODE 0x200000u
#define ___GFP_ACCOUNT 0x400000u
+#ifdef CONFIG_CMA
+#define ___GFP_CMA 0x800000u
+#else
+#define ___GFP_CMA 0
+#endif
#ifdef CONFIG_LOCKDEP
+#ifdef CONFIG_CMA
+#define ___GFP_NOLOCKDEP 0x1000000u
+#else
#define ___GFP_NOLOCKDEP 0x800000u
+#endif
#else
#define ___GFP_NOLOCKDEP 0
#endif
+
/* If the above are modified, __GFP_BITS_SHIFT may need updating */

/*
@@ -57,6 +67,7 @@ struct vm_area_struct;
#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)
#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)
#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */
+#define __GFP_CMA ((__force gfp_t)___GFP_CMA)
#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)

/**
@@ -224,7 +235,11 @@ struct vm_area_struct;
#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)

/* Room for N __GFP_FOO bits */
+#ifdef CONFIG_CMA
+#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
+#else
#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
+#endif
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))

/**
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 14e6202..35f052b 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -274,7 +274,9 @@ static inline struct page *
alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
unsigned long vaddr)
{
- return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
+ return __alloc_zeroed_user_highpage(
+ __GFP_MOVABLE|__GFP_CMA, vma,
+ vaddr);
}

static inline void clear_highpage(struct page *page)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fb3bf69..3f913be 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -471,6 +471,10 @@ struct zone {
struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;

+#ifdef CONFIG_CMA
+ bool cma_alloc;
+#endif
+
#ifndef CONFIG_SPARSEMEM
/*
* Flags for a pageblock_nr_pages block. See pageblock-flags.h.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d772206..f938de7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2860,35 +2860,34 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
{
struct page *page;

-#ifdef CONFIG_CMA
- /*
- * Balance movable allocations between regular and CMA areas by
- * allocating from CMA when over half of the zone's free memory
- * is in the CMA area.
- */
- if (alloc_flags & ALLOC_CMA &&
- zone_page_state(zone, NR_FREE_CMA_PAGES) >
- zone_page_state(zone, NR_FREE_PAGES) / 2) {
- page = __rmqueue_cma_fallback(zone, order);
- if (page)
- return page;
- }
-#endif
retry:
page = __rmqueue_smallest(zone, order, migratetype);
- if (unlikely(!page)) {
- if (alloc_flags & ALLOC_CMA)
- page = __rmqueue_cma_fallback(zone, order);

- if (!page && __rmqueue_fallback(zone, order, migratetype,
- alloc_flags))
- goto retry;
- }
+ if (unlikely(!page) && __rmqueue_fallback(zone, order, migratetype,
+ alloc_flags))
+ goto retry;

trace_mm_page_alloc_zone_locked(page, order, migratetype);
return page;
}

+static struct page *__rmqueue_cma(struct zone *zone, unsigned int order,
+ int migratetype,
+ unsigned int alloc_flags)
+{
+ struct page *page = 0;
+
+#ifdef CONFIG_CMA
+ if (migratetype == MIGRATE_MOVABLE && !zone->cma_alloc)
+ page = __rmqueue_cma_fallback(zone, order);
+ else
+#endif
+ page = __rmqueue_smallest(zone, order, migratetype);
+
+ trace_mm_page_alloc_zone_locked(page, order, MIGRATE_CMA);
+ return page;
+}
+
/*
* Obtain a specified number of elements from the buddy allocator, all under
* a single hold of the lock, for efficiency. Add them to the supplied list.
@@ -2896,14 +2895,20 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
- int migratetype, unsigned int alloc_flags)
+ int migratetype, unsigned int alloc_flags, int cma)
{
int i, alloced = 0;

spin_lock(&zone->lock);
for (i = 0; i < count; ++i) {
- struct page *page = __rmqueue(zone, order, migratetype,
- alloc_flags);
+ struct page *page;
+
+ if (cma)
+ page = __rmqueue_cma(zone, order, migratetype,
+ alloc_flags);
+ else
+ page = __rmqueue(zone, order, migratetype, alloc_flags);
+
if (unlikely(page == NULL))
break;

@@ -3388,7 +3393,8 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
unsigned int alloc_flags,
struct per_cpu_pages *pcp,
- struct list_head *list)
+ struct list_head *list,
+ gfp_t gfp_flags)
{
struct page *page;

@@ -3396,7 +3402,8 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, list,
- migratetype, alloc_flags);
+ migratetype, alloc_flags,
+ gfp_flags && __GFP_CMA);
if (unlikely(list_empty(list)))
return NULL;
}
@@ -3422,7 +3429,8 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
- page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
+ page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list,
+ gfp_flags);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
zone_statistics(preferred_zone, zone);
@@ -3448,7 +3456,7 @@ struct page *rmqueue(struct zone *preferred_zone,
* MIGRATE_MOVABLE pcplist could have the pages on CMA area and
* we need to skip it when CMA area isn't allowed.
*/
- if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
+ if (!IS_ENABLED(CONFIG_CMA) || gfp_flags & __GFP_CMA ||
migratetype != MIGRATE_MOVABLE) {
page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
migratetype, alloc_flags);
@@ -3476,8 +3484,14 @@ struct page *rmqueue(struct zone *preferred_zone,
if (page)
trace_mm_page_alloc_zone_locked(page, order, migratetype);
}
- if (!page)
- page = __rmqueue(zone, order, migratetype, alloc_flags);
+ if (!page) {
+ if (gfp_flags & __GFP_CMA)
+ page = __rmqueue_cma(zone, order, migratetype,
+ alloc_flags);
+ else
+ page = __rmqueue(zone, order, migratetype,
+ alloc_flags);
+ }
} while (page && check_new_pages(page, order));
spin_unlock(&zone->lock);
if (!page)
@@ -3790,7 +3804,8 @@ static inline unsigned int current_alloc_flags(gfp_t gfp_mask,
unsigned int pflags = current->flags;

if (!(pflags & PF_MEMALLOC_NOCMA) &&
- gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE &&
+ gfp_mask & __GFP_CMA)
alloc_flags |= ALLOC_CMA;

#endif
@@ -8529,6 +8544,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
if (ret)
return ret;

+#ifdef CONFIG_CMA
+ cc.zone->cma_alloc = 1;
+#endif
/*
* In case of -EBUSY, we'd like to know which page causes problem.
* So, just fall through. test_pages_isolated() has a tracepoint
@@ -8610,6 +8628,9 @@ int alloc_contig_range(unsigned long start, unsigned long end,
done:
undo_isolate_page_range(pfn_max_align_down(start),
pfn_max_align_up(end), migratetype);
+#ifdef CONFIG_CMA
+ cc.zone->cma_alloc = 0;
+#endif
return ret;
}
EXPORT_SYMBOL(alloc_contig_range);
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


2020-11-03 05:35:15

by Chen, Rong A

[permalink] [raw]
Subject: [cma] 1ea6c22c9b: page_allocation_failure:order:#,mode:#(__GFP_RECLAIMABLE),nodemask=(null)

Greeting,

FYI, we noticed the following commit (built with gcc-9):

commit: 1ea6c22c9b85ec176bb78d7076be06a4142f8bdd ("[PATCH 1/2] cma: redirect page allocation to CMA")
url: https://github.com/0day-ci/linux/commits/Chris-Goldsworthy/Increasing-CMA-Utilization-with-a-GFP-Flag/20201102-224143
base: https://git.kernel.org/cgit/linux/kernel/git/axboe/linux-block.git for-next

in testcase: boot

on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 8G

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):


+---------------------------------------------------------------------------+------------+------------+
| | 67b6d2ce11 | 1ea6c22c9b |
+---------------------------------------------------------------------------+------------+------------+
| boot_successes | 6 | 0 |
| boot_failures | 0 | 8 |
| page_allocation_failure:order:#,mode:#(__GFP_RECLAIMABLE),nodemask=(null) | 0 | 8 |
| Mem-Info | 0 | 8 |
| kernel_BUG_at_kernel/workqueue.c | 0 | 8 |
| invalid_opcode:#[##] | 0 | 8 |
| EIP:workqueue_init_early | 0 | 8 |
| Kernel_panic-not_syncing:Fatal_exception | 0 | 8 |
+---------------------------------------------------------------------------+------------+------------+


If you fix the issue, kindly add following tag
Reported-by: kernel test robot <[email protected]>


[ 2.108390] Memory: 8203240K/8388088K available (12819K kernel code, 6610K rwdata, 11292K rodata, 628K init, 12064K bss, 184848K reserved, 0K cma-reserved, 7481224K highmem)
[ 2.111999] Checking if this processor honours the WP bit even in supervisor mode...Ok.
[ 2.113918] random: get_random_u32 called from kmem_cache_open+0x1f/0x240 with crng_init=0
[ 2.114387] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
[ 2.117656] Kernel/User page tables isolation: enabled
[ 2.119610] swapper: page allocation failure: order:0, mode:0x10(__GFP_RECLAIMABLE), nodemask=(null)
[ 2.121604] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-rc1-00365-g1ea6c22c9b85 #1
[ 2.123401] Call Trace:
[ 2.123990] dump_stack+0x1b/0x1d
[ 2.124787] warn_alloc+0x81/0xd9
[ 2.125599] __alloc_pages_slowpath+0x79c/0x7a9
[ 2.126690] ? get_page_from_freelist+0xb8/0x20d
[ 2.127750] __alloc_pages_nodemask+0x107/0x188
[ 2.128795] __alloc_pages_node+0x17/0x1c
[ 2.129757] alloc_slab_page+0x26/0x4e
[ 2.130649] allocate_slab+0x80/0x27e
[ 2.131521] ___slab_alloc+0x247/0x2ec
[ 2.132605] ? radix_tree_node_alloc+0x5e/0x8e
[ 2.133915] ? validate_chain+0x5a8/0x5c3
[ 2.134838] __slab_alloc+0x34/0x4d
[ 2.135830] ? __slab_alloc+0x34/0x4d
[ 2.136909] ? fs_reclaim_release+0x8/0x13
[ 2.137910] kmem_cache_alloc+0x46/0x157
[ 2.138854] ? radix_tree_node_alloc+0x5e/0x8e
[ 2.140103] radix_tree_node_alloc+0x5e/0x8e
[ 2.141369] idr_get_free+0xc1/0x21a
[ 2.142230] idr_alloc_u32+0x4d/0x80
[ 2.143016] idr_alloc+0x30/0x3e
[ 2.143751] worker_pool_assign_id+0x37/0x47
[ 2.144682] workqueue_init_early+0x9f/0x1f6
[ 2.145707] start_kernel+0x206/0x467
[ 2.146592] ? early_idt_handler_common+0x44/0x44
[ 2.147671] i386_start_kernel+0x42/0x44
[ 2.148607] startup_32_smp+0x164/0x168
[ 2.149567] Mem-Info:
[ 2.150101] active_anon:0 inactive_anon:0 isolated_anon:0
[ 2.150101] active_file:0 inactive_file:0 isolated_file:0
[ 2.150101] unevictable:0 dirty:0 writeback:0
[ 2.150101] slab_reclaimable:0 slab_unreclaimable:88
[ 2.150101] mapped:0 shmem:0 pagetables:0 bounce:0
[ 2.150101] free:2050715 free_pcp:0 free_cma:0
[ 2.156588] Node 0 active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:0kB dirty:0kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB kernel_stack:0kB all_unreclaimable? no
[ 2.162313] Normal free:721636kB min:0kB low:0kB high:0kB reserved_highatomic:0KB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:906864kB managed:722016kB mlocked:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
[ 2.162318] lowmem_reserve[]: 0 0 0
[ 2.169009] HighMem free:7481224kB min:0kB low:0kB high:0kB reserved_highatomic:0KB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:7481224kB managed:7481224kB mlocked:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
[ 2.169014] lowmem_reserve[]: 0 0 0
[ 2.175914] Normal: 1*4kB (M) 2*8kB (M) 1*16kB (U) 2*32kB (UM) 2*64kB (M) 2*128kB (UM) 1*256kB (M) 0*512kB 2*1024kB (UM) 1*2048kB (M) 175*4096kB (M) = 721636kB
[ 2.179229] HighMem: 0*4kB 1*8kB (M) 0*16kB 0*32kB 0*64kB 1*128kB (M) 1*256kB (M) 1*512kB (M) 1*1024kB (M) 2*2048kB (M) 1825*4096kB (M) = 7481224kB
[ 2.182343] 0 total pagecache pages
[ 2.183068] 2097022 pages RAM
[ 2.183799] 1870306 pages HighMem/MovableOnly
[ 2.184853] 46212 pages reserved
[ 2.185675] 0 pages cma reserved
[ 2.186442] SLUB: Unable to allocate memory on node -1, gfp=0xcc0(GFP_KERNEL)
[ 2.188136] cache: radix_tree_node, object size: 300, buffer size: 480, default order: 0, min order: 0
[ 2.190339] node 0: slabs: 0, objs: 0, free: 0
[ 2.191414] ------------[ cut here ]------------
[ 2.192442] kernel BUG at kernel/workqueue.c:5944!
[ 2.193614] invalid opcode: 0000 [#1] PTI
[ 2.194549] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-rc1-00365-g1ea6c22c9b85 #1
[ 2.196348] EIP: workqueue_init_early+0xa3/0x1f6
[ 2.197445] Code: 8b 83 fc 01 00 00 8b 54 bd e8 89 10 31 d2 b8 a0 40 7e c2 c7 43 30 00 00 00 00 e8 56 7a e4 fe 89 d8 e8 f1 64 21 fe 85 c0 74 02 <0f> 0b b8 a0 40 7e c2 81 c3 18 02 00 00 e8 db 76 e4 fe 83 ff 02 be
[ 2.201824] EAX: fffffff4 EBX: c27e3bc0 ECX: 00000cc0 EDX: 07ffffff
[ 2.203273] ESI: 00000000 EDI: 00000001 EBP: c278df7c ESP: c278df68
[ 2.204737] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 EFLAGS: 00210082
[ 2.206288] CR0: 80050033 CR2: ffbff000 CR3: 02ec4000 CR4: 000006b0
[ 2.207717] Call Trace:
[ 2.208256] start_kernel+0x206/0x467
[ 2.209038] ? early_idt_handler_common+0x44/0x44
[ 2.210192] i386_start_kernel+0x42/0x44
[ 2.211088] startup_32_smp+0x164/0x168
[ 2.211986] Modules linked in:
[ 2.212723] random: get_random_bytes called from init_oops_id+0x23/0x3b with crng_init=0
[ 2.212729] ---[ end trace 7c274a1c59fae664 ]---
[ 2.215660] EIP: workqueue_init_early+0xa3/0x1f6
[ 2.216748] Code: 8b 83 fc 01 00 00 8b 54 bd e8 89 10 31 d2 b8 a0 40 7e c2 c7 43 30 00 00 00 00 e8 56 7a e4 fe 89 d8 e8 f1 64 21 fe 85 c0 74 02 <0f> 0b b8 a0 40 7e c2 81 c3 18 02 00 00 e8 db 76 e4 fe 83 ff 02 be
[ 2.220942] EAX: fffffff4 EBX: c27e3bc0 ECX: 00000cc0 EDX: 07ffffff
[ 2.222365] ESI: 00000000 EDI: 00000001 EBP: c278df7c ESP: c278df68
[ 2.223834] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 EFLAGS: 00210082
[ 2.225389] CR0: 80050033 CR2: ffbff000 CR3: 02ec4000 CR4: 000006b0
[ 2.226784] Kernel panic - not syncing: Fatal exception

Kboot worker: lkp-worker51
Elapsed time: 60

kvm=(


To reproduce:

# build kernel
cd linux
cp config-5.10.0-rc1-00365-g1ea6c22c9b85 .config
make HOSTCC=gcc-9 CC=gcc-9 ARCH=i386 olddefconfig prepare modules_prepare bzImage

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp qemu -k <bzImage> job-script # job-script is attached in this email



Thanks,
Rong Chen


Attachments:
(No filename) (8.03 kB)
config-5.10.0-rc1-00365-g1ea6c22c9b85 (144.86 kB)
job-script (4.61 kB)
dmesg.xz (5.45 kB)
Download all attachments

2020-11-03 18:57:29

by David Rientjes

[permalink] [raw]
Subject: Re: [cma] 1ea6c22c9b: page_allocation_failure:order:#,mode:#(__GFP_RECLAIMABLE),nodemask=(null)

On Tue, 3 Nov 2020, kernel test robot wrote:

> Greeting,
>
> FYI, we noticed the following commit (built with gcc-9):
>
> commit: 1ea6c22c9b85ec176bb78d7076be06a4142f8bdd ("[PATCH 1/2] cma: redirect page allocation to CMA")
> url: https://github.com/0day-ci/linux/commits/Chris-Goldsworthy/Increasing-CMA-Utilization-with-a-GFP-Flag/20201102-224143
> base: https://git.kernel.org/cgit/linux/kernel/git/axboe/linux-block.git for-next
>
> in testcase: boot
>
> on test machine: qemu-system-i386 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
>
> +---------------------------------------------------------------------------+------------+------------+
> | | 67b6d2ce11 | 1ea6c22c9b |
> +---------------------------------------------------------------------------+------------+------------+
> | boot_successes | 6 | 0 |
> | boot_failures | 0 | 8 |
> | page_allocation_failure:order:#,mode:#(__GFP_RECLAIMABLE),nodemask=(null) | 0 | 8 |
> | Mem-Info | 0 | 8 |
> | kernel_BUG_at_kernel/workqueue.c | 0 | 8 |
> | invalid_opcode:#[##] | 0 | 8 |
> | EIP:workqueue_init_early | 0 | 8 |
> | Kernel_panic-not_syncing:Fatal_exception | 0 | 8 |
> +---------------------------------------------------------------------------+------------+------------+
>
>
> If you fix the issue, kindly add following tag
> Reported-by: kernel test robot <[email protected]>
>
>
> [ 2.108390] Memory: 8203240K/8388088K available (12819K kernel code, 6610K rwdata, 11292K rodata, 628K init, 12064K bss, 184848K reserved, 0K cma-reserved, 7481224K highmem)
> [ 2.111999] Checking if this processor honours the WP bit even in supervisor mode...Ok.
> [ 2.113918] random: get_random_u32 called from kmem_cache_open+0x1f/0x240 with crng_init=0
> [ 2.114387] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
> [ 2.117656] Kernel/User page tables isolation: enabled
> [ 2.119610] swapper: page allocation failure: order:0, mode:0x10(__GFP_RECLAIMABLE), nodemask=(null)
> [ 2.121604] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-rc1-00365-g1ea6c22c9b85 #1
> [ 2.123401] Call Trace:
> [ 2.123990] dump_stack+0x1b/0x1d
> [ 2.124787] warn_alloc+0x81/0xd9
> [ 2.125599] __alloc_pages_slowpath+0x79c/0x7a9
> [ 2.126690] ? get_page_from_freelist+0xb8/0x20d
> [ 2.127750] __alloc_pages_nodemask+0x107/0x188
> [ 2.128795] __alloc_pages_node+0x17/0x1c
> [ 2.129757] alloc_slab_page+0x26/0x4e
> [ 2.130649] allocate_slab+0x80/0x27e
> [ 2.131521] ___slab_alloc+0x247/0x2ec
> [ 2.132605] ? radix_tree_node_alloc+0x5e/0x8e
> [ 2.133915] ? validate_chain+0x5a8/0x5c3
> [ 2.134838] __slab_alloc+0x34/0x4d
> [ 2.135830] ? __slab_alloc+0x34/0x4d
> [ 2.136909] ? fs_reclaim_release+0x8/0x13
> [ 2.137910] kmem_cache_alloc+0x46/0x157
> [ 2.138854] ? radix_tree_node_alloc+0x5e/0x8e
> [ 2.140103] radix_tree_node_alloc+0x5e/0x8e
> [ 2.141369] idr_get_free+0xc1/0x21a
> [ 2.142230] idr_alloc_u32+0x4d/0x80
> [ 2.143016] idr_alloc+0x30/0x3e
> [ 2.143751] worker_pool_assign_id+0x37/0x47
> [ 2.144682] workqueue_init_early+0x9f/0x1f6
> [ 2.145707] start_kernel+0x206/0x467
> [ 2.146592] ? early_idt_handler_common+0x44/0x44
> [ 2.147671] i386_start_kernel+0x42/0x44
> [ 2.148607] startup_32_smp+0x164/0x168
> [ 2.149567] Mem-Info:
> [ 2.150101] active_anon:0 inactive_anon:0 isolated_anon:0
> [ 2.150101] active_file:0 inactive_file:0 isolated_file:0
> [ 2.150101] unevictable:0 dirty:0 writeback:0
> [ 2.150101] slab_reclaimable:0 slab_unreclaimable:88
> [ 2.150101] mapped:0 shmem:0 pagetables:0 bounce:0
> [ 2.150101] free:2050715 free_pcp:0 free_cma:0
> [ 2.156588] Node 0 active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:0kB dirty:0kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB kernel_stack:0kB all_unreclaimable? no
> [ 2.162313] Normal free:721636kB min:0kB low:0kB high:0kB reserved_highatomic:0KB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:906864kB managed:722016kB mlocked:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
> [ 2.162318] lowmem_reserve[]: 0 0 0
> [ 2.169009] HighMem free:7481224kB min:0kB low:0kB high:0kB reserved_highatomic:0KB active_anon:0kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:7481224kB managed:7481224kB mlocked:0kB pagetables:0kB bounce:0kB free_pcp:0kB local_pcp:0kB free_cma:0kB
> [ 2.169014] lowmem_reserve[]: 0 0 0
> [ 2.175914] Normal: 1*4kB (M) 2*8kB (M) 1*16kB (U) 2*32kB (UM) 2*64kB (M) 2*128kB (UM) 1*256kB (M) 0*512kB 2*1024kB (UM) 1*2048kB (M) 175*4096kB (M) = 721636kB
> [ 2.179229] HighMem: 0*4kB 1*8kB (M) 0*16kB 0*32kB 0*64kB 1*128kB (M) 1*256kB (M) 1*512kB (M) 1*1024kB (M) 2*2048kB (M) 1825*4096kB (M) = 7481224kB
> [ 2.182343] 0 total pagecache pages
> [ 2.183068] 2097022 pages RAM
> [ 2.183799] 1870306 pages HighMem/MovableOnly
> [ 2.184853] 46212 pages reserved
> [ 2.185675] 0 pages cma reserved

Normally what would happen for a __GFP_RECLAIMABLE allocation is that we'd
grab one of the free order-10 MIGRATE_MOVABLE pageblocks and convert it to
MIGRATE_RECLAIMABLE as fallback and then allocate the page from there.
Looks like the commit is breaking the fallback logic since I see an
abundance of free memory yet no MIGRATE_RECLAIMABLE pageblocks (which
would show 'E' in these lines):

Normal: 1*4kB (M) 2*8kB (M) 1*16kB (U) 2*32kB (UM) 2*64kB (M) 2*128kB (UM) 1*256kB (M) 0*512kB 2*1024kB (UM) 1*2048kB (M) 175*4096kB (M) = 721636kB
HighMem: 0*4kB 1*8kB (M) 0*16kB 0*32kB 0*64kB 1*128kB (M) 1*256kB (M) 1*512kB (M) 1*1024kB (M) 2*2048kB (M) 1825*4096kB (M) = 7481224kB

I'm concerned about the change in general, however, because it seems like
__GFP_CMA doesn't absolutely *have* to exist and we get code like this
that will quickly become unmaintainable:

#ifdef CONFIG_CMA
#define ___GFP_CMA 0x800000u
#else
#define ___GFP_CMA 0
#endif
#ifdef CONFIG_LOCKDEP
#ifdef CONFIG_CMA
#define ___GFP_NOLOCKDEP 0x1000000u
#else
#define ___GFP_NOLOCKDEP 0x800000u
#endif

I suspect that Michal Hocko <[email protected]> may also have an opinion on
this patch.