2020-07-20 04:57:14

by Joonsoo Kim

Subject: [PATCH v2 1/4] mm/page_alloc: fix non cma alloc context

From: Joonsoo Kim <[email protected]>

Currently, excluding the CMA area from page allocation is implemented
by using current_gfp_context(). However, there are two problems with
this implementation.

First, it doesn't work in the allocation fastpath. The fastpath uses
the original gfp_mask, since current_gfp_context() was introduced to
control reclaim and is only applied on the slowpath.
Second, clearing __GFP_MOVABLE has the side effect of also excluding
ZONE_MOVABLE memory from the allocation target.

To fix these problems, this patch changes how the CMA area is excluded
from page allocation. The main point of the change is to use
alloc_flags: alloc_flags already controls the allocation, so it is the
right place to record whether the CMA area may be used.
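
The resulting flow, condensed from the diff below into a rough sketch
(not the literal kernel code), is:

/*
 * Whether CMA pageblocks may be used is decided once per allocation,
 * from the task context and the requested migratetype, and recorded
 * in alloc_flags so that both the fastpath and the slowpath see it.
 */
alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);

/* __rmqueue() then keys off ALLOC_CMA instead of the migratetype: */
if (alloc_flags & ALLOC_CMA)
        page = __rmqueue_cma_fallback(zone, order);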

Fixes: d7fefcc8de91 ("mm/cma: add PF flag to force non cma alloc")
Cc: <[email protected]>
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/sched/mm.h | 8 +-------
mm/page_alloc.c | 37 ++++++++++++++++++++++++-------------
2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 44ad5b7..6c652ec 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -175,14 +175,12 @@ static inline bool in_vfork(struct task_struct *tsk)
* Applies per-task gfp context to the given allocation flags.
* PF_MEMALLOC_NOIO implies GFP_NOIO
* PF_MEMALLOC_NOFS implies GFP_NOFS
- * PF_MEMALLOC_NOCMA implies no allocation from CMA region.
*/
static inline gfp_t current_gfp_context(gfp_t flags)
{
unsigned int pflags = READ_ONCE(current->flags);

- if (unlikely(pflags &
- (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_NOCMA))) {
+ if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) {
/*
* NOIO implies both NOIO and NOFS and it is a weaker context
* so always make sure it makes precedence
@@ -191,10 +189,6 @@ static inline gfp_t current_gfp_context(gfp_t flags)
flags &= ~(__GFP_IO | __GFP_FS);
else if (pflags & PF_MEMALLOC_NOFS)
flags &= ~__GFP_FS;
-#ifdef CONFIG_CMA
- if (pflags & PF_MEMALLOC_NOCMA)
- flags &= ~__GFP_MOVABLE;
-#endif
}
return flags;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6416d08..b529220 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2791,7 +2791,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
* allocating from CMA when over half of the zone's free memory
* is in the CMA area.
*/
- if (migratetype == MIGRATE_MOVABLE &&
+ if (alloc_flags & ALLOC_CMA &&
zone_page_state(zone, NR_FREE_CMA_PAGES) >
zone_page_state(zone, NR_FREE_PAGES) / 2) {
page = __rmqueue_cma_fallback(zone, order);
@@ -2802,7 +2802,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
retry:
page = __rmqueue_smallest(zone, order, migratetype);
if (unlikely(!page)) {
- if (migratetype == MIGRATE_MOVABLE)
+ if (alloc_flags & ALLOC_CMA)
page = __rmqueue_cma_fallback(zone, order);

if (!page && __rmqueue_fallback(zone, order, migratetype,
@@ -3502,11 +3502,9 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
if (likely(!alloc_harder))
unusable_free += z->nr_reserved_highatomic;

-#ifdef CONFIG_CMA
/* If allocation can't use CMA areas don't use free CMA pages */
- if (!(alloc_flags & ALLOC_CMA))
+ if (IS_ENABLED(CONFIG_CMA) && !(alloc_flags & ALLOC_CMA))
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
-#endif

return unusable_free;
}
@@ -3693,6 +3691,20 @@ alloc_flags_nofragment(struct zone *zone, gfp_t gfp_mask)
return alloc_flags;
}

+static inline unsigned int current_alloc_flags(gfp_t gfp_mask,
+ unsigned int alloc_flags)
+{
+#ifdef CONFIG_CMA
+ unsigned int pflags = current->flags;
+
+ if (!(pflags & PF_MEMALLOC_NOCMA) &&
+ gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ alloc_flags |= ALLOC_CMA;
+
+#endif
+ return alloc_flags;
+}
+
/*
* get_page_from_freelist goes through the zonelist trying to allocate
* a page.
@@ -4339,10 +4351,8 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
} else if (unlikely(rt_task(current)) && !in_interrupt())
alloc_flags |= ALLOC_HARDER;

-#ifdef CONFIG_CMA
- if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
- alloc_flags |= ALLOC_CMA;
-#endif
+ alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
+
return alloc_flags;
}

@@ -4642,8 +4652,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
wake_all_kswapds(order, gfp_mask, ac);

reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
- if (reserve_flags)
+ if (reserve_flags) {
alloc_flags = reserve_flags;
+ alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
+ }

/*
* Reset the nodemask and zonelist iterators if memory policies can be
@@ -4720,7 +4732,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,

/* Avoid allocations with no watermarks from looping endlessly */
if (tsk_is_oom_victim(current) &&
- (alloc_flags == ALLOC_OOM ||
+ (alloc_flags & ALLOC_OOM ||
(gfp_mask & __GFP_NOMEMALLOC)))
goto nopage;

@@ -4808,8 +4820,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
if (should_fail_alloc_page(gfp_mask, order))
return false;

- if (IS_ENABLED(CONFIG_CMA) && ac->migratetype == MIGRATE_MOVABLE)
- *alloc_flags |= ALLOC_CMA;
+ *alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);

return true;
}
--
2.7.4


2020-07-20 04:57:50

by Joonsoo Kim

Subject: [PATCH v2 2/4] mm/gup: restrict CMA region by using allocation scope API

From: Joonsoo Kim <[email protected]>

We have a well-defined scope API to exclude the CMA region.
Use it rather than manipulating gfp_mask manually. With this change,
we can restore __GFP_MOVABLE in gfp_mask, as for a usual migration
target allocation, which means ZONE_MOVABLE is also searched by the
page allocator. For hugetlb, gfp_mask is redefined since hugetlb has
a regular allocation mask filter for migration targets. __GFP_NOWARN
is added to the hugetlb gfp_mask filter since the new user of that
filter, gup, wants allocation failures to be silent.

Note that this can be considered a fix for commit 9a4e9f3b2d73
("mm: update get_user_pages_longterm to migrate pages allocated from
CMA region"). However, no "Fixes" tag is added here since the old
behaviour is merely suboptimal and doesn't cause any problem.
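
For reference, the caller-side pattern of the scope API relied on here
is roughly the following sketch (the actual call site is in
__gup_longterm_locked(), see the diff below):

unsigned int flags;

flags = memalloc_nocma_save();
/*
 * Any page allocation in this window skips the CMA area, while
 * __GFP_MOVABLE can stay set so ZONE_MOVABLE remains usable.
 */
/* ... pin and, if needed, migrate the pages here ... */
memalloc_nocma_restore(flags);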

Suggested-by: Michal Hocko <[email protected]>
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/hugetlb.h | 2 ++
mm/gup.c | 17 ++++++++---------
2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6b9508d..2660b04 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -708,6 +708,8 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
/* Some callers might want to enforce node */
modified_mask |= (gfp_mask & __GFP_THISNODE);

+ modified_mask |= (gfp_mask & __GFP_NOWARN);
+
return modified_mask;
}

diff --git a/mm/gup.c b/mm/gup.c
index 5daadae..bbd36a1 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1619,10 +1619,12 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
* Trying to allocate a page for migration. Ignore allocation
* failure warnings. We don't force __GFP_THISNODE here because
* this node here is the node where we have CMA reservation and
- * in some case these nodes will have really less non movable
+ * in some case these nodes will have really less non CMA
* allocation memory.
+ *
+ * Note that CMA region is prohibited by allocation scope.
*/
- gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+ gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN;

if (PageHighMem(page))
gfp_mask |= __GFP_HIGHMEM;
@@ -1630,6 +1632,8 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(page)) {
struct hstate *h = page_hstate(page);
+
+ gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
/*
* We don't want to dequeue from the pool because pool pages will
* mostly be from the CMA region.
@@ -1644,11 +1648,6 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
*/
gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;

- /*
- * Remove the movable mask so that we don't allocate from
- * CMA area again.
- */
- thp_gfpmask &= ~__GFP_MOVABLE;
thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
if (!thp)
return NULL;
@@ -1794,7 +1793,6 @@ static long __gup_longterm_locked(struct task_struct *tsk,
vmas_tmp, NULL, gup_flags);

if (gup_flags & FOLL_LONGTERM) {
- memalloc_nocma_restore(flags);
if (rc < 0)
goto out;

@@ -1807,9 +1805,10 @@ static long __gup_longterm_locked(struct task_struct *tsk,

rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
vmas_tmp, gup_flags);
+out:
+ memalloc_nocma_restore(flags);
}

-out:
if (vmas_tmp != vmas)
kfree(vmas_tmp);
return rc;
--
2.7.4

2020-07-20 04:58:17

by Joonsoo Kim

Subject: [PATCH v2 3/4] mm/hugetlb: make hugetlb migration callback CMA aware

From: Joonsoo Kim <[email protected]>

new_non_cma_page() in gup.c needs to allocate a new page that is not
in the CMA area. new_non_cma_page() implements this by using the
allocation scope APIs.

However, there is a workaround for hugetlb. The normal hugetlb page
allocation API for migration is alloc_huge_page_nodemask(). It consists
of two steps: first, dequeue a page from the pool; second, if no page
is available on the free list, allocate one with the page allocator.

new_non_cma_page() can't use this API since the first step (dequeue)
isn't aware of the scope API that excludes the CMA area. So,
new_non_cma_page() exports the hugetlb-internal function for the
second step, alloc_migrate_huge_page(), and uses it directly. This is
suboptimal since hugetlb pages already in the pool cannot be used.

This patch fixes the situation by making the hugetlb dequeue function
CMA aware: in the dequeue function, CMA pages are skipped if the
PF_MEMALLOC_NOCMA flag is set.
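
As a rough sketch of the change (condensed from the hugetlb diff
below), the dequeue loop becomes:

list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
        if ((current->flags & PF_MEMALLOC_NOCMA) &&
            is_migrate_cma_page(page))
                continue;       /* caller asked to avoid CMA pages */

        if (!PageHWPoison(page))
                break;          /* found a usable, non-poisoned page */
}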

Acked-by: Mike Kravetz <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/hugetlb.h | 2 --
mm/gup.c | 6 +-----
mm/hugetlb.c | 11 +++++++++--
3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 2660b04..fb2b5aa 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -509,8 +509,6 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nmask);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);

diff --git a/mm/gup.c b/mm/gup.c
index bbd36a1..4ba822a 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1634,11 +1634,7 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
struct hstate *h = page_hstate(page);

gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
- /*
- * We don't want to dequeue from the pool because pool pages will
- * mostly be from the CMA region.
- */
- return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+ return alloc_huge_page_nodemask(h, nid, NULL, gfp_mask);
}
#endif
if (PageTransHuge(page)) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3245aa0..d9eb923 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -29,6 +29,7 @@
#include <linux/numa.h>
#include <linux/llist.h>
#include <linux/cma.h>
+#include <linux/sched/mm.h>

#include <asm/page.h>
#include <asm/tlb.h>
@@ -1036,10 +1037,16 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
{
struct page *page;
+ bool nocma = !!(current->flags & PF_MEMALLOC_NOCMA);
+
+ list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
+ if (nocma && is_migrate_cma_page(page))
+ continue;

- list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
if (!PageHWPoison(page))
break;
+ }
+
/*
* if 'non-isolated free hugepage' not found on the list,
* the allocation fails.
@@ -1928,7 +1935,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
return page;
}

-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nmask)
{
struct page *page;
--
2.7.4

2020-07-20 05:00:44

by Joonsoo Kim

Subject: [PATCH v2 4/4] mm/gup: use a standard migration target allocation callback

From: Joonsoo Kim <[email protected]>

There is a well-defined migration target allocation callback. Use it.
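
In other words, instead of a gup-private new_non_cma_page() callback,
the caller now describes the target with a migration_target_control
and passes the generic alloc_migration_target() to migrate_pages().
Condensed from the diff below:

struct migration_target_control mtc = {
        .nid = NUMA_NO_NODE,
        .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
};

migrate_pages(&cma_page_list, alloc_migration_target, NULL,
              (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE);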

Acked-by: Vlastimil Babka <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Signed-off-by: Joonsoo Kim <[email protected]>
---
mm/gup.c | 54 ++++++------------------------------------------------
1 file changed, 6 insertions(+), 48 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 4ba822a..628ca4c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1608,52 +1608,6 @@ static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
}

#ifdef CONFIG_CMA
-static struct page *new_non_cma_page(struct page *page, unsigned long private)
-{
- /*
- * We want to make sure we allocate the new page from the same node
- * as the source page.
- */
- int nid = page_to_nid(page);
- /*
- * Trying to allocate a page for migration. Ignore allocation
- * failure warnings. We don't force __GFP_THISNODE here because
- * this node here is the node where we have CMA reservation and
- * in some case these nodes will have really less non CMA
- * allocation memory.
- *
- * Note that CMA region is prohibited by allocation scope.
- */
- gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN;
-
- if (PageHighMem(page))
- gfp_mask |= __GFP_HIGHMEM;
-
-#ifdef CONFIG_HUGETLB_PAGE
- if (PageHuge(page)) {
- struct hstate *h = page_hstate(page);
-
- gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
- return alloc_huge_page_nodemask(h, nid, NULL, gfp_mask);
- }
-#endif
- if (PageTransHuge(page)) {
- struct page *thp;
- /*
- * ignore allocation failure warnings
- */
- gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
-
- thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
- if (!thp)
- return NULL;
- prep_transhuge_page(thp);
- return thp;
- }
-
- return __alloc_pages_node(nid, gfp_mask, 0);
-}
-
static long check_and_migrate_cma_pages(struct task_struct *tsk,
struct mm_struct *mm,
unsigned long start,
@@ -1668,6 +1622,10 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
bool migrate_allow = true;
LIST_HEAD(cma_page_list);
long ret = nr_pages;
+ struct migration_target_control mtc = {
+ .nid = NUMA_NO_NODE,
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
+ };

check_again:
for (i = 0; i < nr_pages;) {
@@ -1713,8 +1671,8 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
for (i = 0; i < nr_pages; i++)
put_page(pages[i]);

- if (migrate_pages(&cma_page_list, new_non_cma_page,
- NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+ if (migrate_pages(&cma_page_list, alloc_migration_target, NULL,
+ (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
/*
* some of the pages failed migration. Do get_user_pages
* without migration.
--
2.7.4

2020-07-20 23:18:51

by Andrew Morton

Subject: Re: [PATCH v2 1/4] mm/page_alloc: fix non cma alloc context

On Mon, 20 Jul 2020 13:56:15 +0900 [email protected] wrote:

> Currently, excluding the CMA area from page allocation is implemented
> by using current_gfp_context(). However, there are two problems with
> this implementation.
>
> First, it doesn't work in the allocation fastpath. The fastpath uses
> the original gfp_mask, since current_gfp_context() was introduced to
> control reclaim and is only applied on the slowpath.
> Second, clearing __GFP_MOVABLE has the side effect of also excluding
> ZONE_MOVABLE memory from the allocation target.
>
> To fix these problems, this patch changes how the CMA area is excluded
> from page allocation. The main point of the change is to use
> alloc_flags: alloc_flags already controls the allocation, so it is the
> right place to record whether the CMA area may be used.

What are the end user visible runtime effects of this change?

This is pretty much essential information when proposing a -stable
backport.

2020-07-20 23:26:00

by Andrew Morton

Subject: Re: [PATCH v2 1/4] mm/page_alloc: fix non cma alloc context

On Mon, 20 Jul 2020 13:56:15 +0900 [email protected] wrote:

> Currently, excluding the CMA area from page allocation is implemented
> by using current_gfp_context(). However, there are two problems with
> this implementation.
>
> First, it doesn't work in the allocation fastpath. The fastpath uses
> the original gfp_mask, since current_gfp_context() was introduced to
> control reclaim and is only applied on the slowpath.
> Second, clearing __GFP_MOVABLE has the side effect of also excluding
> ZONE_MOVABLE memory from the allocation target.
>
> To fix these problems, this patch changes how the CMA area is excluded
> from page allocation. The main point of the change is to use
> alloc_flags: alloc_flags already controls the allocation, so it is the
> right place to record whether the CMA area may be used.
>
> Fixes: d7fefcc8de91 ("mm/cma: add PF flag to force non cma alloc")
> Cc: <[email protected]>

This patch is against linux-next (or -mm) and has a lot of issues
applying to mainline. If we indeed wish to backport it to -stable, it
should be against mainline, please.

2020-07-21 03:34:20

by Joonsoo Kim

Subject: Re: [PATCH v2 1/4] mm/page_alloc: fix non cma alloc context

On Tue, Jul 21, 2020 at 8:23 AM, Andrew Morton <[email protected]> wrote:
>
> On Mon, 20 Jul 2020 13:56:15 +0900 [email protected] wrote:
>
> > Currently, excluding the CMA area from page allocation is implemented
> > by using current_gfp_context(). However, there are two problems with
> > this implementation.
> >
> > First, it doesn't work in the allocation fastpath. The fastpath uses
> > the original gfp_mask, since current_gfp_context() was introduced to
> > control reclaim and is only applied on the slowpath.
> > Second, clearing __GFP_MOVABLE has the side effect of also excluding
> > ZONE_MOVABLE memory from the allocation target.
> >
> > To fix these problems, this patch changes how the CMA area is excluded
> > from page allocation. The main point of the change is to use
> > alloc_flags: alloc_flags already controls the allocation, so it is the
> > right place to record whether the CMA area may be used.
> >
> > Fixes: d7fefcc8de91 ("mm/cma: add PF flag to force non cma alloc")
> > Cc: <[email protected]>
>
> This patch is against linux-next (or -mm) and has a lot of issues
> applying to mainline. If we indeed wish to backport it to -stable, it
> should be against mainline, please.

I sent a revised patch against mainline a minute ago. The subject and
commit description are updated.

Thanks.