From: Joonsoo Kim <[email protected]>
This patchset cleans up the migration target allocation functions.
Contributions of this patchset are:
1. unify two hugetlb alloc functions; as a result, only one remains.
2. make one external hugetlb alloc function internal.
3. unify three functions for migration target allocation (a rough
sketch of the resulting interface is shown below).
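As an illustration of where the series ends up (a sketch only; the
struct fields and the allocator name are the ones introduced later in
this series, and the surrounding caller is hypothetical), every
migration user builds a struct alloc_control and passes it to a single
target allocation function:

	struct alloc_control ac = {
		.nid = page_to_nid(page),
		.nmask = &node_states[N_MEMORY],
		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
	};

	/* one target allocation function for all migration users */
	err = migrate_pages(&pagelist, alloc_migration_target, NULL, &ac,
			    MIGRATE_SYNC, MR_SYSCALL);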
The patchset is based on next-20200515.
The patchset is available on:
https://github.com/JoonsooKim/linux/tree/cleanup-migration-target-allocation-v1.00-next-20200515
Thanks.
Joonsoo Kim (11):
mm/page_isolation: prefer the node of the source page
mm/migrate: move migration helper from .h to .c
mm/hugetlb: introduce alloc_control structure to simplify migration
target allocation APIs
mm/hugetlb: unify hugetlb migration callback function
mm/hugetlb: make hugetlb migration target allocation APIs CMA aware
mm/hugetlb: do not modify user provided gfp_mask
mm/migrate: change the interface of the migration target alloc/free
functions
mm/migrate: make standard migration target allocation functions
mm/gup: use standard migration target allocation function
mm/mempolicy: use standard migration target allocation function
mm/page_alloc: use standard migration target allocation function
directly
include/linux/hugetlb.h | 33 ++++++---------
include/linux/migrate.h | 44 +++++---------------
include/linux/page-isolation.h | 4 +-
mm/compaction.c | 15 ++++---
mm/gup.c | 60 +++++-----------------------
mm/hugetlb.c | 91 ++++++++++++++++++++----------------------
mm/internal.h | 13 +++++-
mm/memory-failure.c | 14 ++++---
mm/memory_hotplug.c | 10 +++--
mm/mempolicy.c | 39 ++++++------------
mm/migrate.c | 75 ++++++++++++++++++++++++++--------
mm/page_alloc.c | 9 ++++-
mm/page_isolation.c | 5 ---
13 files changed, 196 insertions(+), 216 deletions(-)
--
2.7.4
From: Joonsoo Kim <[email protected]>
For locality, it's better to migrate the page to the node of the source
page rather than the node of the current caller's CPU.
Signed-off-by: Joonsoo Kim <[email protected]>
---
mm/page_isolation.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 2c11a38..7df89bd 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -300,5 +300,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
struct page *alloc_migrate_target(struct page *page, unsigned long private)
{
- return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
+ int nid = page_to_nid(page);
+
+ return new_page_nodemask(page, nid, &node_states[N_MEMORY]);
}
--
2.7.4
From: Joonsoo Kim <[email protected]>
new_page_nodemask() is not a performance-sensitive function, so move it
from the header to .c.
This is a preparation step for a future change.
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/migrate.h | 33 +++++----------------------------
mm/migrate.c | 29 +++++++++++++++++++++++++++++
2 files changed, 34 insertions(+), 28 deletions(-)
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3e546cb..1d70b4a 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -31,34 +31,6 @@ enum migrate_reason {
/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
extern const char *migrate_reason_names[MR_TYPES];
-static inline struct page *new_page_nodemask(struct page *page,
- int preferred_nid, nodemask_t *nodemask)
-{
- gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
- unsigned int order = 0;
- struct page *new_page = NULL;
-
- if (PageHuge(page))
- return alloc_huge_page_nodemask(page_hstate(compound_head(page)),
- preferred_nid, nodemask);
-
- if (PageTransHuge(page)) {
- gfp_mask |= GFP_TRANSHUGE;
- order = HPAGE_PMD_ORDER;
- }
-
- if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
- gfp_mask |= __GFP_HIGHMEM;
-
- new_page = __alloc_pages_nodemask(gfp_mask, order,
- preferred_nid, nodemask);
-
- if (new_page && PageTransHuge(new_page))
- prep_transhuge_page(new_page);
-
- return new_page;
-}
-
#ifdef CONFIG_MIGRATION
extern void putback_movable_pages(struct list_head *l);
@@ -67,6 +39,8 @@ extern int migrate_page(struct address_space *mapping,
enum migrate_mode mode);
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
unsigned long private, enum migrate_mode mode, int reason);
+extern struct page *new_page_nodemask(struct page *page,
+ int preferred_nid, nodemask_t *nodemask);
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
@@ -85,6 +59,9 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
free_page_t free, unsigned long private, enum migrate_mode mode,
int reason)
{ return -ENOSYS; }
+static inline struct page *new_page_nodemask(struct page *page,
+ int preferred_nid, nodemask_t *nodemask)
+ { return NULL; }
static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
{ return -EBUSY; }
diff --git a/mm/migrate.c b/mm/migrate.c
index 5fed030..a298a8c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1519,6 +1519,35 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
return rc;
}
+struct page *new_page_nodemask(struct page *page,
+ int preferred_nid, nodemask_t *nodemask)
+{
+ gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
+ unsigned int order = 0;
+ struct page *new_page = NULL;
+
+ if (PageHuge(page))
+ return alloc_huge_page_nodemask(
+ page_hstate(compound_head(page)),
+ preferred_nid, nodemask);
+
+ if (PageTransHuge(page)) {
+ gfp_mask |= GFP_TRANSHUGE;
+ order = HPAGE_PMD_ORDER;
+ }
+
+ if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
+ gfp_mask |= __GFP_HIGHMEM;
+
+ new_page = __alloc_pages_nodemask(gfp_mask, order,
+ preferred_nid, nodemask);
+
+ if (new_page && PageTransHuge(new_page))
+ prep_transhuge_page(new_page);
+
+ return new_page;
+}
+
#ifdef CONFIG_NUMA
static int store_status(int __user *status, int start, int value, int nr)
--
2.7.4
From: Joonsoo Kim <[email protected]>
It's not good practice to modify user-provided input. Instead of reusing
the caller's gfp_mask to build the effective mask for these APIs, this
patch introduces another field, __gfp_mask, for internal use.
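To illustrate the intended usage (a sketch only, not part of the diff;
the caller below is modeled on the gup user and its fields match this
patch), a caller's gfp_mask now survives the call untouched while the
effective mask is built internally:

	struct alloc_control ac = {
		.nid = nid,
		.gfp_mask = __GFP_NOWARN,	/* caller-provided, read-only */
		.skip_cma = true,
	};

	page = alloc_huge_page_nodemask(h, &ac);
	/*
	 * ac.gfp_mask is still just __GFP_NOWARN here; htlb_alloc_mask(h),
	 * __GFP_THISNODE and the CMA adjustment are applied to
	 * ac.__gfp_mask instead.
	 */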
Signed-off-by: Joonsoo Kim <[email protected]>
---
mm/hugetlb.c | 15 ++++++++-------
mm/internal.h | 2 ++
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 53edd02..5f43b7e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1069,15 +1069,15 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h,
struct zoneref *z;
int node = NUMA_NO_NODE;
- zonelist = node_zonelist(ac->nid, ac->gfp_mask);
+ zonelist = node_zonelist(ac->nid, ac->__gfp_mask);
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
for_each_zone_zonelist_nodemask(zone, z, zonelist,
- gfp_zone(ac->gfp_mask), ac->nmask) {
+ gfp_zone(ac->__gfp_mask), ac->nmask) {
struct page *page;
- if (!cpuset_zone_allowed(zone, ac->gfp_mask))
+ if (!cpuset_zone_allowed(zone, ac->__gfp_mask))
continue;
/*
* no need to ask again on the same node. Pool is node rather than
@@ -1952,7 +1952,7 @@ static struct page *alloc_migrate_huge_page(struct hstate *h,
if (hstate_is_gigantic(h))
return NULL;
- page = alloc_fresh_huge_page(h, ac->gfp_mask,
+ page = alloc_fresh_huge_page(h, ac->__gfp_mask,
ac->nid, ac->nmask, NULL);
if (!page)
return NULL;
@@ -1990,9 +1990,10 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
struct page *alloc_huge_page_nodemask(struct hstate *h,
struct alloc_control *ac)
{
- ac->gfp_mask |= htlb_alloc_mask(h);
+ ac->__gfp_mask = htlb_alloc_mask(h);
+ ac->__gfp_mask |= ac->gfp_mask;
if (ac->thisnode && ac->nid != NUMA_NO_NODE)
- ac->gfp_mask |= __GFP_THISNODE;
+ ac->__gfp_mask |= __GFP_THISNODE;
spin_lock(&hugetlb_lock);
if (h->free_huge_pages - h->resv_huge_pages > 0) {
@@ -2011,7 +2012,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h,
* will not come from CMA area
*/
if (ac->skip_cma)
- ac->gfp_mask &= ~__GFP_MOVABLE;
+ ac->__gfp_mask &= ~__GFP_MOVABLE;
return alloc_migrate_huge_page(h, ac);
}
diff --git a/mm/internal.h b/mm/internal.h
index 6b6507e..3239d71 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -620,6 +620,8 @@ struct alloc_control {
gfp_t gfp_mask;
bool thisnode;
bool skip_cma;
+
+ gfp_t __gfp_mask; /* Used internally in API implementation */
};
#endif /* __MM_INTERNAL_H */
--
2.7.4
From: Joonsoo Kim <[email protected]>
There are users who do not want CMA memory to be used for migration
targets. Until now this has been implemented on the caller side, which
is not optimal since the caller has only limited information. This patch
implements it on the callee side to get a better result.
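For illustration (a sketch, not part of the diff below), a caller that
must avoid CMA memory now only sets a flag and lets hugetlb handle both
the free-list dequeue and the fallback allocation:

	struct alloc_control ac = {
		.nid = page_to_nid(page),
		.gfp_mask = __GFP_NOWARN,
		.skip_cma = true,	/* callee skips CMA pages in the pool
					 * and clears __GFP_MOVABLE otherwise */
	};

	return alloc_huge_page_nodemask(page_hstate(compound_head(page)), &ac);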
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/hugetlb.h | 2 --
mm/gup.c | 9 +++------
mm/hugetlb.c | 21 +++++++++++++++++----
mm/internal.h | 1 +
4 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 4892ed3..6485e92 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -503,8 +503,6 @@ struct huge_bootmem_page {
struct hstate *hstate;
};
-struct page *alloc_migrate_huge_page(struct hstate *h,
- struct alloc_control *ac);
struct page *alloc_huge_page_nodemask(struct hstate *h,
struct alloc_control *ac);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
diff --git a/mm/gup.c b/mm/gup.c
index 9890fb0..1c86db5 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1618,14 +1618,11 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
struct alloc_control ac = {
.nid = nid,
.nmask = NULL,
- .gfp_mask = gfp_mask,
+ .gfp_mask = __GFP_NOWARN,
+ .skip_cma = true,
};
- /*
- * We don't want to dequeue from the pool because pool pages will
- * mostly be from the CMA region.
- */
- return alloc_migrate_huge_page(h, &ac);
+ return alloc_huge_page_nodemask(h, &ac);
}
if (PageTransHuge(page)) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 60b0983..53edd02 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1034,13 +1034,19 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
h->free_huge_pages_node[nid]++;
}
-static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
+static struct page *dequeue_huge_page_node_exact(struct hstate *h,
+ int nid, bool skip_cma)
{
struct page *page;
- list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
+ list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
+ if (skip_cma && is_migrate_cma_page(page))
+ continue;
+
if (!PageHWPoison(page))
break;
+ }
+
/*
* if 'non-isolated free hugepage' not found on the list,
* the allocation fails.
@@ -1081,7 +1087,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h,
continue;
node = zone_to_nid(zone);
- page = dequeue_huge_page_node_exact(h, node);
+ page = dequeue_huge_page_node_exact(h, node, ac->skip_cma);
if (page)
return page;
}
@@ -1938,7 +1944,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
return page;
}
-struct page *alloc_migrate_huge_page(struct hstate *h,
+static struct page *alloc_migrate_huge_page(struct hstate *h,
struct alloc_control *ac)
{
struct page *page;
@@ -2000,6 +2006,13 @@ struct page *alloc_huge_page_nodemask(struct hstate *h,
}
spin_unlock(&hugetlb_lock);
+ /*
+	 * clearing the __GFP_MOVABLE flag ensures that the allocated page
+ * will not come from CMA area
+ */
+ if (ac->skip_cma)
+ ac->gfp_mask &= ~__GFP_MOVABLE;
+
return alloc_migrate_huge_page(h, ac);
}
diff --git a/mm/internal.h b/mm/internal.h
index 574722d0..6b6507e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -619,6 +619,7 @@ struct alloc_control {
nodemask_t *nmask;
gfp_t gfp_mask;
bool thisnode;
+ bool skip_cma;
};
#endif /* __MM_INTERNAL_H */
--
2.7.4
From: Joonsoo Kim <[email protected]>
Currently, page allocation functions for migration require several
arguments. Worse, the following patches will need even more arguments to
unify the similar functions. To simplify them, this patch introduces a
unified data structure, struct alloc_control, that controls the
allocation behaviour.
For clean-up, function declarations are re-ordered.
Note that gfp_mask handling in alloc_huge_page_(node|nodemask) is
slightly changed, from assignment to OR. It's safe since the callers of
these functions don't pass any extra gfp_mask beyond htlb_alloc_mask().
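As a quick illustration (a sketch only; the names follow this patch),
the structure gathers what used to be separate parameters, so a typical
call-site conversion looks like:

	struct alloc_control {
		int nid;		/* preferred node id */
		nodemask_t *nmask;
		gfp_t gfp_mask;
	};

	/* before: page = alloc_huge_page_nodemask(h, preferred_nid, nodemask); */
	struct alloc_control ac = {
		.nid = preferred_nid,
		.nmask = nodemask,
	};

	page = alloc_huge_page_nodemask(h, &ac);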
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/hugetlb.h | 35 +++++++++++++++-------------
mm/gup.c | 11 ++++++---
mm/hugetlb.c | 62 ++++++++++++++++++++++++-------------------------
mm/internal.h | 7 ++++++
mm/mempolicy.c | 13 +++++++----
mm/migrate.c | 13 +++++++----
6 files changed, 83 insertions(+), 58 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 0cced41..6da217e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,7 @@
struct ctl_table;
struct user_struct;
struct mmu_gather;
+struct alloc_control;
#ifndef is_hugepd
typedef struct { unsigned long pd; } hugepd_t;
@@ -502,15 +503,16 @@ struct huge_bootmem_page {
struct hstate *hstate;
};
-struct page *alloc_huge_page(struct vm_area_struct *vma,
- unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_node(struct hstate *h, int nid);
-struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask);
+struct page *alloc_migrate_huge_page(struct hstate *h,
+ struct alloc_control *ac);
+struct page *alloc_huge_page_node(struct hstate *h,
+ struct alloc_control *ac);
+struct page *alloc_huge_page_nodemask(struct hstate *h,
+ struct alloc_control *ac);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nmask);
+struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr, int avoid_reserve);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
@@ -752,20 +754,14 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
-static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
- unsigned long addr,
- int avoid_reserve)
-{
- return NULL;
-}
-
-static inline struct page *alloc_huge_page_node(struct hstate *h, int nid)
+static inline struct page *
+alloc_huge_page_node(struct hstate *h, struct alloc_control *ac)
{
return NULL;
}
static inline struct page *
-alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask)
+alloc_huge_page_nodemask(struct hstate *h, struct alloc_control *ac)
{
return NULL;
}
@@ -777,6 +773,13 @@ static inline struct page *alloc_huge_page_vma(struct hstate *h,
return NULL;
}
+static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr,
+ int avoid_reserve)
+{
+ return NULL;
+}
+
static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
return 0;
diff --git a/mm/gup.c b/mm/gup.c
index 0d64ea8..9890fb0 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1613,16 +1613,21 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
if (PageHighMem(page))
gfp_mask |= __GFP_HIGHMEM;
-#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(page)) {
struct hstate *h = page_hstate(page);
+ struct alloc_control ac = {
+ .nid = nid,
+ .nmask = NULL,
+ .gfp_mask = gfp_mask,
+ };
+
/*
* We don't want to dequeue from the pool because pool pages will
* mostly be from the CMA region.
*/
- return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+ return alloc_migrate_huge_page(h, &ac);
}
-#endif
+
if (PageTransHuge(page)) {
struct page *thp;
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dcb34d7..859dba4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1054,8 +1054,8 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
return page;
}
-static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
- nodemask_t *nmask)
+static struct page *dequeue_huge_page_nodemask(struct hstate *h,
+ struct alloc_control *ac)
{
unsigned int cpuset_mems_cookie;
struct zonelist *zonelist;
@@ -1063,14 +1063,15 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
struct zoneref *z;
int node = NUMA_NO_NODE;
- zonelist = node_zonelist(nid, gfp_mask);
+ zonelist = node_zonelist(ac->nid, ac->gfp_mask);
retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
- for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
+ for_each_zone_zonelist_nodemask(zone, z, zonelist,
+ gfp_zone(ac->gfp_mask), ac->nmask) {
struct page *page;
- if (!cpuset_zone_allowed(zone, gfp_mask))
+ if (!cpuset_zone_allowed(zone, ac->gfp_mask))
continue;
/*
* no need to ask again on the same node. Pool is node rather than
@@ -1106,9 +1107,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
{
struct page *page;
struct mempolicy *mpol;
- gfp_t gfp_mask;
- nodemask_t *nodemask;
- int nid;
+ struct alloc_control ac = {0};
/*
* A child process with MAP_PRIVATE mappings created by their parent
@@ -1123,9 +1122,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
goto err;
- gfp_mask = htlb_alloc_mask(h);
- nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+ ac.gfp_mask = htlb_alloc_mask(h);
+ ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
+
+ page = dequeue_huge_page_nodemask(h, &ac);
if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
SetPagePrivate(page);
h->resv_huge_pages--;
@@ -1938,15 +1938,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
return page;
}
-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nmask)
+struct page *alloc_migrate_huge_page(struct hstate *h,
+ struct alloc_control *ac)
{
struct page *page;
if (hstate_is_gigantic(h))
return NULL;
- page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
+ page = alloc_fresh_huge_page(h, ac->gfp_mask,
+ ac->nid, ac->nmask, NULL);
if (!page)
return NULL;
@@ -1980,36 +1981,37 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
}
/* page migration callback function */
-struct page *alloc_huge_page_node(struct hstate *h, int nid)
+struct page *alloc_huge_page_node(struct hstate *h,
+ struct alloc_control *ac)
{
- gfp_t gfp_mask = htlb_alloc_mask(h);
struct page *page = NULL;
- if (nid != NUMA_NO_NODE)
- gfp_mask |= __GFP_THISNODE;
+ ac->gfp_mask |= htlb_alloc_mask(h);
+ if (ac->nid != NUMA_NO_NODE)
+ ac->gfp_mask |= __GFP_THISNODE;
spin_lock(&hugetlb_lock);
if (h->free_huge_pages - h->resv_huge_pages > 0)
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
+ page = dequeue_huge_page_nodemask(h, ac);
spin_unlock(&hugetlb_lock);
if (!page)
- page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+ page = alloc_migrate_huge_page(h, ac);
return page;
}
/* page migration callback function */
-struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask)
+struct page *alloc_huge_page_nodemask(struct hstate *h,
+ struct alloc_control *ac)
{
- gfp_t gfp_mask = htlb_alloc_mask(h);
+ ac->gfp_mask |= htlb_alloc_mask(h);
spin_lock(&hugetlb_lock);
if (h->free_huge_pages - h->resv_huge_pages > 0) {
struct page *page;
- page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
+ page = dequeue_huge_page_nodemask(h, ac);
if (page) {
spin_unlock(&hugetlb_lock);
return page;
@@ -2017,22 +2019,20 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
}
spin_unlock(&hugetlb_lock);
- return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
+ return alloc_migrate_huge_page(h, ac);
}
/* mempolicy aware migration callback */
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address)
{
+ struct alloc_control ac = {0};
struct mempolicy *mpol;
- nodemask_t *nodemask;
struct page *page;
- gfp_t gfp_mask;
- int node;
- gfp_mask = htlb_alloc_mask(h);
- node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
- page = alloc_huge_page_nodemask(h, node, nodemask);
+ ac.gfp_mask = htlb_alloc_mask(h);
+ ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
+ page = alloc_huge_page_nodemask(h, &ac);
mpol_cond_put(mpol);
return page;
diff --git a/mm/internal.h b/mm/internal.h
index 791e4b5a..75b3f8e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -613,4 +613,11 @@ static inline bool is_migrate_highatomic_page(struct page *page)
void setup_zone_pageset(struct zone *zone);
extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
+
+struct alloc_control {
+ int nid;
+ nodemask_t *nmask;
+ gfp_t gfp_mask;
+};
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1965e26..06f60a5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1068,10 +1068,15 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
/* page allocation callback for NUMA node migration */
struct page *alloc_new_node_page(struct page *page, unsigned long node)
{
- if (PageHuge(page))
- return alloc_huge_page_node(page_hstate(compound_head(page)),
- node);
- else if (PageTransHuge(page)) {
+ if (PageHuge(page)) {
+ struct hstate *h = page_hstate(page);
+ struct alloc_control ac = {
+ .nid = node,
+ .nmask = NULL,
+ };
+
+ return alloc_huge_page_node(h, &ac);
+ } else if (PageTransHuge(page)) {
struct page *thp;
thp = alloc_pages_node(node,
diff --git a/mm/migrate.c b/mm/migrate.c
index a298a8c..94d2386 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1526,10 +1526,15 @@ struct page *new_page_nodemask(struct page *page,
unsigned int order = 0;
struct page *new_page = NULL;
- if (PageHuge(page))
- return alloc_huge_page_nodemask(
- page_hstate(compound_head(page)),
- preferred_nid, nodemask);
+ if (PageHuge(page)) {
+ struct hstate *h = page_hstate(page);
+ struct alloc_control ac = {
+ .nid = preferred_nid,
+ .nmask = nodemask,
+ };
+
+ return alloc_huge_page_nodemask(h, &ac);
+ }
if (PageTransHuge(page)) {
gfp_mask |= GFP_TRANSHUGE;
--
2.7.4
From: Joonsoo Kim <[email protected]>
To prepare for unifying the duplicated functions in the following
patches, this patch changes the interface of the migration target
alloc/free functions: they now take a struct alloc_control argument
instead of an unsigned long private value.
There is no functional change.
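In other words (a sketch only), the callback typedefs become:

	typedef struct page *new_page_t(struct page *page, struct alloc_control *ac);
	typedef void free_page_t(struct page *page, struct alloc_control *ac);

and a caller that used to smuggle its private data through an unsigned
long now carries it in the control structure, e.g. the compaction
conversion in the diff below:

	struct alloc_control alloc_control = {
		.private = (unsigned long)cc,
	};

	err = migrate_pages(&cc->migratepages, compaction_alloc,
			    compaction_free, &alloc_control,
			    cc->mode, MR_COMPACTION);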
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/migrate.h | 15 +++++++------
include/linux/page-isolation.h | 4 +++-
mm/compaction.c | 15 ++++++++-----
mm/gup.c | 5 +++--
mm/internal.h | 5 ++++-
mm/memory-failure.c | 13 ++++++-----
mm/memory_hotplug.c | 9 +++++---
mm/mempolicy.c | 22 +++++++++++-------
mm/migrate.c | 51 ++++++++++++++++++++++--------------------
mm/page_alloc.c | 2 +-
mm/page_isolation.c | 9 +++++---
11 files changed, 89 insertions(+), 61 deletions(-)
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 1d70b4a..923c4f3 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -7,8 +7,9 @@
#include <linux/migrate_mode.h>
#include <linux/hugetlb.h>
-typedef struct page *new_page_t(struct page *page, unsigned long private);
-typedef void free_page_t(struct page *page, unsigned long private);
+struct alloc_control;
+typedef struct page *new_page_t(struct page *page, struct alloc_control *ac);
+typedef void free_page_t(struct page *page, struct alloc_control *ac);
/*
* Return values from addresss_space_operations.migratepage():
@@ -38,9 +39,9 @@ extern int migrate_page(struct address_space *mapping,
struct page *newpage, struct page *page,
enum migrate_mode mode);
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
- unsigned long private, enum migrate_mode mode, int reason);
+ struct alloc_control *ac, enum migrate_mode mode, int reason);
extern struct page *new_page_nodemask(struct page *page,
- int preferred_nid, nodemask_t *nodemask);
+ struct alloc_control *ac);
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
@@ -56,11 +57,11 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
static inline void putback_movable_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t new,
- free_page_t free, unsigned long private, enum migrate_mode mode,
- int reason)
+ free_page_t free, struct alloc_control *ac,
+ enum migrate_mode mode, int reason)
{ return -ENOSYS; }
static inline struct page *new_page_nodemask(struct page *page,
- int preferred_nid, nodemask_t *nodemask)
+ struct alloc_control *ac)
{ return NULL; }
static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
{ return -EBUSY; }
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 5724580..35e3bdb 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -2,6 +2,8 @@
#ifndef __LINUX_PAGEISOLATION_H
#define __LINUX_PAGEISOLATION_H
+struct alloc_control;
+
#ifdef CONFIG_MEMORY_ISOLATION
static inline bool has_isolate_pageblock(struct zone *zone)
{
@@ -60,6 +62,6 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
int isol_flags);
-struct page *alloc_migrate_target(struct page *page, unsigned long private);
+struct page *alloc_migrate_target(struct page *page, struct alloc_control *ac);
#endif
diff --git a/mm/compaction.c b/mm/compaction.c
index 67fd317..aec1c1f 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1561,9 +1561,9 @@ static void isolate_freepages(struct compact_control *cc)
* from the isolated freelists in the block we are migrating to.
*/
static struct page *compaction_alloc(struct page *migratepage,
- unsigned long data)
+ struct alloc_control *ac)
{
- struct compact_control *cc = (struct compact_control *)data;
+ struct compact_control *cc = (struct compact_control *)ac->private;
struct page *freepage;
if (list_empty(&cc->freepages)) {
@@ -1585,9 +1585,9 @@ static struct page *compaction_alloc(struct page *migratepage,
* freelist. All pages on the freelist are from the same zone, so there is no
* special handling needed for NUMA.
*/
-static void compaction_free(struct page *page, unsigned long data)
+static void compaction_free(struct page *page, struct alloc_control *ac)
{
- struct compact_control *cc = (struct compact_control *)data;
+ struct compact_control *cc = (struct compact_control *)ac->private;
list_add(&page->lru, &cc->freepages);
cc->nr_freepages++;
@@ -2095,6 +2095,9 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
unsigned long last_migrated_pfn;
const bool sync = cc->mode != MIGRATE_ASYNC;
bool update_cached;
+ struct alloc_control alloc_control = {
+ .private = (unsigned long)cc,
+ };
/*
* These counters track activities during zone compaction. Initialize
@@ -2212,8 +2215,8 @@ compact_zone(struct compact_control *cc, struct capture_control *capc)
}
err = migrate_pages(&cc->migratepages, compaction_alloc,
- compaction_free, (unsigned long)cc, cc->mode,
- MR_COMPACTION);
+ compaction_free, &alloc_control,
+ cc->mode, MR_COMPACTION);
trace_mm_compaction_migratepages(cc->nr_migratepages, err,
&cc->migratepages);
diff --git a/mm/gup.c b/mm/gup.c
index 1c86db5..be9cb79 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1594,7 +1594,8 @@ static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
}
#ifdef CONFIG_CMA
-static struct page *new_non_cma_page(struct page *page, unsigned long private)
+static struct page *new_non_cma_page(struct page *page,
+ struct alloc_control *ac)
{
/*
* We want to make sure we allocate the new page from the same node
@@ -1707,7 +1708,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
put_page(pages[i]);
if (migrate_pages(&cma_page_list, new_non_cma_page,
- NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+ NULL, NULL, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
/*
* some of the pages failed migration. Do get_user_pages
* without migration.
diff --git a/mm/internal.h b/mm/internal.h
index 3239d71..abe94a7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -612,7 +612,9 @@ static inline bool is_migrate_highatomic_page(struct page *page)
}
void setup_zone_pageset(struct zone *zone);
-extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
+struct alloc_control;
+extern struct page *alloc_new_node_page(struct page *page,
+ struct alloc_control *ac);
struct alloc_control {
int nid;
@@ -620,6 +622,7 @@ struct alloc_control {
gfp_t gfp_mask;
bool thisnode;
bool skip_cma;
+ unsigned long private;
gfp_t __gfp_mask; /* Used internally in API implementation */
};
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a96364b..3f92e70 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1621,11 +1621,14 @@ int unpoison_memory(unsigned long pfn)
}
EXPORT_SYMBOL(unpoison_memory);
-static struct page *new_page(struct page *p, unsigned long private)
+static struct page *new_page(struct page *p, struct alloc_control *__ac)
{
- int nid = page_to_nid(p);
+ struct alloc_control ac = {
+ .nid = page_to_nid(p),
+ .nmask = &node_states[N_MEMORY],
+ };
- return new_page_nodemask(p, nid, &node_states[N_MEMORY]);
+ return new_page_nodemask(p, &ac);
}
/*
@@ -1722,7 +1725,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
return -EBUSY;
}
- ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
+ ret = migrate_pages(&pagelist, new_page, NULL, NULL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
pr_info("soft offline: %#lx: hugepage migration failed %d, type %lx (%pGp)\n",
@@ -1812,7 +1815,7 @@ static int __soft_offline_page(struct page *page, int flags)
inc_node_page_state(page, NR_ISOLATED_ANON +
page_is_file_lru(page));
list_add(&page->lru, &pagelist);
- ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
+ ret = migrate_pages(&pagelist, new_page, NULL, NULL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
if (!list_empty(&pagelist))
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c4d5c45..89642f9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1232,10 +1232,11 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
return 0;
}
-static struct page *new_node_page(struct page *page, unsigned long private)
+static struct page *new_node_page(struct page *page, struct alloc_control *__ac)
{
int nid = page_to_nid(page);
nodemask_t nmask = node_states[N_MEMORY];
+ struct alloc_control ac = {0};
/*
* try to allocate from a different node but reuse this node if there
@@ -1246,7 +1247,9 @@ static struct page *new_node_page(struct page *page, unsigned long private)
if (nodes_empty(nmask))
node_set(nid, nmask);
- return new_page_nodemask(page, nid, &nmask);
+ ac.nid = nid;
+ ac.nmask = &nmask;
+ return new_page_nodemask(page, &ac);
}
static int
@@ -1310,7 +1313,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
}
if (!list_empty(&source)) {
/* Allocate a new page from the nearest neighbor node */
- ret = migrate_pages(&source, new_node_page, NULL, 0,
+ ret = migrate_pages(&source, new_node_page, NULL, NULL,
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
if (ret) {
list_for_each_entry(page, &source, lru) {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 629feaa..7241621 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1066,12 +1066,12 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
}
/* page allocation callback for NUMA node migration */
-struct page *alloc_new_node_page(struct page *page, unsigned long node)
+struct page *alloc_new_node_page(struct page *page, struct alloc_control *__ac)
{
if (PageHuge(page)) {
struct hstate *h = page_hstate(page);
struct alloc_control ac = {
- .nid = node,
+ .nid = __ac->nid,
.nmask = NULL,
.thisnode = true,
};
@@ -1080,7 +1080,7 @@ struct page *alloc_new_node_page(struct page *page, unsigned long node)
} else if (PageTransHuge(page)) {
struct page *thp;
- thp = alloc_pages_node(node,
+ thp = alloc_pages_node(__ac->nid,
(GFP_TRANSHUGE | __GFP_THISNODE),
HPAGE_PMD_ORDER);
if (!thp)
@@ -1088,7 +1088,7 @@ struct page *alloc_new_node_page(struct page *page, unsigned long node)
prep_transhuge_page(thp);
return thp;
} else
- return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE |
+ return __alloc_pages_node(__ac->nid, GFP_HIGHUSER_MOVABLE |
__GFP_THISNODE, 0);
}
@@ -1102,6 +1102,9 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
nodemask_t nmask;
LIST_HEAD(pagelist);
int err = 0;
+ struct alloc_control ac = {
+ .nid = dest,
+ };
nodes_clear(nmask);
node_set(source, nmask);
@@ -1116,7 +1119,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
if (!list_empty(&pagelist)) {
- err = migrate_pages(&pagelist, alloc_new_node_page, NULL, dest,
+ err = migrate_pages(&pagelist, alloc_new_node_page, NULL, &ac,
MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(&pagelist);
@@ -1237,10 +1240,11 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
* list of pages handed to migrate_pages()--which is how we get here--
* is in virtual address order.
*/
-static struct page *new_page(struct page *page, unsigned long start)
+static struct page *new_page(struct page *page, struct alloc_control *ac)
{
struct vm_area_struct *vma;
unsigned long uninitialized_var(address);
+ unsigned long start = ac->private;
vma = find_vma(current->mm, start);
while (vma) {
@@ -1283,7 +1287,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
return -ENOSYS;
}
-static struct page *new_page(struct page *page, unsigned long start)
+static struct page *new_page(struct page *page, struct alloc_control *ac)
{
return NULL;
}
@@ -1299,6 +1303,7 @@ static long do_mbind(unsigned long start, unsigned long len,
int err;
int ret;
LIST_HEAD(pagelist);
+ struct alloc_control ac = {0};
if (flags & ~(unsigned long)MPOL_MF_VALID)
return -EINVAL;
@@ -1374,8 +1379,9 @@ static long do_mbind(unsigned long start, unsigned long len,
if (!list_empty(&pagelist)) {
WARN_ON_ONCE(flags & MPOL_MF_LAZY);
+ ac.private = start;
nr_failed = migrate_pages(&pagelist, new_page, NULL,
- start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
+ &ac, MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
if (nr_failed)
putback_movable_pages(&pagelist);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 94d2386..ba31153 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1183,7 +1183,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
*/
static ICE_noinline int unmap_and_move(new_page_t get_new_page,
free_page_t put_new_page,
- unsigned long private, struct page *page,
+ struct alloc_control *ac, struct page *page,
int force, enum migrate_mode mode,
enum migrate_reason reason)
{
@@ -1206,7 +1206,7 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
goto out;
}
- newpage = get_new_page(page, private);
+ newpage = get_new_page(page, ac);
if (!newpage)
return -ENOMEM;
@@ -1266,7 +1266,7 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
}
put_new:
if (put_new_page)
- put_new_page(newpage, private);
+ put_new_page(newpage, ac);
else
put_page(newpage);
}
@@ -1293,9 +1293,9 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
* will wait in the page fault for migration to complete.
*/
static int unmap_and_move_huge_page(new_page_t get_new_page,
- free_page_t put_new_page, unsigned long private,
- struct page *hpage, int force,
- enum migrate_mode mode, int reason)
+ free_page_t put_new_page, struct alloc_control *ac,
+ struct page *hpage, int force,
+ enum migrate_mode mode, int reason)
{
int rc = -EAGAIN;
int page_was_mapped = 0;
@@ -1315,7 +1315,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
return -ENOSYS;
}
- new_hpage = get_new_page(hpage, private);
+ new_hpage = get_new_page(hpage, ac);
if (!new_hpage)
return -ENOMEM;
@@ -1402,7 +1402,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
* isolation.
*/
if (put_new_page)
- put_new_page(new_hpage, private);
+ put_new_page(new_hpage, ac);
else
putback_active_hugepage(new_hpage);
@@ -1431,7 +1431,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
* Returns the number of pages that were not migrated, or an error code.
*/
int migrate_pages(struct list_head *from, new_page_t get_new_page,
- free_page_t put_new_page, unsigned long private,
+ free_page_t put_new_page, struct alloc_control *ac,
enum migrate_mode mode, int reason)
{
int retry = 1;
@@ -1455,11 +1455,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
if (PageHuge(page))
rc = unmap_and_move_huge_page(get_new_page,
- put_new_page, private, page,
+ put_new_page, ac, page,
pass > 2, mode, reason);
else
rc = unmap_and_move(get_new_page, put_new_page,
- private, page, pass > 2, mode,
+ ac, page, pass > 2, mode,
reason);
switch(rc) {
@@ -1519,8 +1519,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
return rc;
}
-struct page *new_page_nodemask(struct page *page,
- int preferred_nid, nodemask_t *nodemask)
+struct page *new_page_nodemask(struct page *page, struct alloc_control *ac)
{
gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
unsigned int order = 0;
@@ -1528,12 +1527,12 @@ struct page *new_page_nodemask(struct page *page,
if (PageHuge(page)) {
struct hstate *h = page_hstate(page);
- struct alloc_control ac = {
- .nid = preferred_nid,
- .nmask = nodemask,
+ struct alloc_control __ac = {
+ .nid = ac->nid,
+ .nmask = ac->nmask,
};
- return alloc_huge_page_nodemask(h, &ac);
+ return alloc_huge_page_nodemask(h, &__ac);
}
if (PageTransHuge(page)) {
@@ -1544,8 +1543,7 @@ struct page *new_page_nodemask(struct page *page,
if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
gfp_mask |= __GFP_HIGHMEM;
- new_page = __alloc_pages_nodemask(gfp_mask, order,
- preferred_nid, nodemask);
+ new_page = __alloc_pages_nodemask(gfp_mask, order, ac->nid, ac->nmask);
if (new_page && PageTransHuge(new_page))
prep_transhuge_page(new_page);
@@ -1570,8 +1568,11 @@ static int do_move_pages_to_node(struct mm_struct *mm,
struct list_head *pagelist, int node)
{
int err;
+ struct alloc_control ac = {
+ .nid = node,
+ };
- err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
+ err = migrate_pages(pagelist, alloc_new_node_page, NULL, &ac,
MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(pagelist);
@@ -1961,12 +1962,11 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
}
static struct page *alloc_misplaced_dst_page(struct page *page,
- unsigned long data)
+ struct alloc_control *ac)
{
- int nid = (int) data;
struct page *newpage;
- newpage = __alloc_pages_node(nid,
+ newpage = __alloc_pages_node(ac->nid,
(GFP_HIGHUSER_MOVABLE |
__GFP_THISNODE | __GFP_NOMEMALLOC |
__GFP_NORETRY | __GFP_NOWARN) &
@@ -2031,6 +2031,9 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
int isolated;
int nr_remaining;
LIST_HEAD(migratepages);
+ struct alloc_control ac = {
+ .nid = node,
+ };
/*
* Don't migrate file pages that are mapped in multiple processes
@@ -2053,7 +2056,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
list_add(&page->lru, &migratepages);
nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
- NULL, node, MIGRATE_ASYNC,
+ NULL, &ac, MIGRATE_ASYNC,
MR_NUMA_MISPLACED);
if (nr_remaining) {
if (!list_empty(&migratepages)) {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cef05d3..afdd0fb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8315,7 +8315,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
cc->nr_migratepages -= nr_reclaimed;
ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
- NULL, 0, cc->mode, MR_CONTIG_RANGE);
+ NULL, NULL, cc->mode, MR_CONTIG_RANGE);
}
if (ret < 0) {
putback_movable_pages(&cc->migratepages);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 7df89bd..1e1828b 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -298,9 +298,12 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
return pfn < end_pfn ? -EBUSY : 0;
}
-struct page *alloc_migrate_target(struct page *page, unsigned long private)
+struct page *alloc_migrate_target(struct page *page, struct alloc_control *__ac)
{
- int nid = page_to_nid(page);
+ struct alloc_control ac = {
+ .nid = page_to_nid(page),
+ .nmask = &node_states[N_MEMORY],
+ };
- return new_page_nodemask(page, nid, &node_states[N_MEMORY]);
+ return new_page_nodemask(page, &ac);
}
--
2.7.4
From: Joonsoo Kim <[email protected]>
There is no need for a separate wrapper function just to call the
standard migration target allocation function. Use the standard one
directly.
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/page-isolation.h | 2 --
mm/page_alloc.c | 9 +++++++--
mm/page_isolation.c | 11 -----------
3 files changed, 7 insertions(+), 15 deletions(-)
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 35e3bdb..20a4b63 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -62,6 +62,4 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
int isol_flags);
-struct page *alloc_migrate_target(struct page *page, struct alloc_control *ac);
-
#endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index afdd0fb..2a7ab2b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8288,6 +8288,11 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
unsigned long pfn = start;
unsigned int tries = 0;
int ret = 0;
+ struct alloc_control ac = {
+ .nid = zone_to_nid(cc->zone),
+ .nmask = &node_states[N_MEMORY],
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+ };
migrate_prep();
@@ -8314,8 +8319,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
&cc->migratepages);
cc->nr_migratepages -= nr_reclaimed;
- ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
- NULL, NULL, cc->mode, MR_CONTIG_RANGE);
+ ret = migrate_pages(&cc->migratepages, alloc_migration_target,
+ NULL, &ac, cc->mode, MR_CONTIG_RANGE);
}
if (ret < 0) {
putback_movable_pages(&cc->migratepages);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index aba799d..03d6cad 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -297,14 +297,3 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
return pfn < end_pfn ? -EBUSY : 0;
}
-
-struct page *alloc_migrate_target(struct page *page, struct alloc_control *__ac)
-{
- struct alloc_control ac = {
- .nid = page_to_nid(page),
- .nmask = &node_states[N_MEMORY],
- .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
- };
-
- return alloc_migration_target(page, &ac);
-}
--
2.7.4
From: Joonsoo Kim <[email protected]>
There is no reason to implement a separate function for migration
target allocation. Use the standard one.
Signed-off-by: Joonsoo Kim <[email protected]>
---
mm/internal.h | 3 ---
mm/mempolicy.c | 33 ++++-----------------------------
mm/migrate.c | 4 +++-
3 files changed, 7 insertions(+), 33 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index abe94a7..5ade079 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -612,9 +612,6 @@ static inline bool is_migrate_highatomic_page(struct page *page)
}
void setup_zone_pageset(struct zone *zone);
-struct alloc_control;
-extern struct page *alloc_new_node_page(struct page *page,
- struct alloc_control *ac);
struct alloc_control {
int nid;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7241621..8d3ccab 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1065,33 +1065,6 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
return 0;
}
-/* page allocation callback for NUMA node migration */
-struct page *alloc_new_node_page(struct page *page, struct alloc_control *__ac)
-{
- if (PageHuge(page)) {
- struct hstate *h = page_hstate(page);
- struct alloc_control ac = {
- .nid = __ac->nid,
- .nmask = NULL,
- .thisnode = true,
- };
-
- return alloc_huge_page_nodemask(h, &ac);
- } else if (PageTransHuge(page)) {
- struct page *thp;
-
- thp = alloc_pages_node(__ac->nid,
- (GFP_TRANSHUGE | __GFP_THISNODE),
- HPAGE_PMD_ORDER);
- if (!thp)
- return NULL;
- prep_transhuge_page(thp);
- return thp;
- } else
- return __alloc_pages_node(__ac->nid, GFP_HIGHUSER_MOVABLE |
- __GFP_THISNODE, 0);
-}
-
/*
* Migrate pages from one node to a target node.
* Returns error or the number of pages not migrated.
@@ -1104,6 +1077,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
int err = 0;
struct alloc_control ac = {
.nid = dest,
+ .gfp_mask = GFP_HIGHUSER_MOVABLE,
+ .thisnode = true,
};
nodes_clear(nmask);
@@ -1119,8 +1094,8 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
flags | MPOL_MF_DISCONTIG_OK, &pagelist);
if (!list_empty(&pagelist)) {
- err = migrate_pages(&pagelist, alloc_new_node_page, NULL, &ac,
- MIGRATE_SYNC, MR_SYSCALL);
+ err = migrate_pages(&pagelist, alloc_migration_target, NULL,
+ &ac, MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(&pagelist);
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 029af0b..3dfb108 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1574,9 +1574,11 @@ static int do_move_pages_to_node(struct mm_struct *mm,
int err;
struct alloc_control ac = {
.nid = node,
+ .gfp_mask = GFP_HIGHUSER_MOVABLE,
+ .thisnode = true,
};
- err = migrate_pages(pagelist, alloc_new_node_page, NULL, &ac,
+ err = migrate_pages(pagelist, alloc_migration_target, NULL, &ac,
MIGRATE_SYNC, MR_SYSCALL);
if (err)
putback_movable_pages(pagelist);
--
2.7.4
From: Joonsoo Kim <[email protected]>
There are several similar functions for migration target allocation.
Since there is no fundamental difference between them, it's better to
keep just one rather than all the variants. This patch implements the
base migration target allocation function, alloc_migration_target().
In the following patches, the variants will be converted to use this
function.
Note that the PageHighMem() check in the previous function is replaced
with an open-coded is_highmem_idx() check on the page's zone index,
which is more readable.
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/migrate.h | 6 +++---
mm/memory-failure.c | 3 ++-
mm/memory_hotplug.c | 3 ++-
mm/migrate.c | 26 +++++++++++++++-----------
mm/page_isolation.c | 3 ++-
5 files changed, 24 insertions(+), 17 deletions(-)
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 923c4f3..abf09b3 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -40,8 +40,8 @@ extern int migrate_page(struct address_space *mapping,
enum migrate_mode mode);
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
struct alloc_control *ac, enum migrate_mode mode, int reason);
-extern struct page *new_page_nodemask(struct page *page,
- struct alloc_control *ac);
+extern struct page *alloc_migration_target(struct page *page,
+ struct alloc_control *ac);
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
@@ -60,7 +60,7 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
free_page_t free, struct alloc_control *ac,
enum migrate_mode mode, int reason)
{ return -ENOSYS; }
-static inline struct page *new_page_nodemask(struct page *page,
+static inline struct page *alloc_migration_target(struct page *page,
struct alloc_control *ac)
{ return NULL; }
static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 3f92e70..b400161 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1626,9 +1626,10 @@ static struct page *new_page(struct page *p, struct alloc_control *__ac)
struct alloc_control ac = {
.nid = page_to_nid(p),
.nmask = &node_states[N_MEMORY],
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
- return new_page_nodemask(p, &ac);
+ return alloc_migration_target(p, &ac);
}
/*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 89642f9..185f4c9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1249,7 +1249,8 @@ static struct page *new_node_page(struct page *page, struct alloc_control *__ac)
ac.nid = nid;
ac.nmask = &nmask;
- return new_page_nodemask(page, &ac);
+ ac.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
+ return alloc_migration_target(page, &ac);
}
static int
diff --git a/mm/migrate.c b/mm/migrate.c
index ba31153..029af0b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1519,31 +1519,35 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
return rc;
}
-struct page *new_page_nodemask(struct page *page, struct alloc_control *ac)
+struct page *alloc_migration_target(struct page *page, struct alloc_control *ac)
{
- gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL;
unsigned int order = 0;
struct page *new_page = NULL;
+ int zidx;
+	/* hugetlb has its own gfp handling logic */
if (PageHuge(page)) {
struct hstate *h = page_hstate(page);
- struct alloc_control __ac = {
- .nid = ac->nid,
- .nmask = ac->nmask,
- };
- return alloc_huge_page_nodemask(h, &__ac);
+ return alloc_huge_page_nodemask(h, ac);
}
+ ac->__gfp_mask = ac->gfp_mask;
if (PageTransHuge(page)) {
- gfp_mask |= GFP_TRANSHUGE;
+ ac->__gfp_mask |= GFP_TRANSHUGE;
order = HPAGE_PMD_ORDER;
}
+ zidx = zone_idx(page_zone(page));
+ if (is_highmem_idx(zidx) || zidx == ZONE_MOVABLE)
+ ac->__gfp_mask |= __GFP_HIGHMEM;
- if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
- gfp_mask |= __GFP_HIGHMEM;
+ if (ac->thisnode)
+ ac->__gfp_mask |= __GFP_THISNODE;
+ if (ac->skip_cma)
+ ac->__gfp_mask &= ~__GFP_MOVABLE;
- new_page = __alloc_pages_nodemask(gfp_mask, order, ac->nid, ac->nmask);
+ new_page = __alloc_pages_nodemask(ac->__gfp_mask, order,
+ ac->nid, ac->nmask);
if (new_page && PageTransHuge(new_page))
prep_transhuge_page(new_page);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 1e1828b..aba799d 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -303,7 +303,8 @@ struct page *alloc_migrate_target(struct page *page, struct alloc_control *__ac)
struct alloc_control ac = {
.nid = page_to_nid(page),
.nmask = &node_states[N_MEMORY],
+ .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
- return new_page_nodemask(page, &ac);
+ return alloc_migration_target(page, &ac);
}
--
2.7.4
From: Joonsoo Kim <[email protected]>
There is no reason to implement a separate function for migration
target allocation. Use the standard one.
Signed-off-by: Joonsoo Kim <[email protected]>
---
mm/gup.c | 61 ++++++++++---------------------------------------------------
1 file changed, 10 insertions(+), 51 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index be9cb79..d88a965 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1594,58 +1594,16 @@ static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
}
#ifdef CONFIG_CMA
-static struct page *new_non_cma_page(struct page *page,
+static struct page *alloc_migration_target_non_cma(struct page *page,
struct alloc_control *ac)
{
- /*
- * We want to make sure we allocate the new page from the same node
- * as the source page.
- */
- int nid = page_to_nid(page);
- /*
- * Trying to allocate a page for migration. Ignore allocation
- * failure warnings. We don't force __GFP_THISNODE here because
- * this node here is the node where we have CMA reservation and
- * in some case these nodes will have really less non movable
- * allocation memory.
- */
- gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
-
- if (PageHighMem(page))
- gfp_mask |= __GFP_HIGHMEM;
-
- if (PageHuge(page)) {
- struct hstate *h = page_hstate(page);
- struct alloc_control ac = {
- .nid = nid,
- .nmask = NULL,
- .gfp_mask = __GFP_NOWARN,
- .skip_cma = true,
- };
-
- return alloc_huge_page_nodemask(h, &ac);
- }
-
- if (PageTransHuge(page)) {
- struct page *thp;
- /*
- * ignore allocation failure warnings
- */
- gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
-
- /*
- * Remove the movable mask so that we don't allocate from
- * CMA area again.
- */
- thp_gfpmask &= ~__GFP_MOVABLE;
- thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
- if (!thp)
- return NULL;
- prep_transhuge_page(thp);
- return thp;
- }
+ struct alloc_control __ac = {
+ .nid = page_to_nid(page),
+ .gfp_mask = GFP_USER | __GFP_NOWARN,
+ .skip_cma = true,
+ };
- return __alloc_pages_node(nid, gfp_mask, 0);
+ return alloc_migration_target(page, &__ac);
}
static long check_and_migrate_cma_pages(struct task_struct *tsk,
@@ -1707,8 +1665,9 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
for (i = 0; i < nr_pages; i++)
put_page(pages[i]);
- if (migrate_pages(&cma_page_list, new_non_cma_page,
- NULL, NULL, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+ if (migrate_pages(&cma_page_list,
+ alloc_migration_target_non_cma, NULL, NULL,
+ MIGRATE_SYNC, MR_CONTIG_RANGE)) {
/*
* some of the pages failed migration. Do get_user_pages
* without migration.
--
2.7.4
On Mon, May 18, 2020 at 10:20:47AM +0900, [email protected] wrote:
> From: Joonsoo Kim <[email protected]>
>
> For locality, it's better to migrate the page to the node of the source
> page rather than the node of the current caller's CPU.
>
> Signed-off-by: Joonsoo Kim <[email protected]>
Acked-by: Roman Gushchin <[email protected]>
> ---
> mm/page_isolation.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index 2c11a38..7df89bd 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -300,5 +300,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
>
> struct page *alloc_migrate_target(struct page *page, unsigned long private)
> {
> - return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
> + int nid = page_to_nid(page);
> +
> + return new_page_nodemask(page, nid, &node_states[N_MEMORY]);
Why not new_page_nodemask(page, page_to_nid(page), &node_states[N_MEMORY])?
It still fits into 80 characters.
On Mon, May 18, 2020 at 10:20:49AM +0900, [email protected] wrote:
> From: Joonsoo Kim <[email protected]>
>
> Currently, page allocation functions for migration require several
> arguments. Worse, the following patches will need even more arguments to
> unify the similar functions. To simplify them, this patch introduces a
> unified data structure, struct alloc_control, that controls the
> allocation behaviour.
Is it all about huge pages only? If so, maybe add a huge_page suffix/prefix
to struct alloc_control? E.g. huge_alloc_control or something like this.
>
> For clean-up, function declarations are re-ordered.
>
> Note that gfp_mask handling in alloc_huge_page_(node|nodemask) is
> slightly changed, from assignment to OR. It's safe since the callers of
> these functions don't pass any extra gfp_mask beyond htlb_alloc_mask().
Changes make sense to me, but it feels like this patch is doing several
things simultaneously. Do you mind splitting it into a few?
Thanks!
>
> Signed-off-by: Joonsoo Kim <[email protected]>
> ---
> include/linux/hugetlb.h | 35 +++++++++++++++-------------
> mm/gup.c | 11 ++++++---
> mm/hugetlb.c | 62 ++++++++++++++++++++++++-------------------------
> mm/internal.h | 7 ++++++
> mm/mempolicy.c | 13 +++++++----
> mm/migrate.c | 13 +++++++----
> 6 files changed, 83 insertions(+), 58 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 0cced41..6da217e 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -14,6 +14,7 @@
> struct ctl_table;
> struct user_struct;
> struct mmu_gather;
> +struct alloc_control;
>
> #ifndef is_hugepd
> typedef struct { unsigned long pd; } hugepd_t;
> @@ -502,15 +503,16 @@ struct huge_bootmem_page {
> struct hstate *hstate;
> };
>
> -struct page *alloc_huge_page(struct vm_area_struct *vma,
> - unsigned long addr, int avoid_reserve);
> -struct page *alloc_huge_page_node(struct hstate *h, int nid);
> -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> - nodemask_t *nmask);
> +struct page *alloc_migrate_huge_page(struct hstate *h,
> + struct alloc_control *ac);
> +struct page *alloc_huge_page_node(struct hstate *h,
> + struct alloc_control *ac);
> +struct page *alloc_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac);
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> unsigned long address);
> -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> - int nid, nodemask_t *nmask);
> +struct page *alloc_huge_page(struct vm_area_struct *vma,
> + unsigned long addr, int avoid_reserve);
> int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
> pgoff_t idx);
>
> @@ -752,20 +754,14 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
> #else /* CONFIG_HUGETLB_PAGE */
> struct hstate {};
>
> -static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
> - unsigned long addr,
> - int avoid_reserve)
> -{
> - return NULL;
> -}
> -
> -static inline struct page *alloc_huge_page_node(struct hstate *h, int nid)
> +static inline struct page *
> +alloc_huge_page_node(struct hstate *h, struct alloc_control *ac)
> {
> return NULL;
> }
>
> static inline struct page *
> -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask)
> +alloc_huge_page_nodemask(struct hstate *h, struct alloc_control *ac)
> {
> return NULL;
> }
> @@ -777,6 +773,13 @@ static inline struct page *alloc_huge_page_vma(struct hstate *h,
> return NULL;
> }
>
> +static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
> + unsigned long addr,
> + int avoid_reserve)
> +{
> + return NULL;
> +}
> +
> static inline int __alloc_bootmem_huge_page(struct hstate *h)
> {
> return 0;
> diff --git a/mm/gup.c b/mm/gup.c
> index 0d64ea8..9890fb0 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1613,16 +1613,21 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
> if (PageHighMem(page))
> gfp_mask |= __GFP_HIGHMEM;
>
> -#ifdef CONFIG_HUGETLB_PAGE
> if (PageHuge(page)) {
> struct hstate *h = page_hstate(page);
> + struct alloc_control ac = {
> + .nid = nid,
> + .nmask = NULL,
> + .gfp_mask = gfp_mask,
> + };
> +
> /*
> * We don't want to dequeue from the pool because pool pages will
> * mostly be from the CMA region.
> */
> - return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> + return alloc_migrate_huge_page(h, &ac);
> }
> -#endif
> +
> if (PageTransHuge(page)) {
> struct page *thp;
> /*
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index dcb34d7..859dba4 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1054,8 +1054,8 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
> return page;
> }
>
> -static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
> - nodemask_t *nmask)
> +static struct page *dequeue_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac)
> {
> unsigned int cpuset_mems_cookie;
> struct zonelist *zonelist;
> @@ -1063,14 +1063,15 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
> struct zoneref *z;
> int node = NUMA_NO_NODE;
>
> - zonelist = node_zonelist(nid, gfp_mask);
> + zonelist = node_zonelist(ac->nid, ac->gfp_mask);
>
> retry_cpuset:
> cpuset_mems_cookie = read_mems_allowed_begin();
> - for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
> + for_each_zone_zonelist_nodemask(zone, z, zonelist,
> + gfp_zone(ac->gfp_mask), ac->nmask) {
> struct page *page;
>
> - if (!cpuset_zone_allowed(zone, gfp_mask))
> + if (!cpuset_zone_allowed(zone, ac->gfp_mask))
> continue;
> /*
> * no need to ask again on the same node. Pool is node rather than
> @@ -1106,9 +1107,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> {
> struct page *page;
> struct mempolicy *mpol;
> - gfp_t gfp_mask;
> - nodemask_t *nodemask;
> - int nid;
> + struct alloc_control ac = {0};
>
> /*
> * A child process with MAP_PRIVATE mappings created by their parent
> @@ -1123,9 +1122,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
> goto err;
>
> - gfp_mask = htlb_alloc_mask(h);
> - nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> + ac.gfp_mask = htlb_alloc_mask(h);
> + ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
> +
> + page = dequeue_huge_page_nodemask(h, &ac);
> if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
> SetPagePrivate(page);
> h->resv_huge_pages--;
> @@ -1938,15 +1938,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
> return page;
> }
>
> -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> - int nid, nodemask_t *nmask)
> +struct page *alloc_migrate_huge_page(struct hstate *h,
> + struct alloc_control *ac)
> {
> struct page *page;
>
> if (hstate_is_gigantic(h))
> return NULL;
>
> - page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
> + page = alloc_fresh_huge_page(h, ac->gfp_mask,
> + ac->nid, ac->nmask, NULL);
> if (!page)
> return NULL;
>
> @@ -1980,36 +1981,37 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
> }
>
> /* page migration callback function */
> -struct page *alloc_huge_page_node(struct hstate *h, int nid)
> +struct page *alloc_huge_page_node(struct hstate *h,
> + struct alloc_control *ac)
> {
> - gfp_t gfp_mask = htlb_alloc_mask(h);
> struct page *page = NULL;
>
> - if (nid != NUMA_NO_NODE)
> - gfp_mask |= __GFP_THISNODE;
> + ac->gfp_mask |= htlb_alloc_mask(h);
> + if (ac->nid != NUMA_NO_NODE)
> + ac->gfp_mask |= __GFP_THISNODE;
>
> spin_lock(&hugetlb_lock);
> if (h->free_huge_pages - h->resv_huge_pages > 0)
> - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
> + page = dequeue_huge_page_nodemask(h, ac);
> spin_unlock(&hugetlb_lock);
>
> if (!page)
> - page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> + page = alloc_migrate_huge_page(h, ac);
>
> return page;
> }
>
> /* page migration callback function */
> -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> - nodemask_t *nmask)
> +struct page *alloc_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac)
> {
> - gfp_t gfp_mask = htlb_alloc_mask(h);
> + ac->gfp_mask |= htlb_alloc_mask(h);
>
> spin_lock(&hugetlb_lock);
> if (h->free_huge_pages - h->resv_huge_pages > 0) {
> struct page *page;
>
> - page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
> + page = dequeue_huge_page_nodemask(h, ac);
> if (page) {
> spin_unlock(&hugetlb_lock);
> return page;
> @@ -2017,22 +2019,20 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> }
> spin_unlock(&hugetlb_lock);
>
> - return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
> + return alloc_migrate_huge_page(h, ac);
> }
>
> /* mempolicy aware migration callback */
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> unsigned long address)
> {
> + struct alloc_control ac = {0};
> struct mempolicy *mpol;
> - nodemask_t *nodemask;
> struct page *page;
> - gfp_t gfp_mask;
> - int node;
>
> - gfp_mask = htlb_alloc_mask(h);
> - node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> - page = alloc_huge_page_nodemask(h, node, nodemask);
> + ac.gfp_mask = htlb_alloc_mask(h);
> + ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
> + page = alloc_huge_page_nodemask(h, &ac);
> mpol_cond_put(mpol);
>
> return page;
> diff --git a/mm/internal.h b/mm/internal.h
> index 791e4b5a..75b3f8e 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -613,4 +613,11 @@ static inline bool is_migrate_highatomic_page(struct page *page)
>
> void setup_zone_pageset(struct zone *zone);
> extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
> +
> +struct alloc_control {
> + int nid;
> + nodemask_t *nmask;
> + gfp_t gfp_mask;
> +};
> +
> #endif /* __MM_INTERNAL_H */
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 1965e26..06f60a5 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -1068,10 +1068,15 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
> /* page allocation callback for NUMA node migration */
> struct page *alloc_new_node_page(struct page *page, unsigned long node)
> {
> - if (PageHuge(page))
> - return alloc_huge_page_node(page_hstate(compound_head(page)),
> - node);
> - else if (PageTransHuge(page)) {
> + if (PageHuge(page)) {
> + struct hstate *h = page_hstate(page);
> + struct alloc_control ac = {
> + .nid = node,
> + .nmask = NULL,
> + };
> +
> + return alloc_huge_page_node(h, &ac);
> + } else if (PageTransHuge(page)) {
> struct page *thp;
>
> thp = alloc_pages_node(node,
> diff --git a/mm/migrate.c b/mm/migrate.c
> index a298a8c..94d2386 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1526,10 +1526,15 @@ struct page *new_page_nodemask(struct page *page,
> unsigned int order = 0;
> struct page *new_page = NULL;
>
> - if (PageHuge(page))
> - return alloc_huge_page_nodemask(
> - page_hstate(compound_head(page)),
> - preferred_nid, nodemask);
> + if (PageHuge(page)) {
> + struct hstate *h = page_hstate(page);
> + struct alloc_control ac = {
> + .nid = preferred_nid,
> + .nmask = nodemask,
> + };
> +
> + return alloc_huge_page_nodemask(h, &ac);
> + }
>
> if (PageTransHuge(page)) {
> gfp_mask |= GFP_TRANSHUGE;
> --
> 2.7.4
>
On Thu, May 21, 2020 at 9:37 AM, Roman Gushchin <[email protected]> wrote:
>
> On Mon, May 18, 2020 at 10:20:47AM +0900, [email protected] wrote:
> > From: Joonsoo Kim <[email protected]>
> >
> > For locality, it's better to migrate the page to the same node
> > rather than the node of the current caller's cpu.
> >
> > Signed-off-by: Joonsoo Kim <[email protected]>
>
> Acked-by: Roman Gushchin <[email protected]>
Thanks for the review!
> > ---
> > mm/page_isolation.c | 4 +++-
> > 1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> > index 2c11a38..7df89bd 100644
> > --- a/mm/page_isolation.c
> > +++ b/mm/page_isolation.c
> > @@ -300,5 +300,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
> >
> > struct page *alloc_migrate_target(struct page *page, unsigned long private)
> > {
> > - return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
> > + int nid = page_to_nid(page);
> > +
> > + return new_page_nodemask(page, nid, &node_states[N_MEMORY]);
>
> Why not new_page_nodemask(page, page_to_nid(page), &node_states[N_MEMORY]) ?
> Still fits into 80 characters.
It's just my preference to avoid calling a function directly in an
argument list where possible. If you don't like it, I will change it as
you suggested. However, since alloc_migrate_target() will be removed in
a following patch, there will be no difference in the end.
Thanks.
On Thu, May 21, 2020 at 9:43 AM, Roman Gushchin <[email protected]> wrote:
>
> On Mon, May 18, 2020 at 10:20:49AM +0900, [email protected] wrote:
> > From: Joonsoo Kim <[email protected]>
> >
> > Currently, the page allocation functions for migration require several
> > arguments. Worse, in the following patch, more arguments will be needed
> > to unify the similar functions. To simplify them, this patch introduces
> > a unified data structure that controls the allocation behaviour.
>
> Is it all about huge pages only? If so, maybe add a huge_page suffix/prefix
> to struct alloc_control, e.g. huge_alloc_control or something like this.
No, it will be used for other migration target allocation functions. You can
see this in the following patches.
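For context, by the end of the series the control structure grows a few more
fields; they all appear in the __gfp_mask patch quoted later in the thread,
and the field comments below are only my own gloss, not taken from the
patches:

	struct alloc_control {
		int nid;		/* preferred node id */
		nodemask_t *nmask;	/* allowed target nodes */
		gfp_t gfp_mask;		/* caller-supplied allocation flags */
		bool thisnode;		/* restrict to nid (__GFP_THISNODE) */
		bool skip_cma;		/* do not allocate from the CMA area */

		gfp_t __gfp_mask;	/* used internally in the API implementation */
	};

gfp_mask, nid and nmask carry the caller's request, while __gfp_mask is what
the hugetlb internals actually allocate with.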
> >
> > For clean-up, function declarations are re-ordered.
> >
> > Note that gfp_mask handling in alloc_huge_page_(node|nodemask) is
> > slightly changed, from ASSIGN to OR. It's safe since callers of these
> > functions don't pass an extra gfp_mask except htlb_alloc_mask().
>
> Changes make sense to me, but it feels like this patch is doing several
> things simultaneously. Do you mind splitting it into a few?
I will try to split it in the next spin.
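While at it, to spell out the ASSIGN-to-OR note quoted above, a minimal
sketch trimmed from the hunks in this patch:

	/* before: the hugetlb mask was assigned, ignoring anything else */
	gfp_t gfp_mask = htlb_alloc_mask(h);

	/* after: the hugetlb bits are OR-ed into the caller-provided mask */
	ac->gfp_mask |= htlb_alloc_mask(h);

Since no current caller passes an extra gfp_mask in ac->gfp_mask, both forms
end up with the same value.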
Thanks.
On 5/17/20 6:20 PM, [email protected] wrote:
> From: Joonsoo Kim <[email protected]>
>
> It's not performance sensitive function. Move it to .c.
> This is a preparation step for future change.
>
> Signed-off-by: Joonsoo Kim <[email protected]>
Agreed, this is not performance sensitive and can be moved.
Acked-by: Mike Kravetz <[email protected]>
--
Mike Kravetz
On 5/17/20 6:20 PM, [email protected] wrote:
> From: Joonsoo Kim <[email protected]>
>
> Currently, the page allocation functions for migration require several
> arguments. Worse, in the following patch, more arguments will be needed
> to unify the similar functions. To simplify them, this patch introduces
> a unified data structure that controls the allocation behaviour.
As a follow-up to Roman's question and your answer about adding a
suffix/prefix to the new structure: it may be a bit confusing, as
alloc_context is already defined and *ac is passed around for page
allocations. Perhaps this new structure could somehow have 'migrate' in
the name, as it is all about allocating migration targets?
>
> For clean-up, function declarations are re-ordered.
>
> Note that gfp_mask handling in alloc_huge_page_(node|nodemask) is
> slightly changed, from ASSIGN to OR. It's safe since callers of these
> functions don't pass an extra gfp_mask except htlb_alloc_mask().
>
> Signed-off-by: Joonsoo Kim <[email protected]>
Patch makes sense.
> diff --git a/mm/migrate.c b/mm/migrate.c
> index a298a8c..94d2386 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1526,10 +1526,15 @@ struct page *new_page_nodemask(struct page *page,
> unsigned int order = 0;
> struct page *new_page = NULL;
>
> - if (PageHuge(page))
> - return alloc_huge_page_nodemask(
> - page_hstate(compound_head(page)),
> - preferred_nid, nodemask);
> + if (PageHuge(page)) {
> + struct hstate *h = page_hstate(page);
I assume the removal of compound_head(page) was intentional? Just asking
because PageHuge() looks at the head page while page_hstate() does not. So,
if passed a non-head page, things could go bad.
--
Mike Kravetz
On 5/17/20 6:20 PM, [email protected] wrote:
> From: Joonsoo Kim <[email protected]>
>
> There is a user who does not want to use CMA memory for migration. Until
> now, this is implemented on the caller side, but it's not optimal since
> the caller has only limited information. This patch implements it on the
> callee side to get a better result.
>
> Signed-off-by: Joonsoo Kim <[email protected]>
Thank you!
Avoiding CMA works much better with this new skip_cma field.
Acked-by: Mike Kravetz <[email protected]>
--
Mike Kravetz
On 5/17/20 6:20 PM, [email protected] wrote:
> From: Joonsoo Kim <[email protected]>
>
It's not good practice to modify user input. Instead of using it to
build the correct gfp_mask for the APIs, this patch introduces another
gfp_mask field, __gfp_mask, for internal use.
Modifying the flags as is done in the existing code does not bother me
too much, but that is just my opinion. Adding __gfp_mask for modifications
is fine with me if others think it is a good thing.
Does dequeue_huge_page_vma() need to be modified so that it will set
ac.__gfp_mask before calling dequeue_huge_page_nodemask()?
--
Mike Kravetz
>
> Signed-off-by: Joonsoo Kim <[email protected]>
> ---
> mm/hugetlb.c | 15 ++++++++-------
> mm/internal.h | 2 ++
> 2 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 53edd02..5f43b7e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1069,15 +1069,15 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h,
> struct zoneref *z;
> int node = NUMA_NO_NODE;
>
> - zonelist = node_zonelist(ac->nid, ac->gfp_mask);
> + zonelist = node_zonelist(ac->nid, ac->__gfp_mask);
>
> retry_cpuset:
> cpuset_mems_cookie = read_mems_allowed_begin();
> for_each_zone_zonelist_nodemask(zone, z, zonelist,
> - gfp_zone(ac->gfp_mask), ac->nmask) {
> + gfp_zone(ac->__gfp_mask), ac->nmask) {
> struct page *page;
>
> - if (!cpuset_zone_allowed(zone, ac->gfp_mask))
> + if (!cpuset_zone_allowed(zone, ac->__gfp_mask))
> continue;
> /*
> * no need to ask again on the same node. Pool is node rather than
> @@ -1952,7 +1952,7 @@ static struct page *alloc_migrate_huge_page(struct hstate *h,
> if (hstate_is_gigantic(h))
> return NULL;
>
> - page = alloc_fresh_huge_page(h, ac->gfp_mask,
> + page = alloc_fresh_huge_page(h, ac->__gfp_mask,
> ac->nid, ac->nmask, NULL);
> if (!page)
> return NULL;
> @@ -1990,9 +1990,10 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
> struct page *alloc_huge_page_nodemask(struct hstate *h,
> struct alloc_control *ac)
> {
> - ac->gfp_mask |= htlb_alloc_mask(h);
> + ac->__gfp_mask = htlb_alloc_mask(h);
> + ac->__gfp_mask |= ac->gfp_mask;
> if (ac->thisnode && ac->nid != NUMA_NO_NODE)
> - ac->gfp_mask |= __GFP_THISNODE;
> + ac->__gfp_mask |= __GFP_THISNODE;
>
> spin_lock(&hugetlb_lock);
> if (h->free_huge_pages - h->resv_huge_pages > 0) {
> @@ -2011,7 +2012,7 @@ struct page *alloc_huge_page_nodemask(struct hstate *h,
> * will not come from CMA area
> */
> if (ac->skip_cma)
> - ac->gfp_mask &= ~__GFP_MOVABLE;
> + ac->__gfp_mask &= ~__GFP_MOVABLE;
>
> return alloc_migrate_huge_page(h, ac);
> }
> diff --git a/mm/internal.h b/mm/internal.h
> index 6b6507e..3239d71 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -620,6 +620,8 @@ struct alloc_control {
> gfp_t gfp_mask;
> bool thisnode;
> bool skip_cma;
> +
> + gfp_t __gfp_mask; /* Used internally in API implementation */
> };
>
> #endif /* __MM_INTERNAL_H */
>
On Fri, May 22, 2020 at 3:57 AM, Mike Kravetz <[email protected]> wrote:
>
> On 5/17/20 6:20 PM, [email protected] wrote:
> > From: Joonsoo Kim <[email protected]>
> >
> > Currently, the page allocation functions for migration require several
> > arguments. Worse, in the following patch, more arguments will be needed
> > to unify the similar functions. To simplify them, this patch introduces
> > a unified data structure that controls the allocation behaviour.
>
> As a follow-up to Roman's question and your answer about adding a
> suffix/prefix to the new structure: it may be a bit confusing, as
> alloc_context is already defined and *ac is passed around for page
> allocations. Perhaps this new structure could somehow have 'migrate' in
> the name, as it is all about allocating migration targets?
I have considered that, but I cannot find an appropriate prefix. In the
hugetlb code, struct alloc_control is passed to internal functions that are
not fully dedicated to migration, so 'migrate' would not be an appropriate
prefix. alloc_context is used by the page allocation core and alloc_control
would be used outside of it, so I think we can live with it. If there is a
good suggestion, I will happily change the name.
> >
> > For clean-up, function declarations are re-ordered.
> >
> > Note that gfp_mask handling in alloc_huge_page_(node|nodemask) is
> > slightly changed, from ASSIGN to OR. It's safe since callers of these
> > functions don't pass an extra gfp_mask except htlb_alloc_mask().
> >
> > Signed-off-by: Joonsoo Kim <[email protected]>
>
> Patch makes sense.
Thanks!
> > diff --git a/mm/migrate.c b/mm/migrate.c
> > index a298a8c..94d2386 100644
> > --- a/mm/migrate.c
> > +++ b/mm/migrate.c
> > @@ -1526,10 +1526,15 @@ struct page *new_page_nodemask(struct page *page,
> > unsigned int order = 0;
> > struct page *new_page = NULL;
> >
> > - if (PageHuge(page))
> > - return alloc_huge_page_nodemask(
> > - page_hstate(compound_head(page)),
> > - preferred_nid, nodemask);
> > + if (PageHuge(page)) {
> > + struct hstate *h = page_hstate(page);
>
> I assume the removal of compound_head(page) was intentional? Just asking
> because PageHuge() looks at the head page while page_hstate() does not. So,
> if passed a non-head page, things could go bad.
I was thinking that page_hstate() could handle a tail page, but it seems
that it cannot. Thanks for the correction. I will change it in the next
version.
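Roughly, I expect the new_page_nodemask() hunk to end up like this in the
next version (untested sketch):

	if (PageHuge(page)) {
		struct hstate *h = page_hstate(compound_head(page));
		struct alloc_control ac = {
			.nid = preferred_nid,
			.nmask = nodemask,
		};

		return alloc_huge_page_nodemask(h, &ac);
	}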
Thanks.
On Fri, May 22, 2020 at 7:19 AM, Mike Kravetz <[email protected]> wrote:
>
> On 5/17/20 6:20 PM, [email protected] wrote:
> > From: Joonsoo Kim <[email protected]>
> >
> > It's not good practice to modify user input. Instead of using it to
> > build the correct gfp_mask for the APIs, this patch introduces another
> > gfp_mask field, __gfp_mask, for internal use.
>
> Modifying the flags as is done in the existing code does not bother me
> too much, but that is just my opinion. Adding __gfp_mask for modifications
> is fine with me if others think it is a good thing.
With the following patches, in some cases, ac->gfp_mask is set up once and
used many times. If we modified ac->gfp_mask in place, there would be side
effects.
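A concrete example of the kind of side effect I mean, sketched around the
skip_cma handling quoted earlier in the thread (illustrative only):

	/* if the API masked the caller's field in place ... */
	ac->gfp_mask &= ~__GFP_MOVABLE;

... then a later allocation reusing the same alloc_control would silently
lose __GFP_MOVABLE even when CMA memory is acceptable. Doing the masking on
the internal __gfp_mask keeps the caller's gfp_mask intact across calls.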
> Does dequeue_huge_page_vma() need to be modified so that it will set
> ac.__gfp_mask before calling dequeue_huge_page_nodemask()?
Good catch! Will change!
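Concretely, I expect the fix to look something like this (sketch only, not
the final hunk):

	ac.gfp_mask = htlb_alloc_mask(h);
	ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
	/*
	 * dequeue_huge_page_nodemask() now consumes ac->__gfp_mask, so
	 * populate it here as well before calling it.
	 */
	ac.__gfp_mask = ac.gfp_mask;

	page = dequeue_huge_page_nodemask(h, &ac);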
Thanks.
> > > mm/page_isolation.c | 4 +++-
> > > 1 file changed, 3 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> > > index 2c11a38..7df89bd 100644
> > > --- a/mm/page_isolation.c
> > > +++ b/mm/page_isolation.c
> > > @@ -300,5 +300,7 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
> > >
> > > struct page *alloc_migrate_target(struct page *page, unsigned long private)
> > > {
> > > - return new_page_nodemask(page, numa_node_id(), &node_states[N_MEMORY]);
> > > + int nid = page_to_nid(page);
> > > +
> > > + return new_page_nodemask(page, nid, &node_states[N_MEMORY]);
> >
> > Why not new_page_nodemask(page, page_to_nid(page), &node_states[N_MEMORY]) ?
> > Still fits into 80 characters.
>
> It's just my preference to avoid calling a function directly in an
> argument list where possible. If you don't like it, I will change it as
> you suggested. However, since alloc_migrate_target() will be removed in
> a following patch, there will be no difference in the end.
Oops... I found that your suggestion exceeds 80 characters.
I will leave the patch as it is.
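For the record, with the single tab indent inside alloc_migrate_target()
counted as 8 columns, the suggested form comes out to 82 columns:

	return new_page_nodemask(page, page_to_nid(page), &node_states[N_MEMORY]);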
Thanks.