LinuxLists.cc - [PATCH v2 03/12] mm/hugetlb: introduce alloc_control structure to simplify migration target allocation APIs

2020-05-27 10:44:37

Subject: [PATCH v2 03/12] mm/hugetlb: introduce alloc_control structure to simplify migration target allocation APIs

From: Joonsoo Kim <[email protected]>

Currently, page allocation functions for migration requires some arguments.
More worse, in the following patch, more argument will be needed to unify
the similar functions. To simplify them, in this patch, unified data
structure that controls allocation behaviour is introduced.

For clean-up, function declarations are re-ordered.

Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/hugetlb.h | 35 +++++++++++++++-------------
mm/gup.c | 11 ++++++---
mm/hugetlb.c | 62 ++++++++++++++++++++++++-------------------------
mm/internal.h | 7 ++++++
mm/mempolicy.c | 13 +++++++----
mm/migrate.c | 13 +++++++----
6 files changed, 83 insertions(+), 58 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 50650d0..15c8fb8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -14,6 +14,7 @@
struct ctl_table;
struct user_struct;
struct mmu_gather;
+struct alloc_control;

#ifndef is_hugepd
typedef struct { unsigned long pd; } hugepd_t;
@@ -502,15 +503,16 @@ struct huge_bootmem_page {
struct hstate *hstate;
};

-struct page *alloc_huge_page(struct vm_area_struct *vma,
- unsigned long addr, int avoid_reserve);
-struct page *alloc_huge_page_node(struct hstate *h, int nid);
-struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask);
+struct page *alloc_migrate_huge_page(struct hstate *h,
+ struct alloc_control *ac);
+struct page *alloc_huge_page_node(struct hstate *h,
+ struct alloc_control *ac);
+struct page *alloc_huge_page_nodemask(struct hstate *h,
+ struct alloc_control *ac);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nmask);
+struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr, int avoid_reserve);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);

@@ -752,20 +754,14 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};

-static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
- unsigned long addr,
- int avoid_reserve)
-{
- return NULL;
-}
-
-static inline struct page *alloc_huge_page_node(struct hstate *h, int nid)
+static inline struct page *
+alloc_huge_page_node(struct hstate *h, struct alloc_control *ac)
{
return NULL;
}

static inline struct page *
-alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask)
+alloc_huge_page_nodemask(struct hstate *h, struct alloc_control *ac)
{
return NULL;
}
@@ -777,6 +773,13 @@ static inline struct page *alloc_huge_page_vma(struct hstate *h,
return NULL;
}

+static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr,
+ int avoid_reserve)
+{
+ return NULL;
+}
+
static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
return 0;
diff --git a/mm/gup.c b/mm/gup.c
index ee039d4..6b78f11 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1612,16 +1612,21 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
if (PageHighMem(page))
gfp_mask |= __GFP_HIGHMEM;

-#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(page)) {
struct hstate *h = page_hstate(page);
+ struct alloc_control ac = {
+ .nid = nid,
+ .nmask = NULL,
+ .gfp_mask = gfp_mask,
+ };
+
/*
* We don't want to dequeue from the pool because pool pages will
* mostly be from the CMA region.
*/
- return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+ return alloc_migrate_huge_page(h, &ac);
}
-#endif
+
if (PageTransHuge(page)) {
struct page *thp;
/*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 57ece74..453ba94 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1053,8 +1053,8 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
return page;
}

-static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
- nodemask_t *nmask)
+static struct page *dequeue_huge_page_nodemask(struct hstate *h,
+ struct alloc_control *ac)
{
unsigned int cpuset_mems_cookie;
struct zonelist *zonelist;
@@ -1062,14 +1062,15 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
struct zoneref *z;
int node = NUMA_NO_NODE;

- zonelist = node_zonelist(nid, gfp_mask);
+ zonelist = node_zonelist(ac->nid, ac->gfp_mask);

retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
- for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
+ for_each_zone_zonelist_nodemask(zone, z, zonelist,
+ gfp_zone(ac->gfp_mask), ac->nmask) {
struct page *page;

- if (!cpuset_zone_allowed(zone, gfp_mask))
+ if (!cpuset_zone_allowed(zone, ac->gfp_mask))
continue;
/*
* no need to ask again on the same node. Pool is node rather than
@@ -1105,9 +1106,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
{
struct page *page;
struct mempolicy *mpol;
- gfp_t gfp_mask;
- nodemask_t *nodemask;
- int nid;
+ struct alloc_control ac = {0};

/*
* A child process with MAP_PRIVATE mappings created by their parent
@@ -1122,9 +1121,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
goto err;

- gfp_mask = htlb_alloc_mask(h);
- nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
+ ac.gfp_mask = htlb_alloc_mask(h);
+ ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
+
+ page = dequeue_huge_page_nodemask(h, &ac);
if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
SetPagePrivate(page);
h->resv_huge_pages--;
@@ -1937,15 +1937,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
return page;
}

-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nmask)
+struct page *alloc_migrate_huge_page(struct hstate *h,
+ struct alloc_control *ac)
{
struct page *page;

if (hstate_is_gigantic(h))
return NULL;

- page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
+ page = alloc_fresh_huge_page(h, ac->gfp_mask,
+ ac->nid, ac->nmask, NULL);
if (!page)
return NULL;

@@ -1979,36 +1980,37 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
}

/* page migration callback function */
-struct page *alloc_huge_page_node(struct hstate *h, int nid)
+struct page *alloc_huge_page_node(struct hstate *h,
+ struct alloc_control *ac)
{
- gfp_t gfp_mask = htlb_alloc_mask(h);
struct page *page = NULL;

- if (nid != NUMA_NO_NODE)
- gfp_mask |= __GFP_THISNODE;
+ ac->gfp_mask = htlb_alloc_mask(h);
+ if (ac->nid != NUMA_NO_NODE)
+ ac->gfp_mask |= __GFP_THISNODE;

spin_lock(&hugetlb_lock);
if (h->free_huge_pages - h->resv_huge_pages > 0)
- page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
+ page = dequeue_huge_page_nodemask(h, ac);
spin_unlock(&hugetlb_lock);

if (!page)
- page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+ page = alloc_migrate_huge_page(h, ac);

return page;
}

/* page migration callback function */
-struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
- nodemask_t *nmask)
+struct page *alloc_huge_page_nodemask(struct hstate *h,
+ struct alloc_control *ac)
{
- gfp_t gfp_mask = htlb_alloc_mask(h);
+ ac->gfp_mask = htlb_alloc_mask(h);

spin_lock(&hugetlb_lock);
if (h->free_huge_pages - h->resv_huge_pages > 0) {
struct page *page;

- page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
+ page = dequeue_huge_page_nodemask(h, ac);
if (page) {
spin_unlock(&hugetlb_lock);
return page;
@@ -2016,22 +2018,20 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
}
spin_unlock(&hugetlb_lock);

- return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
+ return alloc_migrate_huge_page(h, ac);
}

/* mempolicy aware migration callback */
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address)
{
+ struct alloc_control ac = {0};
struct mempolicy *mpol;
- nodemask_t *nodemask;
struct page *page;
- gfp_t gfp_mask;
- int node;

- gfp_mask = htlb_alloc_mask(h);
- node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
- page = alloc_huge_page_nodemask(h, node, nodemask);
+ ac.gfp_mask = htlb_alloc_mask(h);
+ ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
+ page = alloc_huge_page_nodemask(h, &ac);
mpol_cond_put(mpol);

return page;
diff --git a/mm/internal.h b/mm/internal.h
index 9886db2..6e613ce 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -613,4 +613,11 @@ static inline bool is_migrate_highatomic_page(struct page *page)

void setup_zone_pageset(struct zone *zone);
extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
+
+struct alloc_control {
+ int nid; /* preferred node id */
+ nodemask_t *nmask;
+ gfp_t gfp_mask;
+};
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3813206..3b6b551 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1068,10 +1068,15 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
/* page allocation callback for NUMA node migration */
struct page *alloc_new_node_page(struct page *page, unsigned long node)
{
- if (PageHuge(page))
- return alloc_huge_page_node(page_hstate(compound_head(page)),
- node);
- else if (PageTransHuge(page)) {
+ if (PageHuge(page)) {
+ struct hstate *h = page_hstate(compound_head(page));
+ struct alloc_control ac = {
+ .nid = node,
+ .nmask = NULL,
+ };
+
+ return alloc_huge_page_node(h, &ac);
+ } else if (PageTransHuge(page)) {
struct page *thp;

thp = alloc_pages_node(node,
diff --git a/mm/migrate.c b/mm/migrate.c
index 824c22e..30217537 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1544,10 +1544,15 @@ struct page *new_page_nodemask(struct page *page,
unsigned int order = 0;
struct page *new_page = NULL;

- if (PageHuge(page))
- return alloc_huge_page_nodemask(
- page_hstate(compound_head(page)),
- preferred_nid, nodemask);
+ if (PageHuge(page)) {
+ struct hstate *h = page_hstate(compound_head(page));
+ struct alloc_control ac = {
+ .nid = preferred_nid,
+ .nmask = nodemask,
+ };
+
+ return alloc_huge_page_nodemask(h, &ac);
+ }

if (PageTransHuge(page)) {
gfp_mask |= GFP_TRANSHUGE;
--
2.7.4

2020-06-09 13:28:22

by Michal Hocko

[permalink] [raw]

Subject: Re: [PATCH v2 03/12] mm/hugetlb: introduce alloc_control structure to simplify migration target allocation APIs

On Wed 27-05-20 15:44:54, Joonsoo Kim wrote:
> From: Joonsoo Kim <[email protected]>
>
> Currently, page allocation functions for migration requires some arguments.
> More worse, in the following patch, more argument will be needed to unify
> the similar functions. To simplify them, in this patch, unified data
> structure that controls allocation behaviour is introduced.
>
> For clean-up, function declarations are re-ordered.

This is really hard to review without having a clear picture of the
resulting code so bear with me. I can see some reasons why allocation
callbacks might benefit from a agragated argument but you seem to touch
the internal hugetlb dequeue_huge_page_vma which shouldn't really need
that. I wouldn't mind much but I remember the hugetlb allocation
functions layering is quite complex for hugetlb specific reasons (see
0c397daea1d4 ("mm, hugetlb: further simplify hugetlb allocation API")
for more background).

Is there any reason why the agregated argument cannot be limited only to
migration callbacks. That would be alloc_huge_page_node, alloc_huge_page_nodemask
and alloc_huge_page_vma.

> Signed-off-by: Joonsoo Kim <[email protected]>
> ---
> include/linux/hugetlb.h | 35 +++++++++++++++-------------
> mm/gup.c | 11 ++++++---
> mm/hugetlb.c | 62 ++++++++++++++++++++++++-------------------------
> mm/internal.h | 7 ++++++
> mm/mempolicy.c | 13 +++++++----
> mm/migrate.c | 13 +++++++----
> 6 files changed, 83 insertions(+), 58 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 50650d0..15c8fb8 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -14,6 +14,7 @@
> struct ctl_table;
> struct user_struct;
> struct mmu_gather;
> +struct alloc_control;
>
> #ifndef is_hugepd
> typedef struct { unsigned long pd; } hugepd_t;
> @@ -502,15 +503,16 @@ struct huge_bootmem_page {
> struct hstate *hstate;
> };
>
> -struct page *alloc_huge_page(struct vm_area_struct *vma,
> - unsigned long addr, int avoid_reserve);
> -struct page *alloc_huge_page_node(struct hstate *h, int nid);
> -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> - nodemask_t *nmask);
> +struct page *alloc_migrate_huge_page(struct hstate *h,
> + struct alloc_control *ac);
> +struct page *alloc_huge_page_node(struct hstate *h,
> + struct alloc_control *ac);
> +struct page *alloc_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac);
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> unsigned long address);
> -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> - int nid, nodemask_t *nmask);
> +struct page *alloc_huge_page(struct vm_area_struct *vma,
> + unsigned long addr, int avoid_reserve);
> int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
> pgoff_t idx);
>
> @@ -752,20 +754,14 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
> #else /* CONFIG_HUGETLB_PAGE */
> struct hstate {};
>
> -static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
> - unsigned long addr,
> - int avoid_reserve)
> -{
> - return NULL;
> -}
> -
> -static inline struct page *alloc_huge_page_node(struct hstate *h, int nid)
> +static inline struct page *
> +alloc_huge_page_node(struct hstate *h, struct alloc_control *ac)
> {
> return NULL;
> }
>
> static inline struct page *
> -alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, nodemask_t *nmask)
> +alloc_huge_page_nodemask(struct hstate *h, struct alloc_control *ac)
> {
> return NULL;
> }
> @@ -777,6 +773,13 @@ static inline struct page *alloc_huge_page_vma(struct hstate *h,
> return NULL;
> }
>
> +static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
> + unsigned long addr,
> + int avoid_reserve)
> +{
> + return NULL;
> +}
> +
> static inline int __alloc_bootmem_huge_page(struct hstate *h)
> {
> return 0;
> diff --git a/mm/gup.c b/mm/gup.c
> index ee039d4..6b78f11 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1612,16 +1612,21 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
> if (PageHighMem(page))
> gfp_mask |= __GFP_HIGHMEM;
>
> -#ifdef CONFIG_HUGETLB_PAGE
> if (PageHuge(page)) {
> struct hstate *h = page_hstate(page);
> + struct alloc_control ac = {
> + .nid = nid,
> + .nmask = NULL,
> + .gfp_mask = gfp_mask,
> + };
> +
> /*
> * We don't want to dequeue from the pool because pool pages will
> * mostly be from the CMA region.
> */
> - return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> + return alloc_migrate_huge_page(h, &ac);
> }
> -#endif
> +
> if (PageTransHuge(page)) {
> struct page *thp;
> /*
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 57ece74..453ba94 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1053,8 +1053,8 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
> return page;
> }
>
> -static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid,
> - nodemask_t *nmask)
> +static struct page *dequeue_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac)
> {
> unsigned int cpuset_mems_cookie;
> struct zonelist *zonelist;
> @@ -1062,14 +1062,15 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
> struct zoneref *z;
> int node = NUMA_NO_NODE;
>
> - zonelist = node_zonelist(nid, gfp_mask);
> + zonelist = node_zonelist(ac->nid, ac->gfp_mask);
>
> retry_cpuset:
> cpuset_mems_cookie = read_mems_allowed_begin();
> - for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) {
> + for_each_zone_zonelist_nodemask(zone, z, zonelist,
> + gfp_zone(ac->gfp_mask), ac->nmask) {
> struct page *page;
>
> - if (!cpuset_zone_allowed(zone, gfp_mask))
> + if (!cpuset_zone_allowed(zone, ac->gfp_mask))
> continue;
> /*
> * no need to ask again on the same node. Pool is node rather than
> @@ -1105,9 +1106,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> {
> struct page *page;
> struct mempolicy *mpol;
> - gfp_t gfp_mask;
> - nodemask_t *nodemask;
> - int nid;
> + struct alloc_control ac = {0};
>
> /*
> * A child process with MAP_PRIVATE mappings created by their parent
> @@ -1122,9 +1121,10 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
> if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
> goto err;
>
> - gfp_mask = htlb_alloc_mask(h);
> - nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
> + ac.gfp_mask = htlb_alloc_mask(h);
> + ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
> +
> + page = dequeue_huge_page_nodemask(h, &ac);
> if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
> SetPagePrivate(page);
> h->resv_huge_pages--;
> @@ -1937,15 +1937,16 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
> return page;
> }
>
> -struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> - int nid, nodemask_t *nmask)
> +struct page *alloc_migrate_huge_page(struct hstate *h,
> + struct alloc_control *ac)
> {
> struct page *page;
>
> if (hstate_is_gigantic(h))
> return NULL;
>
> - page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
> + page = alloc_fresh_huge_page(h, ac->gfp_mask,
> + ac->nid, ac->nmask, NULL);
> if (!page)
> return NULL;
>
> @@ -1979,36 +1980,37 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
> }
>
> /* page migration callback function */
> -struct page *alloc_huge_page_node(struct hstate *h, int nid)
> +struct page *alloc_huge_page_node(struct hstate *h,
> + struct alloc_control *ac)
> {
> - gfp_t gfp_mask = htlb_alloc_mask(h);
> struct page *page = NULL;
>
> - if (nid != NUMA_NO_NODE)
> - gfp_mask |= __GFP_THISNODE;
> + ac->gfp_mask = htlb_alloc_mask(h);
> + if (ac->nid != NUMA_NO_NODE)
> + ac->gfp_mask |= __GFP_THISNODE;
>
> spin_lock(&hugetlb_lock);
> if (h->free_huge_pages - h->resv_huge_pages > 0)
> - page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
> + page = dequeue_huge_page_nodemask(h, ac);
> spin_unlock(&hugetlb_lock);
>
> if (!page)
> - page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> + page = alloc_migrate_huge_page(h, ac);
>
> return page;
> }
>
> /* page migration callback function */
> -struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> - nodemask_t *nmask)
> +struct page *alloc_huge_page_nodemask(struct hstate *h,
> + struct alloc_control *ac)
> {
> - gfp_t gfp_mask = htlb_alloc_mask(h);
> + ac->gfp_mask = htlb_alloc_mask(h);
>
> spin_lock(&hugetlb_lock);
> if (h->free_huge_pages - h->resv_huge_pages > 0) {
> struct page *page;
>
> - page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
> + page = dequeue_huge_page_nodemask(h, ac);
> if (page) {
> spin_unlock(&hugetlb_lock);
> return page;
> @@ -2016,22 +2018,20 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> }
> spin_unlock(&hugetlb_lock);
>
> - return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
> + return alloc_migrate_huge_page(h, ac);
> }
>
> /* mempolicy aware migration callback */
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> unsigned long address)
> {
> + struct alloc_control ac = {0};
> struct mempolicy *mpol;
> - nodemask_t *nodemask;
> struct page *page;
> - gfp_t gfp_mask;
> - int node;
>
> - gfp_mask = htlb_alloc_mask(h);
> - node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> - page = alloc_huge_page_nodemask(h, node, nodemask);
> + ac.gfp_mask = htlb_alloc_mask(h);
> + ac.nid = huge_node(vma, address, ac.gfp_mask, &mpol, &ac.nmask);
> + page = alloc_huge_page_nodemask(h, &ac);
> mpol_cond_put(mpol);
>
> return page;
> diff --git a/mm/internal.h b/mm/internal.h
> index 9886db2..6e613ce 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -613,4 +613,11 @@ static inline bool is_migrate_highatomic_page(struct page *page)
>
> void setup_zone_pageset(struct zone *zone);
> extern struct page *alloc_new_node_page(struct page *page, unsigned long node);
> +
> +struct alloc_control {
> + int nid; /* preferred node id */
> + nodemask_t *nmask;
> + gfp_t gfp_mask;
> +};
> +
> #endif /* __MM_INTERNAL_H */
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 3813206..3b6b551 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -1068,10 +1068,15 @@ static int migrate_page_add(struct page *page, struct list_head *pagelist,
> /* page allocation callback for NUMA node migration */
> struct page *alloc_new_node_page(struct page *page, unsigned long node)
> {
> - if (PageHuge(page))
> - return alloc_huge_page_node(page_hstate(compound_head(page)),
> - node);
> - else if (PageTransHuge(page)) {
> + if (PageHuge(page)) {
> + struct hstate *h = page_hstate(compound_head(page));
> + struct alloc_control ac = {
> + .nid = node,
> + .nmask = NULL,
> + };
> +
> + return alloc_huge_page_node(h, &ac);
> + } else if (PageTransHuge(page)) {
> struct page *thp;
>
> thp = alloc_pages_node(node,
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 824c22e..30217537 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -1544,10 +1544,15 @@ struct page *new_page_nodemask(struct page *page,
> unsigned int order = 0;
> struct page *new_page = NULL;
>
> - if (PageHuge(page))
> - return alloc_huge_page_nodemask(
> - page_hstate(compound_head(page)),
> - preferred_nid, nodemask);
> + if (PageHuge(page)) {
> + struct hstate *h = page_hstate(compound_head(page));
> + struct alloc_control ac = {
> + .nid = preferred_nid,
> + .nmask = nodemask,
> + };
> +
> + return alloc_huge_page_nodemask(h, &ac);
> + }
>
> if (PageTransHuge(page)) {
> gfp_mask |= GFP_TRANSHUGE;
> --
> 2.7.4
>

--
Michal Hocko
SUSE Labs

2020-06-10 03:10:08

by Joonsoo Kim

[permalink] [raw]

Subject: Re: [PATCH v2 03/12] mm/hugetlb: introduce alloc_control structure to simplify migration target allocation APIs

2020년 6월 9일 (화) 오후 10:24, Michal Hocko <[email protected]>님이 작성:
>
> On Wed 27-05-20 15:44:54, Joonsoo Kim wrote:
> > From: Joonsoo Kim <[email protected]>
> >
> > Currently, page allocation functions for migration requires some arguments.
> > More worse, in the following patch, more argument will be needed to unify
> > the similar functions. To simplify them, in this patch, unified data
> > structure that controls allocation behaviour is introduced.
> >
> > For clean-up, function declarations are re-ordered.
>
> This is really hard to review without having a clear picture of the
> resulting code so bear with me. I can see some reasons why allocation
> callbacks might benefit from a agragated argument but you seem to touch
> the internal hugetlb dequeue_huge_page_vma which shouldn't really need
> that. I wouldn't mind much but I remember the hugetlb allocation
> functions layering is quite complex for hugetlb specific reasons (see
> 0c397daea1d4 ("mm, hugetlb: further simplify hugetlb allocation API")
> for more background).
>
> Is there any reason why the agregated argument cannot be limited only to
> migration callbacks. That would be alloc_huge_page_node, alloc_huge_page_nodemask
> and alloc_huge_page_vma.

I did it since it's simple for me, but, yes, it's not good to touch
the internal functions.

Anyway, Vlastimil already suggested not to introduce alloc_control for
any hugetlb
functions. I will try it on the next version so the next version would not have
alloc_control in any hugetlb functions.

Thanks.