From: Joonsoo Kim <[email protected]>
There is a user who does not want to use CMA memory for migration. Until
now, this has been implemented on the caller side, but that is not optimal
since the caller has only limited information. This patch implements it on
the callee side to get a better result.
Acked-by: Mike Kravetz <[email protected]>
Signed-off-by: Joonsoo Kim <[email protected]>
---
include/linux/hugetlb.h | 2 --
mm/gup.c | 9 +++------
mm/hugetlb.c | 21 +++++++++++++++++----
mm/internal.h | 1 +
4 files changed, 21 insertions(+), 12 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f482563..3d05f7d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -503,8 +503,6 @@ struct huge_bootmem_page {
struct hstate *hstate;
};
-struct page *alloc_migrate_huge_page(struct hstate *h,
- struct alloc_control *ac);
struct page *alloc_huge_page_nodemask(struct hstate *h,
struct alloc_control *ac);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
diff --git a/mm/gup.c b/mm/gup.c
index 6b78f11..87eca79 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1617,14 +1617,11 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
struct alloc_control ac = {
.nid = nid,
.nmask = NULL,
- .gfp_mask = gfp_mask,
+ .gfp_mask = __GFP_NOWARN,
+ .skip_cma = true,
};
- /*
- * We don't want to dequeue from the pool because pool pages will
- * mostly be from the CMA region.
- */
- return alloc_migrate_huge_page(h, &ac);
+ return alloc_huge_page_nodemask(h, &ac);
}
if (PageTransHuge(page)) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8132985..e465582 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1033,13 +1033,19 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
h->free_huge_pages_node[nid]++;
}
-static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
+static struct page *dequeue_huge_page_node_exact(struct hstate *h,
+ int nid, bool skip_cma)
{
struct page *page;
- list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
+ list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
+ if (skip_cma && is_migrate_cma_page(page))
+ continue;
+
if (!PageHWPoison(page))
break;
+ }
+
/*
* if 'non-isolated free hugepage' not found on the list,
* the allocation fails.
@@ -1080,7 +1086,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h,
continue;
node = zone_to_nid(zone);
- page = dequeue_huge_page_node_exact(h, node);
+ page = dequeue_huge_page_node_exact(h, node, ac->skip_cma);
if (page)
return page;
}
@@ -1937,7 +1943,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
return page;
}
-struct page *alloc_migrate_huge_page(struct hstate *h,
+static struct page *alloc_migrate_huge_page(struct hstate *h,
struct alloc_control *ac)
{
struct page *page;
@@ -1999,6 +2005,13 @@ struct page *alloc_huge_page_nodemask(struct hstate *h,
}
spin_unlock(&hugetlb_lock);
+ /*
+ * clearing the __GFP_MOVABLE flag ensures that the allocated page
+ * will not come from the CMA area
+ */
+ if (ac->skip_cma)
+ ac->gfp_mask &= ~__GFP_MOVABLE;
+
return alloc_migrate_huge_page(h, ac);
}
diff --git a/mm/internal.h b/mm/internal.h
index 6e613ce..159cfd6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -618,6 +618,7 @@ struct alloc_control {
int nid; /* preferred node id */
nodemask_t *nmask;
gfp_t gfp_mask;
+ bool skip_cma;
};
#endif /* __MM_INTERNAL_H */
--
2.7.4
On Wed 27-05-20 15:44:57, Joonsoo Kim wrote:
> From: Joonsoo Kim <[email protected]>
>
> There is a user who do not want to use CMA memory for migration. Until
> now, it is implemented by caller side but it's not optimal since there
> is limited information on caller. This patch implements it on callee side
> to get better result.
I do not follow this changelog and honestly do not see an improvement.
skip_cma in the alloc_control sound like a hack to me. I can now see
why your earlier patch has started to or the given gfp_mask. If anything
this should be folded here. But even then I do not like a partial
gfp_mask (__GFP_NOWARN on its own really has GFP_NOWAIT like semantic).
> Acked-by: Mike Kravetz <[email protected]>
> Signed-off-by: Joonsoo Kim <[email protected]>
> ---
> include/linux/hugetlb.h | 2 --
> mm/gup.c | 9 +++------
> mm/hugetlb.c | 21 +++++++++++++++++----
> mm/internal.h | 1 +
> 4 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index f482563..3d05f7d 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -503,8 +503,6 @@ struct huge_bootmem_page {
> struct hstate *hstate;
> };
>
> -struct page *alloc_migrate_huge_page(struct hstate *h,
> - struct alloc_control *ac);
> struct page *alloc_huge_page_nodemask(struct hstate *h,
> struct alloc_control *ac);
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> diff --git a/mm/gup.c b/mm/gup.c
> index 6b78f11..87eca79 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1617,14 +1617,11 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
> struct alloc_control ac = {
> .nid = nid,
> .nmask = NULL,
> - .gfp_mask = gfp_mask,
> + .gfp_mask = __GFP_NOWARN,
> + .skip_cma = true,
> };
>
> - /*
> - * We don't want to dequeue from the pool because pool pages will
> - * mostly be from the CMA region.
> - */
> - return alloc_migrate_huge_page(h, &ac);
> + return alloc_huge_page_nodemask(h, &ac);
> }
>
> if (PageTransHuge(page)) {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 8132985..e465582 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1033,13 +1033,19 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
> h->free_huge_pages_node[nid]++;
> }
>
> -static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
> +static struct page *dequeue_huge_page_node_exact(struct hstate *h,
> + int nid, bool skip_cma)
> {
> struct page *page;
>
> - list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
> + list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
> + if (skip_cma && is_migrate_cma_page(page))
> + continue;
> +
> if (!PageHWPoison(page))
> break;
> + }
> +
> /*
> * if 'non-isolated free hugepage' not found on the list,
> * the allocation fails.
> @@ -1080,7 +1086,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h,
> continue;
> node = zone_to_nid(zone);
>
> - page = dequeue_huge_page_node_exact(h, node);
> + page = dequeue_huge_page_node_exact(h, node, ac->skip_cma);
> if (page)
> return page;
> }
> @@ -1937,7 +1943,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
> return page;
> }
>
> -struct page *alloc_migrate_huge_page(struct hstate *h,
> +static struct page *alloc_migrate_huge_page(struct hstate *h,
> struct alloc_control *ac)
> {
> struct page *page;
> @@ -1999,6 +2005,13 @@ struct page *alloc_huge_page_nodemask(struct hstate *h,
> }
> spin_unlock(&hugetlb_lock);
>
> + /*
> + * clearing __GFP_MOVABLE flag ensure that allocated page
> + * will not come from CMA area
> + */
> + if (ac->skip_cma)
> + ac->gfp_mask &= ~__GFP_MOVABLE;
> +
> return alloc_migrate_huge_page(h, ac);
> }
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 6e613ce..159cfd6 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -618,6 +618,7 @@ struct alloc_control {
> int nid; /* preferred node id */
> nodemask_t *nmask;
> gfp_t gfp_mask;
> + bool skip_cma;
> };
>
> #endif /* __MM_INTERNAL_H */
> --
> 2.7.4
>
--
Michal Hocko
SUSE Labs
2020년 6월 9일 (화) 오후 10:53, Michal Hocko <[email protected]>님이 작성:
>
> On Wed 27-05-20 15:44:57, Joonsoo Kim wrote:
> > From: Joonsoo Kim <[email protected]>
> >
> > There is a user who do not want to use CMA memory for migration. Until
> > now, it is implemented by caller side but it's not optimal since there
> > is limited information on caller. This patch implements it on callee side
> > to get better result.
>
> I do not follow this changelog and honestly do not see an improvement.
> skip_cma in the alloc_control sound like a hack to me. I can now see
new_non_cma_page() wants to allocate a new page that is not in the
CMA area. new_non_cma_page() implements this by not specifying the
__GFP_MOVABLE mask, or by removing this mask.
hugetlb page allocation has two steps. The first is dequeuing from the pool.
Then, if there is no available page in the pool, it allocates from the page
allocator. new_non_cma_page() can control allocation from the page allocator
in hugetlb via the gfp flags. However, dequeuing cannot be controlled this
way, so it skips dequeuing completely. This is why new_non_cma_page() uses
alloc_migrate_huge_page() instead of alloc_huge_page_nodemask().
My patch makes the hugetlb code CMA-aware so that new_non_cma_page()
can benefit from the hugetlb pool.
> why your earlier patch has started to or the given gfp_mask. If anything
> this should be folded here. But even then I do not like a partial
> gfp_mask (__GFP_NOWARN on its own really has GFP_NOWAIT like semantic).
I will not use a partial gfp_mask.
Thanks.