2022-09-05 06:21:34

by Cheng Li

[permalink] [raw]
Subject: [PATCH v2] mm: use mem_map_offset instead of mem_map_next

To handle discontiguity case, mem_map_next() has a parameter named
`offset`. As a function caller, one would be confused why "get
next entry" needs a parameter named "offset". The other drawback of
mem_map_next() is that the callers must take care of the map between
parameter "iter" and "offset", otherwise we may get an hole or
duplication during iteration. So we use mem_map_offset instead of
mem_map_next.

Signed-off-by: Cheng Li <[email protected]>
Fixes: 69d177c2fc70 ("hugetlbfs: handle pages higher order than MAX_ORDER")
---

Notes:
v2:
- fix build error

mm/hugetlb.c | 25 +++++++++++++++----------
mm/internal.h | 16 ++--------------
mm/memory.c | 21 ++++++++++-----------
3 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e070b8593b37..a9592f69bf82 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1308,12 +1308,13 @@ static void __destroy_compound_gigantic_page(struct page *page,
{
int i;
int nr_pages = 1 << order;
- struct page *p = page + 1;
+ struct page *p;

atomic_set(compound_mapcount_ptr(page), 0);
atomic_set(compound_pincount_ptr(page), 0);

- for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+ for (i = 1; i < nr_pages; i++) {
+ p = mem_map_offset(page, i);
p->mapping = NULL;
clear_compound_head(p);
if (!demote)
@@ -1530,7 +1531,7 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
static void __update_and_free_page(struct hstate *h, struct page *page)
{
int i;
- struct page *subpage = page;
+ struct page *subpage;

if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
@@ -1561,8 +1562,8 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
if (unlikely(PageHWPoison(page)))
hugetlb_clear_page_hwpoison(page);

- for (i = 0; i < pages_per_huge_page(h);
- i++, subpage = mem_map_next(subpage, page, i)) {
+ for (i = 0; i < pages_per_huge_page(h); i++) {
+ subpage = mem_map_offset(page, i);
subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_referenced | 1 << PG_dirty |
1 << PG_active | 1 << PG_private |
@@ -1769,13 +1770,15 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
{
int i, j;
int nr_pages = 1 << order;
- struct page *p = page + 1;
+ struct page *p;

/* we rely on prep_new_huge_page to set the destructor */
set_compound_order(page, order);
__ClearPageReserved(page);
__SetPageHead(page);
- for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+ for (i = 1; i < nr_pages; i++) {
+ p = mem_map_offset(page, i);
+
/*
* For gigantic hugepages allocated through bootmem at
* boot, it's safer to be consistent with the not-gigantic
@@ -1822,14 +1825,16 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,

out_error:
/* undo tail page modifications made above */
- p = page + 1;
- for (j = 1; j < i; j++, p = mem_map_next(p, page, j)) {
+ for (j = 1; j < i; j++) {
+ p = mem_map_offset(page, j);
clear_compound_head(p);
set_page_refcounted(p);
}
/* need to clear PG_reserved on remaining tail pages */
- for (; j < nr_pages; j++, p = mem_map_next(p, page, j))
+ for (; j < nr_pages; j++) {
+ p = mem_map_offset(page, j);
__ClearPageReserved(p);
+ }
set_compound_order(page, 0);
#ifdef CONFIG_64BIT
page[1].compound_nr = 0;
diff --git a/mm/internal.h b/mm/internal.h
index 785409805ed7..1012a305a60f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -646,25 +646,13 @@ static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
*/
static inline struct page *mem_map_offset(struct page *base, int offset)
{
- if (unlikely(offset >= MAX_ORDER_NR_PAGES))
- return nth_page(base, offset);
- return base + offset;
-}
-
-/*
- * Iterator over all subpages within the maximally aligned gigantic
- * page 'base'. Handle any discontiguity in the mem_map.
- */
-static inline struct page *mem_map_next(struct page *iter,
- struct page *base, int offset)
-{
- if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
+ if (unlikely(offset >= MAX_ORDER_NR_PAGES)) {
unsigned long pfn = page_to_pfn(base) + offset;
if (!pfn_valid(pfn))
return NULL;
return pfn_to_page(pfn);
}
- return iter + 1;
+ return base + offset;
}

/* Memory initialisation debug and verification */
diff --git a/mm/memory.c b/mm/memory.c
index 4ba73f5aa8bb..32179c4fd1a5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5637,11 +5637,11 @@ static void clear_gigantic_page(struct page *page,
unsigned int pages_per_huge_page)
{
int i;
- struct page *p = page;
+ struct page *p;

might_sleep();
- for (i = 0; i < pages_per_huge_page;
- i++, p = mem_map_next(p, page, i)) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ p = mem_map_offset(page, i);
cond_resched();
clear_user_highpage(p, addr + i * PAGE_SIZE);
}
@@ -5677,13 +5677,12 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
struct page *dst_base = dst;
struct page *src_base = src;

- for (i = 0; i < pages_per_huge_page; ) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ dst = mem_map_offset(dst_base, i);
+ src = mem_map_offset(src_base, i);
+
cond_resched();
copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
-
- i++;
- dst = mem_map_next(dst, dst_base, i);
- src = mem_map_next(src, src_base, i);
}
}

@@ -5730,10 +5729,10 @@ long copy_huge_page_from_user(struct page *dst_page,
void *page_kaddr;
unsigned long i, rc = 0;
unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
- struct page *subpage = dst_page;
+ struct page *subpage;

- for (i = 0; i < pages_per_huge_page;
- i++, subpage = mem_map_next(subpage, dst_page, i)) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ subpage = mem_map_offset(dst_page, i);
if (allow_pagefault)
page_kaddr = kmap(subpage);
else
--
1.8.3.1


2022-09-05 15:58:06

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH v2] mm: use mem_map_offset instead of mem_map_next

On Mon, Sep 05, 2022 at 06:09:19AM +0000, Cheng Li wrote:
> To handle discontiguity case, mem_map_next() has a parameter named
> `offset`. As a function caller, one would be confused why "get
> next entry" needs a parameter named "offset". The other drawback of
> mem_map_next() is that the callers must take care of the map between
> parameter "iter" and "offset", otherwise we may get an hole or
> duplication during iteration. So we use mem_map_offset instead of
> mem_map_next.

Per my previous email, use nth_page(), not mem_map_offset().

2022-09-06 18:37:18

by Mike Kravetz

[permalink] [raw]
Subject: Re: [PATCH v2] mm: use mem_map_offset instead of mem_map_next

On 09/05/22 06:09, Cheng Li wrote:
> To handle discontiguity case, mem_map_next() has a parameter named
> `offset`. As a function caller, one would be confused why "get
> next entry" needs a parameter named "offset". The other drawback of
> mem_map_next() is that the callers must take care of the map between
> parameter "iter" and "offset", otherwise we may get an hole or
> duplication during iteration. So we use mem_map_offset instead of
> mem_map_next.
>
> Signed-off-by: Cheng Li <[email protected]>
> Fixes: 69d177c2fc70 ("hugetlbfs: handle pages higher order than MAX_ORDER")

The Fixes tag implies there is a user visible bug. I do not believe this is
the case here. Is there a user visible bug?
--
Mike Kravetz

> ---
>
> Notes:
> v2:
> - fix build error
>
> mm/hugetlb.c | 25 +++++++++++++++----------
> mm/internal.h | 16 ++--------------
> mm/memory.c | 21 ++++++++++-----------
> 3 files changed, 27 insertions(+), 35 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index e070b8593b37..a9592f69bf82 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1308,12 +1308,13 @@ static void __destroy_compound_gigantic_page(struct page *page,
> {
> int i;
> int nr_pages = 1 << order;
> - struct page *p = page + 1;
> + struct page *p;
>
> atomic_set(compound_mapcount_ptr(page), 0);
> atomic_set(compound_pincount_ptr(page), 0);
>
> - for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
> + for (i = 1; i < nr_pages; i++) {
> + p = mem_map_offset(page, i);
> p->mapping = NULL;
> clear_compound_head(p);
> if (!demote)
> @@ -1530,7 +1531,7 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
> static void __update_and_free_page(struct hstate *h, struct page *page)
> {
> int i;
> - struct page *subpage = page;
> + struct page *subpage;
>
> if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
> return;
> @@ -1561,8 +1562,8 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
> if (unlikely(PageHWPoison(page)))
> hugetlb_clear_page_hwpoison(page);
>
> - for (i = 0; i < pages_per_huge_page(h);
> - i++, subpage = mem_map_next(subpage, page, i)) {
> + for (i = 0; i < pages_per_huge_page(h); i++) {
> + subpage = mem_map_offset(page, i);
> subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
> 1 << PG_referenced | 1 << PG_dirty |
> 1 << PG_active | 1 << PG_private |
> @@ -1769,13 +1770,15 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
> {
> int i, j;
> int nr_pages = 1 << order;
> - struct page *p = page + 1;
> + struct page *p;
>
> /* we rely on prep_new_huge_page to set the destructor */
> set_compound_order(page, order);
> __ClearPageReserved(page);
> __SetPageHead(page);
> - for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
> + for (i = 1; i < nr_pages; i++) {
> + p = mem_map_offset(page, i);
> +
> /*
> * For gigantic hugepages allocated through bootmem at
> * boot, it's safer to be consistent with the not-gigantic
> @@ -1822,14 +1825,16 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
>
> out_error:
> /* undo tail page modifications made above */
> - p = page + 1;
> - for (j = 1; j < i; j++, p = mem_map_next(p, page, j)) {
> + for (j = 1; j < i; j++) {
> + p = mem_map_offset(page, j);
> clear_compound_head(p);
> set_page_refcounted(p);
> }
> /* need to clear PG_reserved on remaining tail pages */
> - for (; j < nr_pages; j++, p = mem_map_next(p, page, j))
> + for (; j < nr_pages; j++) {
> + p = mem_map_offset(page, j);
> __ClearPageReserved(p);
> + }
> set_compound_order(page, 0);
> #ifdef CONFIG_64BIT
> page[1].compound_nr = 0;
> diff --git a/mm/internal.h b/mm/internal.h
> index 785409805ed7..1012a305a60f 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -646,25 +646,13 @@ static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
> */
> static inline struct page *mem_map_offset(struct page *base, int offset)
> {
> - if (unlikely(offset >= MAX_ORDER_NR_PAGES))
> - return nth_page(base, offset);
> - return base + offset;
> -}
> -
> -/*
> - * Iterator over all subpages within the maximally aligned gigantic
> - * page 'base'. Handle any discontiguity in the mem_map.
> - */
> -static inline struct page *mem_map_next(struct page *iter,
> - struct page *base, int offset)
> -{
> - if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
> + if (unlikely(offset >= MAX_ORDER_NR_PAGES)) {
> unsigned long pfn = page_to_pfn(base) + offset;
> if (!pfn_valid(pfn))
> return NULL;
> return pfn_to_page(pfn);
> }
> - return iter + 1;
> + return base + offset;
> }
>
> /* Memory initialisation debug and verification */
> diff --git a/mm/memory.c b/mm/memory.c
> index 4ba73f5aa8bb..32179c4fd1a5 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -5637,11 +5637,11 @@ static void clear_gigantic_page(struct page *page,
> unsigned int pages_per_huge_page)
> {
> int i;
> - struct page *p = page;
> + struct page *p;
>
> might_sleep();
> - for (i = 0; i < pages_per_huge_page;
> - i++, p = mem_map_next(p, page, i)) {
> + for (i = 0; i < pages_per_huge_page; i++) {
> + p = mem_map_offset(page, i);
> cond_resched();
> clear_user_highpage(p, addr + i * PAGE_SIZE);
> }
> @@ -5677,13 +5677,12 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
> struct page *dst_base = dst;
> struct page *src_base = src;
>
> - for (i = 0; i < pages_per_huge_page; ) {
> + for (i = 0; i < pages_per_huge_page; i++) {
> + dst = mem_map_offset(dst_base, i);
> + src = mem_map_offset(src_base, i);
> +
> cond_resched();
> copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
> -
> - i++;
> - dst = mem_map_next(dst, dst_base, i);
> - src = mem_map_next(src, src_base, i);
> }
> }
>
> @@ -5730,10 +5729,10 @@ long copy_huge_page_from_user(struct page *dst_page,
> void *page_kaddr;
> unsigned long i, rc = 0;
> unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
> - struct page *subpage = dst_page;
> + struct page *subpage;
>
> - for (i = 0; i < pages_per_huge_page;
> - i++, subpage = mem_map_next(subpage, dst_page, i)) {
> + for (i = 0; i < pages_per_huge_page; i++) {
> + subpage = mem_map_offset(dst_page, i);
> if (allow_pagefault)
> page_kaddr = kmap(subpage);
> else
> --
> 1.8.3.1
>

2022-09-07 00:34:18

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH v2] mm: use mem_map_offset instead of mem_map_next

On Tue, 6 Sep 2022 10:07:03 -0700 Mike Kravetz <[email protected]> wrote:

> On 09/05/22 06:09, Cheng Li wrote:
> > To handle discontiguity case, mem_map_next() has a parameter named
> > `offset`. As a function caller, one would be confused why "get
> > next entry" needs a parameter named "offset". The other drawback of
> > mem_map_next() is that the callers must take care of the map between
> > parameter "iter" and "offset", otherwise we may get an hole or
> > duplication during iteration. So we use mem_map_offset instead of
> > mem_map_next.
> >
> > Signed-off-by: Cheng Li <[email protected]>
> > Fixes: 69d177c2fc70 ("hugetlbfs: handle pages higher order than MAX_ORDER")
>
> The Fixes tag implies there is a user visible bug. I do not believe this is
> the case here. Is there a user visible bug?

A Fixes: with a cc:stable would indicate a user-visible bug. But IMO a
bare Fixes: is simply a when-to-stop guide to backporters - a
convenience. And, I suppose, it has some documentation benefit.

And if people are really that interested, they can read the dang
changelog ;)

2022-09-07 01:47:20

by Cheng Li

[permalink] [raw]
Subject: Re: [PATCH v2] mm: use mem_map_offset instead of mem_map_next

On Tue, Sep 06, 2022 at 05:10:48PM -0700, Andrew Morton wrote:
> On Tue, 6 Sep 2022 10:07:03 -0700 Mike Kravetz <[email protected]> wrote:
>
> > On 09/05/22 06:09, Cheng Li wrote:
> > > To handle discontiguity case, mem_map_next() has a parameter named
> > > `offset`. As a function caller, one would be confused why "get
> > > next entry" needs a parameter named "offset". The other drawback of
> > > mem_map_next() is that the callers must take care of the map between
> > > parameter "iter" and "offset", otherwise we may get an hole or
> > > duplication during iteration. So we use mem_map_offset instead of
> > > mem_map_next.
> > >
> > > Signed-off-by: Cheng Li <[email protected]>
> > > Fixes: 69d177c2fc70 ("hugetlbfs: handle pages higher order than MAX_ORDER")
> >
> > The Fixes tag implies there is a user visible bug. I do not believe this is
> > the case here. Is there a user visible bug?
>
> A Fixes: with a cc:stable would indicate a user-visible bug. But IMO a
> bare Fixes: is simply a when-to-stop guide to backporters - a
> convenience. And, I suppose, it has some documentation benefit.
>
> And if people are really that interested, they can read the dang
> changelog ;)
>

Thank you for the reviews and the "Fixes" tag tips.

So seems we are agrenment on replacing mem_map_offset() with nth_page().
I may need to send the version 3 :)

I learnt the "Fixes" tag usage from this kenrel contribution guide.[1]

[1]
https://www.kernel.org/doc/html/v4.17/process/submitting-patches.html#using-reported-by-tested-by-reviewed-by-suggested-by-and-fixes