2022-09-04 15:27:45

by Cheng Li

[permalink] [raw]
Subject: [PATCH] mm: use mem_map_offset instead of mem_map_next

To handle the discontiguity case, mem_map_next() has a parameter named
`offset`. A caller may well be confused about why "get the next entry"
needs a parameter named "offset". The other drawback of mem_map_next()
is that callers must take care of the mapping between the parameters
"iter" and "offset"; otherwise we may get a hole or a duplication
during iteration. So use mem_map_offset() instead of mem_map_next().

Signed-off-by: Cheng Li <[email protected]>
Fixes: 69d177c2fc70 ("hugetlbfs: handle pages higher order than MAX_ORDER")
---
mm/hugetlb.c | 25 +++++++++++++++----------
mm/internal.h | 16 ++--------------
mm/memory.c | 21 ++++++++++-----------
3 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e070b8593b37..56d74cdbdbf9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1308,12 +1308,13 @@ static void __destroy_compound_gigantic_page(struct page *page,
{
int i;
int nr_pages = 1 << order;
- struct page *p = page + 1;
+ struct page *p;

atomic_set(compound_mapcount_ptr(page), 0);
atomic_set(compound_pincount_ptr(page), 0);

- for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+ for (i = 1; i < nr_pages; i++) {
+ p = mem_map_offset(page, i);
p->mapping = NULL;
clear_compound_head(p);
if (!demote)
@@ -1530,7 +1531,7 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
static void __update_and_free_page(struct hstate *h, struct page *page)
{
int i;
- struct page *subpage = page;
+ struct page *subpage;

if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
@@ -1561,8 +1562,8 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
if (unlikely(PageHWPoison(page)))
hugetlb_clear_page_hwpoison(page);

- for (i = 0; i < pages_per_huge_page(h);
- i++, subpage = mem_map_next(subpage, page, i)) {
+ for (i = 0; i < pages_per_huge_page(h); i++) {
+ subpage = mem_map_offset(page, i);
subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_referenced | 1 << PG_dirty |
1 << PG_active | 1 << PG_private |
@@ -1769,13 +1770,15 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
{
int i, j;
int nr_pages = 1 << order;
- struct page *p = page + 1;
+ struct page *p;

/* we rely on prep_new_huge_page to set the destructor */
set_compound_order(page, order);
__ClearPageReserved(page);
__SetPageHead(page);
- for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
+ for (i = 1; i < nr_pages; i++) {
+ p = mem_map_offset(p, page, i);
+
/*
* For gigantic hugepages allocated through bootmem at
* boot, it's safer to be consistent with the not-gigantic
@@ -1822,14 +1825,16 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,

out_error:
/* undo tail page modifications made above */
- p = page + 1;
- for (j = 1; j < i; j++, p = mem_map_next(p, page, j)) {
+ for (j = 1; j < i; j++) {
+ p = mem_map_offset(page, j);
clear_compound_head(p);
set_page_refcounted(p);
}
/* need to clear PG_reserved on remaining tail pages */
- for (; j < nr_pages; j++, p = mem_map_next(p, page, j))
+ for (; j < nr_pages; j++) {
+ p = mem_map_offset(page, j);
__ClearPageReserved(p);
+ }
set_compound_order(page, 0);
#ifdef CONFIG_64BIT
page[1].compound_nr = 0;
diff --git a/mm/internal.h b/mm/internal.h
index 785409805ed7..1012a305a60f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -646,25 +646,13 @@ static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
*/
static inline struct page *mem_map_offset(struct page *base, int offset)
{
- if (unlikely(offset >= MAX_ORDER_NR_PAGES))
- return nth_page(base, offset);
- return base + offset;
-}
-
-/*
- * Iterator over all subpages within the maximally aligned gigantic
- * page 'base'. Handle any discontiguity in the mem_map.
- */
-static inline struct page *mem_map_next(struct page *iter,
- struct page *base, int offset)
-{
- if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
+ if (unlikely(offset >= MAX_ORDER_NR_PAGES)) {
unsigned long pfn = page_to_pfn(base) + offset;
if (!pfn_valid(pfn))
return NULL;
return pfn_to_page(pfn);
}
- return iter + 1;
+ return base + offset;
}

/* Memory initialisation debug and verification */
diff --git a/mm/memory.c b/mm/memory.c
index 4ba73f5aa8bb..32179c4fd1a5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5637,11 +5637,11 @@ static void clear_gigantic_page(struct page *page,
unsigned int pages_per_huge_page)
{
int i;
- struct page *p = page;
+ struct page *p;

might_sleep();
- for (i = 0; i < pages_per_huge_page;
- i++, p = mem_map_next(p, page, i)) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ p = mem_map_offset(page, i);
cond_resched();
clear_user_highpage(p, addr + i * PAGE_SIZE);
}
@@ -5677,13 +5677,12 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
struct page *dst_base = dst;
struct page *src_base = src;

- for (i = 0; i < pages_per_huge_page; ) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ dst = mem_map_offset(dst_base, i);
+ src = mem_map_offset(src_base, i);
+
cond_resched();
copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
-
- i++;
- dst = mem_map_next(dst, dst_base, i);
- src = mem_map_next(src, src_base, i);
}
}

@@ -5730,10 +5729,10 @@ long copy_huge_page_from_user(struct page *dst_page,
void *page_kaddr;
unsigned long i, rc = 0;
unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
- struct page *subpage = dst_page;
+ struct page *subpage;

- for (i = 0; i < pages_per_huge_page;
- i++, subpage = mem_map_next(subpage, dst_page, i)) {
+ for (i = 0; i < pages_per_huge_page; i++) {
+ subpage = mem_map_offset(dst_page, i);
if (allow_pagefault)
page_kaddr = kmap(subpage);
else
--
1.8.3.1


2022-09-04 17:19:20

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] mm: use mem_map_offset instead of mem_map_next

Hi Cheng,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]

url: https://github.com/intel-lab-lkp/linux/commits/Cheng-Li/mm-use-mem_map_offset-instead-of-mem_map_next/20220904-220520
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: i386-randconfig-a005 (https://download.01.org/0day-ci/archive/20220905/[email protected]/config)
compiler: gcc-11 (Debian 11.3.0-5) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/18a9446796efe2ae164f38013cbd4272a6b89cb1
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Cheng-Li/mm-use-mem_map_offset-instead-of-mem_map_next/20220904-220520
git checkout 18a9446796efe2ae164f38013cbd4272a6b89cb1
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
Reported-by: kernel test robot <[email protected]>

All error/warnings (new ones prefixed by >>):

mm/hugetlb.c: In function '__prep_compound_gigantic_page':
>> mm/hugetlb.c:1798:39: warning: passing argument 2 of 'mem_map_offset' makes integer from pointer without a cast [-Wint-conversion]
1798 | p = mem_map_offset(p, page, i);
| ^~~~
| |
| struct page *
In file included from mm/hugetlb.c:47:
mm/internal.h:646:66: note: expected 'int' but argument is of type 'struct page *'
646 | static inline struct page *mem_map_offset(struct page *base, int offset)
| ~~~~^~~~~~
>> mm/hugetlb.c:1798:21: error: too many arguments to function 'mem_map_offset'
1798 | p = mem_map_offset(p, page, i);
| ^~~~~~~~~~~~~~
In file included from mm/hugetlb.c:47:
mm/internal.h:646:28: note: declared here
646 | static inline struct page *mem_map_offset(struct page *base, int offset)
| ^~~~~~~~~~~~~~


vim +/mem_map_offset +1798 mm/hugetlb.c

1785
1786 static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
1787 bool demote)
1788 {
1789 int i, j;
1790 int nr_pages = 1 << order;
1791 struct page *p;
1792
1793 /* we rely on prep_new_huge_page to set the destructor */
1794 set_compound_order(page, order);
1795 __ClearPageReserved(page);
1796 __SetPageHead(page);
1797 for (i = 1; i < nr_pages; i++) {
> 1798 p = mem_map_offset(p, page, i);
1799
1800 /*
1801 * For gigantic hugepages allocated through bootmem at
1802 * boot, it's safer to be consistent with the not-gigantic
1803 * hugepages and clear the PG_reserved bit from all tail pages
1804 * too. Otherwise drivers using get_user_pages() to access tail
1805 * pages may get the reference counting wrong if they see
1806 * PG_reserved set on a tail page (despite the head page not
1807 * having PG_reserved set). Enforcing this consistency between
1808 * head and tail pages allows drivers to optimize away a check
1809 * on the head page when they need know if put_page() is needed
1810 * after get_user_pages().
1811 */
1812 __ClearPageReserved(p);
1813 /*
1814 * Subtle and very unlikely
1815 *
1816 * Gigantic 'page allocators' such as memblock or cma will
1817 * return a set of pages with each page ref counted. We need
1818 * to turn this set of pages into a compound page with tail
1819 * page ref counts set to zero. Code such as speculative page
1820 * cache adding could take a ref on a 'to be' tail page.
1821 * We need to respect any increased ref count, and only set
1822 * the ref count to zero if count is currently 1. If count
1823 * is not 1, we return an error. An error return indicates
1824 * the set of pages can not be converted to a gigantic page.
1825 * The caller who allocated the pages should then discard the
1826 * pages using the appropriate free interface.
1827 *
1828 * In the case of demote, the ref count will be zero.
1829 */
1830 if (!demote) {
1831 if (!page_ref_freeze(p, 1)) {
1832 pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n");
1833 goto out_error;
1834 }
1835 } else {
1836 VM_BUG_ON_PAGE(page_count(p), p);
1837 }
1838 set_compound_head(p, page);
1839 }
1840 atomic_set(compound_mapcount_ptr(page), -1);
1841 atomic_set(compound_pincount_ptr(page), 0);
1842 return true;
1843
1844 out_error:
1845 /* undo tail page modifications made above */
1846 for (j = 1; j < i; j++) {
1847 p = mem_map_offset(page, j);
1848 clear_compound_head(p);
1849 set_page_refcounted(p);
1850 }
1851 /* need to clear PG_reserved on remaining tail pages */
1852 for (; j < nr_pages; j++) {
1853 p = mem_map_offset(page, j);
1854 __ClearPageReserved(p);
1855 }
1856 set_compound_order(page, 0);
1857 #ifdef CONFIG_64BIT
1858 page[1].compound_nr = 0;
1859 #endif
1860 __ClearPageHead(page);
1861 return false;
1862 }
1863

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-09-04 17:23:25

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] mm: use mem_map_offset instead of mem_map_next

Hi Cheng,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on akpm-mm/mm-everything]

url: https://github.com/intel-lab-lkp/linux/commits/Cheng-Li/mm-use-mem_map_offset-instead-of-mem_map_next/20220904-220520
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: i386-randconfig-a002
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/18a9446796efe2ae164f38013cbd4272a6b89cb1
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Cheng-Li/mm-use-mem_map_offset-instead-of-mem_map_next/20220904-220520
git checkout 18a9446796efe2ae164f38013cbd4272a6b89cb1
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

>> mm/hugetlb.c:1798:31: error: too many arguments to function call, expected 2, have 3
p = mem_map_offset(p, page, i);
~~~~~~~~~~~~~~ ^
mm/internal.h:646:28: note: 'mem_map_offset' declared here
static inline struct page *mem_map_offset(struct page *base, int offset)
^
1 error generated.


vim +1798 mm/hugetlb.c

1785
1786 static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
1787 bool demote)
1788 {
1789 int i, j;
1790 int nr_pages = 1 << order;
1791 struct page *p;
1792
1793 /* we rely on prep_new_huge_page to set the destructor */
1794 set_compound_order(page, order);
1795 __ClearPageReserved(page);
1796 __SetPageHead(page);
1797 for (i = 1; i < nr_pages; i++) {
> 1798 p = mem_map_offset(p, page, i);
1799
1800 /*
1801 * For gigantic hugepages allocated through bootmem at
1802 * boot, it's safer to be consistent with the not-gigantic
1803 * hugepages and clear the PG_reserved bit from all tail pages
1804 * too. Otherwise drivers using get_user_pages() to access tail
1805 * pages may get the reference counting wrong if they see
1806 * PG_reserved set on a tail page (despite the head page not
1807 * having PG_reserved set). Enforcing this consistency between
1808 * head and tail pages allows drivers to optimize away a check
1809 * on the head page when they need know if put_page() is needed
1810 * after get_user_pages().
1811 */
1812 __ClearPageReserved(p);
1813 /*
1814 * Subtle and very unlikely
1815 *
1816 * Gigantic 'page allocators' such as memblock or cma will
1817 * return a set of pages with each page ref counted. We need
1818 * to turn this set of pages into a compound page with tail
1819 * page ref counts set to zero. Code such as speculative page
1820 * cache adding could take a ref on a 'to be' tail page.
1821 * We need to respect any increased ref count, and only set
1822 * the ref count to zero if count is currently 1. If count
1823 * is not 1, we return an error. An error return indicates
1824 * the set of pages can not be converted to a gigantic page.
1825 * The caller who allocated the pages should then discard the
1826 * pages using the appropriate free interface.
1827 *
1828 * In the case of demote, the ref count will be zero.
1829 */
1830 if (!demote) {
1831 if (!page_ref_freeze(p, 1)) {
1832 pr_warn("HugeTLB page can not be used due to unexpected inflated ref count\n");
1833 goto out_error;
1834 }
1835 } else {
1836 VM_BUG_ON_PAGE(page_count(p), p);
1837 }
1838 set_compound_head(p, page);
1839 }
1840 atomic_set(compound_mapcount_ptr(page), -1);
1841 atomic_set(compound_pincount_ptr(page), 0);
1842 return true;
1843
1844 out_error:
1845 /* undo tail page modifications made above */
1846 for (j = 1; j < i; j++) {
1847 p = mem_map_offset(page, j);
1848 clear_compound_head(p);
1849 set_page_refcounted(p);
1850 }
1851 /* need to clear PG_reserved on remaining tail pages */
1852 for (; j < nr_pages; j++) {
1853 p = mem_map_offset(page, j);
1854 __ClearPageReserved(p);
1855 }
1856 set_compound_order(page, 0);
1857 #ifdef CONFIG_64BIT
1858 page[1].compound_nr = 0;
1859 #endif
1860 __ClearPageHead(page);
1861 return false;
1862 }
1863

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (4.92 kB)
config (166.94 kB)
Download all attachments

2022-09-04 21:22:45

by Matthew Wilcox

[permalink] [raw]
Subject: Re: [PATCH] mm: use mem_map_offset instead of mem_map_next

On Sun, Sep 04, 2022 at 02:02:41PM +0000, Cheng Li wrote:
> To handle discontiguity case, mem_map_next() has a parameter named
> `offset`. As a function caller, one would be confused why "get
> next entry" needs a parameter named "offset". The other drawback of
> mem_map_next() is that the callers must take care of the map between
> parameter "iter" and "offset", otherwise we may get an hole or
> duplication during iteration. So we use mem_map_offset instead of
> mem_map_next.

I think we should go further and get rid of mem_map_offset().
nth_page() is now more efficient than mem_map_offset().

2022-09-06 18:13:54

by Mike Kravetz

[permalink] [raw]
Subject: Re: [PATCH] mm: use mem_map_offset instead of mem_map_next

On 09/04/22 22:15, Matthew Wilcox wrote:
> On Sun, Sep 04, 2022 at 02:02:41PM +0000, Cheng Li wrote:
> > To handle discontiguity case, mem_map_next() has a parameter named
> > `offset`. As a function caller, one would be confused why "get
> > next entry" needs a parameter named "offset". The other drawback of
> > mem_map_next() is that the callers must take care of the map between
> > parameter "iter" and "offset", otherwise we may get an hole or
> > duplication during iteration. So we use mem_map_offset instead of
> > mem_map_next.
>
> I think we should go further and get rid of mem_map_offset().
> nth_page() is now more efficient than mem_map_offset().

Agree.

However, IIUC nth_page() will 'almost' always be more efficient. The
only exception is the unlikely configuration where CONFIG_SPARSEMEM &&
!CONFIG_SPARSEMEM_VMEMMAP. Correct?

Not arguing against replacement. Just wanting to refresh my memory.
--
Mike Kravetz