2023-03-14 13:20:18

by zhangpeng (AS)

Subject: [PATCH v2 0/3] userfaultfd: convert userfaultfd functions to use folios

From: ZhangPeng <[email protected]>

This patch series converts several userfaultfd functions to use folios.
This series passes the userfaultfd selftests and the LTP userfaultfd
test cases.

Change log:

v1->v2:
In patch 2:
- Rename copy_large_folio_from_user() to copy_folio_from_user().
- Delete the inner_folio.
- Convert kmap() and kmap_atomic() to kmap_local_page(), and use
pagefault_disable() to ensure that a deadlock cannot occur (see the
sketch after this list).
- Move flush_dcache_folio() outside the copy loop.
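
For reference, the copy loop that results from these changes is in patch 2
(copy_folio_from_user()); condensed from that patch, the pattern is roughly:

long copy_folio_from_user(struct folio *dst_folio,
                          const void __user *usr_src,
                          bool allow_pagefault)
{
        unsigned int nr_pages = folio_nr_pages(dst_folio);
        unsigned long i, rc, ret_val = nr_pages * PAGE_SIZE;
        void *kaddr;

        for (i = 0; i < nr_pages; i++) {
                /* kmap_local_page() replaces both kmap() and kmap_atomic() */
                kaddr = kmap_local_page(folio_page(dst_folio, i));
                /* pagefault_disable() prevents a fault that could deadlock on mmap_lock */
                if (!allow_pagefault)
                        pagefault_disable();
                rc = copy_from_user(kaddr, usr_src + i * PAGE_SIZE, PAGE_SIZE);
                if (!allow_pagefault)
                        pagefault_enable();
                kunmap_local(kaddr);

                /* ret_val ends up as the number of bytes left uncopied */
                ret_val -= (PAGE_SIZE - rc);
                if (rc)
                        break;
                cond_resched();
        }
        /* a single flush for the whole folio, outside the loop */
        flush_dcache_folio(dst_folio);
        return ret_val;
}

kmap_local_page() plus pagefault_disable() gives the same no-sleep guarantee
that kmap_atomic() used to provide, while keeping the mapping local to the
current task.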

ZhangPeng (3):
userfaultfd: convert mcopy_atomic_pte() to use a folio
userfaultfd: convert __mcopy_atomic_hugetlb() to use a folio
userfaultfd: convert __mcopy_atomic() to use a folio

include/linux/hugetlb.h | 4 +--
include/linux/mm.h | 3 +-
include/linux/shmem_fs.h | 2 +-
mm/hugetlb.c | 25 +++++++-------
mm/memory.c | 27 +++++++--------
mm/shmem.c | 17 +++++----
mm/userfaultfd.c | 74 +++++++++++++++++++---------------------
7 files changed, 72 insertions(+), 80 deletions(-)

--
2.25.1



2023-03-14 13:20:31

by zhangpeng (AS)

Subject: [PATCH v2 3/3] userfaultfd: convert __mcopy_atomic() to use a folio

From: ZhangPeng <[email protected]>

Convert mcopy_atomic_pte(), shmem_mfill_atomic_pte() and
mfill_atomic_pte() to pass in a folio pointer.
Convert __mcopy_atomic() to use a folio.

Signed-off-by: ZhangPeng <[email protected]>
---
include/linux/shmem_fs.h | 2 +-
mm/shmem.c | 17 ++++++++---------
mm/userfaultfd.c | 35 +++++++++++++++++------------------
3 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 103d1000a5a2..580af0e3bf02 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -156,7 +156,7 @@ extern int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
unsigned long dst_addr,
unsigned long src_addr,
bool zeropage, bool wp_copy,
- struct page **pagep);
+ struct folio **foliop);
#else /* !CONFIG_SHMEM */
#define shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \
src_addr, zeropage, wp_copy, pagep) ({ BUG(); 0; })
diff --git a/mm/shmem.c b/mm/shmem.c
index 448f393d8ab2..986267bf34ef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2421,7 +2421,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
unsigned long dst_addr,
unsigned long src_addr,
bool zeropage, bool wp_copy,
- struct page **pagep)
+ struct folio **foliop)
{
struct inode *inode = file_inode(dst_vma->vm_file);
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -2439,14 +2439,14 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
* and now we find ourselves with -ENOMEM. Release the page, to
* avoid a BUG_ON in our caller.
*/
- if (unlikely(*pagep)) {
- put_page(*pagep);
- *pagep = NULL;
+ if (unlikely(*foliop)) {
+ folio_put(*foliop);
+ *foliop = NULL;
}
return -ENOMEM;
}

- if (!*pagep) {
+ if (!*foliop) {
ret = -ENOMEM;
folio = shmem_alloc_folio(gfp, info, pgoff);
if (!folio)
@@ -2478,7 +2478,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,

/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
- *pagep = &folio->page;
+ *foliop = folio;
ret = -ENOENT;
/* don't free the page */
goto out_unacct_blocks;
@@ -2489,9 +2489,9 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
clear_user_highpage(&folio->page, dst_addr);
}
} else {
- folio = page_folio(*pagep);
- VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
- *pagep = NULL;
+ folio = *foliop;
+ VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
+ *foliop = NULL;
}

VM_BUG_ON(folio_test_locked(folio));
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index a7da26cdb731..cfe2d527f1c0 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -132,14 +132,14 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
- struct page **pagep,
+ struct folio **foliop,
bool wp_copy)
{
void *page_kaddr;
int ret;
struct folio *folio;

- if (!*pagep) {
+ if (!*foliop) {
ret = -ENOMEM;
folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma, dst_addr, false);
if (!folio)
@@ -171,16 +171,16 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
ret = -ENOENT;
- *pagep = &folio->page;
+ *foliop = folio;
/* don't free the page */
goto out;
}

flush_dcache_folio(folio);
} else {
- folio = page_folio(*pagep);
- VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
- *pagep = NULL;
+ folio = *foliop;
+ VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
+ *foliop = NULL;
}

/*
@@ -476,7 +475,7 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
- struct page **page,
+ struct folio **foliop,
enum mcopy_atomic_mode mode,
bool wp_copy)
{
@@ -500,7 +499,7 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
if (!(dst_vma->vm_flags & VM_SHARED)) {
if (mode == MCOPY_ATOMIC_NORMAL)
err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
- dst_addr, src_addr, page,
+ dst_addr, src_addr, foliop,
wp_copy);
else
err = mfill_zeropage_pte(dst_mm, dst_pmd,
@@ -509,7 +508,7 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
err = shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma,
dst_addr, src_addr,
mode != MCOPY_ATOMIC_NORMAL,
- wp_copy, page);
+ wp_copy, foliop);
}

return err;
@@ -528,7 +527,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
pmd_t *dst_pmd;
unsigned long src_addr, dst_addr;
long copied;
- struct page *page;
+ struct folio *folio;
bool wp_copy;

/*
@@ -544,7 +543,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
src_addr = src_start;
dst_addr = dst_start;
copied = 0;
- page = NULL;
+ folio = NULL;
retry:
mmap_read_lock(dst_mm);

@@ -641,16 +640,16 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
BUG_ON(pmd_trans_huge(*dst_pmd));

err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
- src_addr, &page, mcopy_mode, wp_copy);
+ src_addr, &folio, mcopy_mode, wp_copy);
cond_resched();

if (unlikely(err == -ENOENT)) {
void *page_kaddr;

mmap_read_unlock(dst_mm);
- BUG_ON(!page);
+ BUG_ON(!folio);

- page_kaddr = kmap_local_page(page);
+ page_kaddr = kmap_local_folio(folio, 0);
err = copy_from_user(page_kaddr,
(const void __user *) src_addr,
PAGE_SIZE);
@@ -659,10 +658,10 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
err = -EFAULT;
goto out;
}
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
goto retry;
} else
- BUG_ON(page);
+ BUG_ON(folio);

if (!err) {
dst_addr += PAGE_SIZE;
@@ -679,8 +678,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
out_unlock:
mmap_read_unlock(dst_mm);
out:
- if (page)
- put_page(page);
+ if (folio)
+ folio_put(folio);
BUG_ON(copied < 0);
BUG_ON(err > 0);
BUG_ON(!copied && !err);
--
2.25.1


2023-03-14 13:27:05

by Matthew Wilcox

Subject: Re: [PATCH v2 0/3] userfaultfd: convert userfaultfd functions to use folios

On Tue, Mar 14, 2023 at 01:13:47PM +0000, Peng Zhang wrote:
> From: ZhangPeng <[email protected]>
>
> This patch series converts several userfaultfd functions to use folios.
> This series passes the userfaultfd selftests and the LTP userfaultfd
> test cases.

That's what you said about the earlier patchset too. Assuming you
ran the tests, they need to be improved to find the bug that was in the
earlier version of the patches.
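
A content comparison after UFFDIO_COPY is the kind of check that would catch
a bad copy. As a minimal userspace sketch only (this is not a selftest from
the tree; a hugetlb variant would use MAP_HUGETLB and a huge-page-sized
length):

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        long page = sysconf(_SC_PAGESIZE);
        int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        struct uffdio_api api = { .api = UFFD_API };
        char *src, *dst;

        if (uffd < 0 || ioctl(uffd, UFFDIO_API, &api))
                return 1;

        src = aligned_alloc(page, page);
        memset(src, 0xa5, page);

        dst = mmap(NULL, page, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        struct uffdio_register reg = {
                .range = { .start = (unsigned long)dst, .len = page },
                .mode = UFFDIO_REGISTER_MODE_MISSING,
        };
        if (ioctl(uffd, UFFDIO_REGISTER, &reg))
                return 1;

        struct uffdio_copy copy = {
                .dst = (unsigned long)dst,
                .src = (unsigned long)src,
                .len = page,
        };
        if (ioctl(uffd, UFFDIO_COPY, &copy) || copy.copy != page)
                return 1;

        /* verify the destination really contains the source data */
        if (memcmp(src, dst, page)) {
                fprintf(stderr, "UFFDIO_COPY corrupted destination\n");
                return 1;
        }
        return 0;
}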

2023-03-14 13:40:10

by zhangpeng (AS)

Subject: [PATCH v2 2/3] userfaultfd: convert __mcopy_atomic_hugetlb() to use a folio

From: ZhangPeng <[email protected]>

Change copy_huge_page_from_user() to copy_folio_from_user().
Convert hugetlb_mcopy_atomic_pte() and __mcopy_atomic_hugetlb() to use a
folio.

Signed-off-by: ZhangPeng <[email protected]>
---
include/linux/hugetlb.h | 4 ++--
include/linux/mm.h | 3 +--
mm/hugetlb.c | 25 ++++++++++++-------------
mm/memory.c | 27 ++++++++++++---------------
mm/userfaultfd.c | 20 +++++++++-----------
5 files changed, 36 insertions(+), 43 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 7c977d234aba..030252ce51bd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -163,7 +163,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
unsigned long dst_addr,
unsigned long src_addr,
enum mcopy_atomic_mode mode,
- struct page **pagep,
+ struct folio **foliop,
bool wp_copy);
#endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
@@ -399,7 +399,7 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
unsigned long dst_addr,
unsigned long src_addr,
enum mcopy_atomic_mode mode,
- struct page **pagep,
+ struct folio **foliop,
bool wp_copy)
{
BUG();
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1f79667824eb..428cff424d26 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3546,9 +3546,8 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
unsigned long addr_hint,
struct vm_area_struct *vma,
unsigned int pages_per_huge_page);
-extern long copy_huge_page_from_user(struct page *dst_page,
+long copy_folio_from_user(struct folio *dst_folio,
const void __user *usr_src,
- unsigned int pages_per_huge_page,
bool allow_pagefault);

/**
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 07abcb6eb203..ea1f3b448b7a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6163,7 +6163,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
unsigned long dst_addr,
unsigned long src_addr,
enum mcopy_atomic_mode mode,
- struct page **pagep,
+ struct folio **foliop,
bool wp_copy)
{
bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
@@ -6185,7 +6185,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
if (!folio)
goto out;
folio_in_pagecache = true;
- } else if (!*pagep) {
+ } else if (!*foliop) {
/* If a page already exists, then it's UFFDIO_COPY for
* a non-missing case. Return -EEXIST.
*/
@@ -6201,9 +6201,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
goto out;
}

- ret = copy_huge_page_from_user(&folio->page,
- (const void __user *) src_addr,
- pages_per_huge_page(h), false);
+ ret = copy_folio_from_user(folio,
+ (const void __user *) src_addr, false);

/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
@@ -6222,7 +6221,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ret = -ENOMEM;
goto out;
}
- *pagep = &folio->page;
+ *foliop = folio;
/* Set the outparam pagep and return to the caller to
* copy the contents outside the lock. Don't free the
* page.
@@ -6232,23 +6231,23 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
} else {
if (vm_shared &&
hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
- put_page(*pagep);
+ folio_put(*foliop);
ret = -EEXIST;
- *pagep = NULL;
+ *foliop = NULL;
goto out;
}

folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0);
if (IS_ERR(folio)) {
- put_page(*pagep);
+ folio_put(*foliop);
ret = -ENOMEM;
- *pagep = NULL;
+ *foliop = NULL;
goto out;
}
- copy_user_huge_page(&folio->page, *pagep, dst_addr, dst_vma,
+ copy_user_huge_page(&folio->page, &((*foliop)->page), dst_addr, dst_vma,
pages_per_huge_page(h));
- put_page(*pagep);
- *pagep = NULL;
+ folio_put(*foliop);
+ *foliop = NULL;
}

/*
diff --git a/mm/memory.c b/mm/memory.c
index f456f3b5049c..78fe9ef0c058 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5769,37 +5769,34 @@ void copy_user_huge_page(struct page *dst, struct page *src,
process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
}

-long copy_huge_page_from_user(struct page *dst_page,
+long copy_folio_from_user(struct folio *dst_folio,
const void __user *usr_src,
- unsigned int pages_per_huge_page,
bool allow_pagefault)
{
void *page_kaddr;
unsigned long i, rc = 0;
- unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
+ unsigned int nr_pages = folio_nr_pages(dst_folio);
+ unsigned long ret_val = nr_pages * PAGE_SIZE;
struct page *subpage;

- for (i = 0; i < pages_per_huge_page; i++) {
- subpage = nth_page(dst_page, i);
- if (allow_pagefault)
- page_kaddr = kmap(subpage);
- else
- page_kaddr = kmap_atomic(subpage);
+ for (i = 0; i < nr_pages; i++) {
+ subpage = folio_page(dst_folio, i);
+ page_kaddr = kmap_local_page(subpage);
+ if (!allow_pagefault)
+ pagefault_disable();
rc = copy_from_user(page_kaddr,
usr_src + i * PAGE_SIZE, PAGE_SIZE);
- if (allow_pagefault)
- kunmap(subpage);
- else
- kunmap_atomic(page_kaddr);
+ if (!allow_pagefault)
+ pagefault_enable();
+ kunmap_local(page_kaddr);

ret_val -= (PAGE_SIZE - rc);
if (rc)
break;

- flush_dcache_page(subpage);
-
cond_resched();
}
+ flush_dcache_folio(dst_folio);
return ret_val;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 263191c37fb5..a7da26cdb731 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -324,7 +324,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
pte_t *dst_pte;
unsigned long src_addr, dst_addr;
long copied;
- struct page *page;
+ struct folio *folio;
unsigned long vma_hpagesize;
pgoff_t idx;
u32 hash;
@@ -344,7 +344,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
src_addr = src_start;
dst_addr = dst_start;
copied = 0;
- page = NULL;
+ folio = NULL;
vma_hpagesize = vma_kernel_pagesize(dst_vma);

/*
@@ -413,7 +413,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
}

err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
- dst_addr, src_addr, mode, &page,
+ dst_addr, src_addr, mode, &folio,
wp_copy);

hugetlb_vma_unlock_read(dst_vma);
@@ -423,12 +423,10 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,

if (unlikely(err == -ENOENT)) {
mmap_read_unlock(dst_mm);
- BUG_ON(!page);
+ BUG_ON(!folio);

- err = copy_huge_page_from_user(page,
- (const void __user *)src_addr,
- vma_hpagesize / PAGE_SIZE,
- true);
+ err = copy_folio_from_user(folio,
+ (const void __user *) src_addr, true);
if (unlikely(err)) {
err = -EFAULT;
goto out;
@@ -438,7 +436,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
dst_vma = NULL;
goto retry;
} else
- BUG_ON(page);
+ BUG_ON(folio);

if (!err) {
dst_addr += vma_hpagesize;
@@ -455,8 +453,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
out_unlock:
mmap_read_unlock(dst_mm);
out:
- if (page)
- put_page(page);
+ if (folio)
+ folio_put(folio);
BUG_ON(copied < 0);
BUG_ON(err > 0);
BUG_ON(!copied && !err);
--
2.25.1


2023-03-14 13:43:32

by zhangpeng (AS)

Subject: [PATCH v2 1/3] userfaultfd: convert mcopy_atomic_pte() to use a folio

From: ZhangPeng <[email protected]>

Call vma_alloc_folio() directly instead of alloc_page_vma(). Add an
assertion that this is a single-page folio and remove several calls to
compound_head().

Signed-off-by: ZhangPeng <[email protected]>
---
mm/userfaultfd.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 53c3d916ff66..263191c37fb5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -137,15 +137,15 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
{
void *page_kaddr;
int ret;
- struct page *page;
+ struct folio *folio;

if (!*pagep) {
ret = -ENOMEM;
- page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, dst_vma, dst_addr);
- if (!page)
+ folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, dst_vma, dst_addr, false);
+ if (!folio)
goto out;

- page_kaddr = kmap_local_page(page);
+ page_kaddr = kmap_local_folio(folio, 0);
/*
* The read mmap_lock is held here. Despite the
* mmap_lock being read recursive a deadlock is still
@@ -171,36 +171,37 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
ret = -ENOENT;
- *pagep = page;
+ *pagep = &folio->page;
/* don't free the page */
goto out;
}

- flush_dcache_page(page);
+ flush_dcache_folio(folio);
} else {
- page = *pagep;
+ folio = page_folio(*pagep);
+ VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
*pagep = NULL;
}

/*
- * The memory barrier inside __SetPageUptodate makes sure that
+ * The memory barrier inside __folio_mark_uptodate makes sure that
* preceding stores to the page contents become visible before
* the set_pte_at() write.
*/
- __SetPageUptodate(page);
+ __folio_mark_uptodate(folio);

ret = -ENOMEM;
- if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL))
+ if (mem_cgroup_charge(folio, dst_mm, GFP_KERNEL))
goto out_release;

ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
- page, true, wp_copy);
+ &folio->page, true, wp_copy);
if (ret)
goto out_release;
out:
return ret;
out_release:
- put_page(page);
+ folio_put(folio);
goto out;
}

--
2.25.1


2023-03-15 19:54:13

by Mike Kravetz

Subject: Re: [PATCH v2 2/3] userfaultfd: convert __mcopy_atomic_hugetlb() to use a folio

On 03/14/23 13:13, Peng Zhang wrote:
> From: ZhangPeng <[email protected]>
>
> Change copy_huge_page_from_user() to copy_folio_from_user().

Any reason why you did not change copy_user_huge_page to folios as
well? All callers are passing &folio->page.

Just my opinion, but it might be better/easier to review if the copy
routines were done in a separate patch. There is a little more than
folio conversion happening there.

--
Mike Kravetz

> Convert hugetlb_mcopy_atomic_pte() and __mcopy_atomic_hugetlb() to use a
> folio.
>
> Signed-off-by: ZhangPeng <[email protected]>
> ---
> include/linux/hugetlb.h | 4 ++--
> include/linux/mm.h | 3 +--
> mm/hugetlb.c | 25 ++++++++++++-------------
> mm/memory.c | 27 ++++++++++++---------------
> mm/userfaultfd.c | 20 +++++++++-----------
> 5 files changed, 36 insertions(+), 43 deletions(-)

2023-03-16 03:54:43

by zhangpeng (AS)

Subject: Re: [PATCH v2 2/3] userfaultfd: convert __mcopy_atomic_hugetlb() to use a folio

On 2023/3/16 3:53, Mike Kravetz wrote:

> On 03/14/23 13:13, Peng Zhang wrote:
>> From: ZhangPeng <[email protected]>
>>
>> Change copy_huge_page_from_user() to copy_folio_from_user().
> Any reason why you did not change copy_user_huge_page to folios as
> well? All callers are passing &folio->page.
>
> Just my opinion, but it might be better/easier to review if the copy
> routines were done in a separate patch. There is a little more than
> folio conversion happening there.

Thanks for your review.

I will convert copy_user_huge_page() to use folios and split this patch
in v3 of this patch series.

Thanks,
Peng.
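
A rough sketch of what that copy_user_huge_page() conversion could look
like; the name copy_user_folio() and the exact signature here are
assumptions for illustration, not necessarily what v3 will do:

/*
 * Sketch only: a folio wrapper around copy_user_huge_page(), since every
 * caller already passes &folio->page. Name and signature are assumptions.
 */
static inline void copy_user_folio(struct folio *dst, struct folio *src,
                                   unsigned long addr_hint,
                                   struct vm_area_struct *vma)
{
        copy_user_huge_page(&dst->page, &src->page, addr_hint, vma,
                            folio_nr_pages(dst));
}

/* hugetlb_mcopy_atomic_pte() would then read: */
copy_user_folio(folio, *foliop, dst_addr, dst_vma);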