2023-01-19 21:32:46

by Sidhartha Kumar

Subject: [PATCH 4/9] mm/rmap: change hugepage_add_new_anon_rmap to take in a folio

Change hugepage_add_new_anon_rmap() to take in a folio directly. While
adding a folio variable to the four call sites, convert page functions
to their folio equivalents. This removes three callers of the Huge Page
macros which take in a page.

Signed-off-by: Sidhartha Kumar <[email protected]>
---
include/linux/rmap.h | 2 +-
mm/hugetlb.c | 90 +++++++++++++++++++++++++-------------------
mm/rmap.c | 6 +--
3 files changed, 55 insertions(+), 43 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index a6bd1f0a183d..a4570da03e58 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -203,7 +203,7 @@ void page_remove_rmap(struct page *, struct vm_area_struct *,

void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
unsigned long address, rmap_t flags);
-void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
+void hugepage_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
unsigned long address);

static inline void __page_dup_rmap(struct page *page, bool compound)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c37a26c8392c..6f25055c3ba5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4950,7 +4950,7 @@ hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long add
struct folio *new_folio)
{
__folio_mark_uptodate(new_folio);
- hugepage_add_new_anon_rmap(&new_folio->page, vma, addr);
+ hugepage_add_new_anon_rmap(new_folio, vma, addr);
set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, &new_folio->page, 1));
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
@@ -5479,6 +5479,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t pte;
struct hstate *h = hstate_vma(vma);
struct page *old_page, *new_page;
+ struct folio *new_folio = NULL;
int outside_reserve = 0;
vm_fault_t ret = 0;
unsigned long haddr = address & huge_page_mask(h);
@@ -5590,6 +5591,9 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_release_old;
}

+ if (new_page)
+ new_folio = page_folio(new_page);
+
/*
* When the original hugepage is shared one, it does not have
* anon_vma prepared.
@@ -5599,9 +5603,9 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_release_all;
}

- copy_user_huge_page(new_page, old_page, address, vma,
+ copy_user_huge_page(&new_folio->page, old_page, address, vma,
pages_per_huge_page(h));
- __SetPageUptodate(new_page);
+ __folio_mark_uptodate(new_folio);

mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
haddr + huge_page_size(h));
@@ -5618,10 +5622,10 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
page_remove_rmap(old_page, vma, true);
- hugepage_add_new_anon_rmap(new_page, vma, haddr);
+ hugepage_add_new_anon_rmap(new_folio, vma, haddr);
set_huge_pte_at(mm, haddr, ptep,
- make_huge_pte(vma, new_page, !unshare));
- SetHPageMigratable(new_page);
+ make_huge_pte(vma, &new_folio->page, !unshare));
+ folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_page = old_page;
}
@@ -5633,8 +5637,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
* unshare)
*/
if (new_page != old_page)
- restore_reserve_on_error(h, vma, haddr, new_page);
- put_page(new_page);
+ restore_reserve_on_error(h, vma, haddr, &new_folio->page);
+ folio_put(new_folio);
out_release_old:
put_page(old_page);

@@ -5756,6 +5760,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
int anon_rmap = 0;
unsigned long size;
struct page *page;
+ struct folio *folio = NULL;
pte_t new_pte;
spinlock_t *ptl;
unsigned long haddr = address & huge_page_mask(h);
@@ -5833,12 +5838,16 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
ret = 0;
goto out;
}
- clear_huge_page(page, address, pages_per_huge_page(h));
- __SetPageUptodate(page);
+
+ if (page)
+ folio = page_folio(page);
+
+ clear_huge_page(&folio->page, address, pages_per_huge_page(h));
+ __folio_mark_uptodate(folio);
new_page = true;

if (vma->vm_flags & VM_MAYSHARE) {
- int err = hugetlb_add_to_page_cache(page, mapping, idx);
+ int err = hugetlb_add_to_page_cache(&folio->page, mapping, idx);
if (err) {
/*
* err can't be -EEXIST which implies someone
@@ -5847,13 +5856,13 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* to the page cache. So it's safe to call
* restore_reserve_on_error() here.
*/
- restore_reserve_on_error(h, vma, haddr, page);
- put_page(page);
+ restore_reserve_on_error(h, vma, haddr, &folio->page);
+ folio_put(folio);
goto out;
}
new_pagecache_page = true;
} else {
- lock_page(page);
+ folio_lock(folio);
if (unlikely(anon_vma_prepare(vma))) {
ret = VM_FAULT_OOM;
goto backout_unlocked;
@@ -5861,12 +5870,13 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
anon_rmap = 1;
}
} else {
+ folio = page_folio(page);
/*
* If memory error occurs between mmap() and fault, some process
* don't have hwpoisoned swap entry for errored virtual address.
* So we need to block hugepage fault by PG_hwpoison bit check.
*/
- if (unlikely(PageHWPoison(page))) {
+ if (unlikely(folio_test_hwpoison(folio))) {
ret = VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
goto backout_unlocked;
@@ -5874,8 +5884,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,

/* Check for page in userfault range. */
if (userfaultfd_minor(vma)) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
/* See comment in userfaultfd_missing() block above */
if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
ret = 0;
@@ -5909,10 +5919,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
goto backout;

if (anon_rmap)
- hugepage_add_new_anon_rmap(page, vma, haddr);
+ hugepage_add_new_anon_rmap(folio, vma, haddr);
else
- page_dup_file_rmap(page, true);
- new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
+ page_dup_file_rmap(&folio->page, true);
+ new_pte = make_huge_pte(vma, &folio->page, ((vma->vm_flags & VM_WRITE)
&& (vma->vm_flags & VM_SHARED)));
/*
* If this pte was previously wr-protected, keep it wr-protected even
@@ -5925,7 +5935,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
hugetlb_count_add(pages_per_huge_page(h), mm);
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
- ret = hugetlb_wp(mm, vma, address, ptep, flags, page, ptl);
+ ret = hugetlb_wp(mm, vma, address, ptep, flags, &folio->page, ptl);
}

spin_unlock(ptl);
@@ -5936,9 +5946,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* been isolated for migration.
*/
if (new_page)
- SetHPageMigratable(page);
+ folio_set_hugetlb_migratable(folio);

- unlock_page(page);
+ folio_unlock(folio);
out:
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -5948,10 +5958,10 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spin_unlock(ptl);
backout_unlocked:
if (new_page && !new_pagecache_page)
- restore_reserve_on_error(h, vma, haddr, page);
+ restore_reserve_on_error(h, vma, haddr, &folio->page);

- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
goto out;
}

@@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
spinlock_t *ptl;
int ret = -ENOMEM;
struct page *page;
+ struct folio *folio = NULL;
int writable;
bool page_in_pagecache = false;

@@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
*pagep = NULL;
}

+ if (page)
+ folio = page_folio(page);
+
/*
- * The memory barrier inside __SetPageUptodate makes sure that
+ * The memory barrier inside __folio_mark_uptodate makes sure that
* preceding stores to the page contents become visible before
* the set_pte_at() write.
*/
- __SetPageUptodate(page);
+ __folio_mark_uptodate(folio);

/* Add shared, newly allocated pages to the page cache. */
if (vm_shared && !is_continue) {
@@ -6271,7 +6285,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
* hugetlb_fault_mutex_table that here must be hold by
* the caller.
*/
- ret = hugetlb_add_to_page_cache(page, mapping, idx);
+ ret = hugetlb_add_to_page_cache(&folio->page, mapping, idx);
if (ret)
goto out_release_nounlock;
page_in_pagecache = true;
@@ -6280,7 +6294,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ptl = huge_pte_lock(h, dst_mm, dst_pte);

ret = -EIO;
- if (PageHWPoison(page))
+ if (folio_test_hwpoison(folio))
goto out_release_unlock;

/*
@@ -6293,9 +6307,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
goto out_release_unlock;

if (page_in_pagecache)
- page_dup_file_rmap(page, true);
+ page_dup_file_rmap(&folio->page, true);
else
- hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
+ hugepage_add_new_anon_rmap(folio, dst_vma, dst_addr);

/*
* For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
@@ -6306,7 +6320,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
else
writable = dst_vma->vm_flags & VM_WRITE;

- _dst_pte = make_huge_pte(dst_vma, page, writable);
+ _dst_pte = make_huge_pte(dst_vma, &folio->page, writable);
/*
* Always mark UFFDIO_COPY page dirty; note that this may not be
* extremely important for hugetlbfs for now since swapping is not
@@ -6328,20 +6342,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,

spin_unlock(ptl);
if (!is_continue)
- SetHPageMigratable(page);
+ folio_set_hugetlb_migratable(folio);
if (vm_shared || is_continue)
- unlock_page(page);
+ folio_unlock(folio);
ret = 0;
out:
return ret;
out_release_unlock:
spin_unlock(ptl);
if (vm_shared || is_continue)
- unlock_page(page);
+ folio_unlock(folio);
out_release_nounlock:
if (!page_in_pagecache)
- restore_reserve_on_error(h, dst_vma, dst_addr, page);
- put_page(page);
+ restore_reserve_on_error(h, dst_vma, dst_addr, &folio->page);
+ folio_put(folio);
goto out;
}
#endif /* CONFIG_USERFAULTFD */
diff --git a/mm/rmap.c b/mm/rmap.c
index 948ca17a96ad..e6d94bd19879 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2547,15 +2547,13 @@ void hugepage_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
!!(flags & RMAP_EXCLUSIVE));
}

-void hugepage_add_new_anon_rmap(struct page *page,
+void hugepage_add_new_anon_rmap(struct folio *folio,
struct vm_area_struct *vma, unsigned long address)
{
- struct folio *folio = page_folio(page);
-
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
folio_clear_hugetlb_restore_reserve(folio);
- __page_set_anon_rmap(folio, page, vma, address, 1);
+ __page_set_anon_rmap(folio, &folio->page, vma, address, 1);
}
#endif /* CONFIG_HUGETLB_PAGE */
--
2.39.0


2023-01-20 06:21:44

by Matthew Wilcox

Subject: Re: [PATCH 4/9] mm/rmap: change hugepage_add_new_anon_rmap to take in a folio

On Thu, Jan 19, 2023 at 01:14:41PM -0800, Sidhartha Kumar wrote:
> @@ -5599,9 +5603,9 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
> goto out_release_all;
> }
>
> - copy_user_huge_page(new_page, old_page, address, vma,
> + copy_user_huge_page(&new_folio->page, old_page, address, vma,
> pages_per_huge_page(h));

We have a folio_copy(), but it feels to me like we need a
folio_copy_user() so that we can use copy_user_page() on machines
with virtual caches.
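
Something along those lines, perhaps (a sketch only; there is no
folio_copy_user() today, and a real version would presumably keep
copy_user_huge_page()'s cache-friendly access ordering):

static void folio_copy_user(struct folio *dst, struct folio *src,
			    unsigned long addr, struct vm_area_struct *vma)
{
	long i, nr = folio_nr_pages(src);

	for (i = 0; i < nr; i++) {
		cond_resched();
		/* copy_user_highpage() does the right thing for virtual caches */
		copy_user_highpage(folio_page(dst, i), folio_page(src, i),
				   addr + i * PAGE_SIZE, vma);
	}
}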

> @@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
> spinlock_t *ptl;
> int ret = -ENOMEM;
> struct page *page;
> + struct folio *folio = NULL;
> int writable;
> bool page_in_pagecache = false;
>
> @@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
> *pagep = NULL;
> }
>
> + if (page)
> + folio = page_folio(page);
> +
> /*
> - * The memory barrier inside __SetPageUptodate makes sure that
> + * The memory barrier inside __folio_mark_uptodate makes sure that
> * preceding stores to the page contents become visible before
> * the set_pte_at() write.
> */
> - __SetPageUptodate(page);
> + __folio_mark_uptodate(folio);

I suggest that "page" can never be NULL or __SetPageUptodate() would
have crashed.

2023-01-20 21:13:46

by Sidhartha Kumar

Subject: Re: [PATCH 4/9] mm/rmap: change hugepage_add_new_anon_rmap to take in a folio

On 1/19/23 10:00 PM, Matthew Wilcox wrote:
> On Thu, Jan 19, 2023 at 01:14:41PM -0800, Sidhartha Kumar wrote:
>> @@ -5599,9 +5603,9 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
>> goto out_release_all;
>> }
>>
>> - copy_user_huge_page(new_page, old_page, address, vma,
>> + copy_user_huge_page(&new_folio->page, old_page, address, vma,
>> pages_per_huge_page(h));
>
> We have a folio_copy(), but it feels to me like we need a
> folio_copy_user() so that we can use copy_user_page() on machines
> with virtual caches.
>
>> @@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
>> spinlock_t *ptl;
>> int ret = -ENOMEM;
>> struct page *page;
>> + struct folio *folio = NULL;
>> int writable;
>> bool page_in_pagecache = false;
>>
>> @@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
>> *pagep = NULL;
>> }
>>
>> + if (page)
>> + folio = page_folio(page);
>> +
>> /*
>> - * The memory barrier inside __SetPageUptodate makes sure that
>> + * The memory barrier inside __folio_mark_uptodate makes sure that
>> * preceding stores to the page contents become visible before
>> * the set_pte_at() write.
>> */
>> - __SetPageUptodate(page);
>> + __folio_mark_uptodate(folio);
>

Hi Matthew,

In the snippet:

page = alloc_huge_page(dst_vma, dst_addr, 0);
if (IS_ERR(page)) {
put_page(*pagep);
ret = -ENOMEM;
*pagep = NULL;
goto out;
}
copy_user_huge_page(page, *pagep, dst_addr, dst_vma,
pages_per_huge_page(h));

I thought the IS_ERR() call does not handle the NULL case (it only checks
for high error addresses), and the copy_user_huge_page() path does not seem
to handle NULL either, but alloc_huge_page() can possibly return NULL, so I
was unsure how to handle the folio conversion.
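
For reference, IS_ERR() is (roughly, from include/linux/err.h) just the
high-address check:

#define MAX_ERRNO	4095
#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)

static inline bool __must_check IS_ERR(__force const void *ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

so a NULL pointer would pass straight through it.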

> I suggest that "page" can never be NULL or __SetPageUptodate() would
> have crashed.
>

2023-01-20 22:22:30

by Matthew Wilcox

Subject: Re: [PATCH 4/9] mm/rmap: change hugepage_add_new_anon_rmap to take in a folio

On Fri, Jan 20, 2023 at 12:45:38PM -0800, Sidhartha Kumar wrote:
> > > @@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
> > > spinlock_t *ptl;
> > > int ret = -ENOMEM;
> > > struct page *page;
> > > + struct folio *folio = NULL;
> > > int writable;
> > > bool page_in_pagecache = false;
> > > @@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
> > > *pagep = NULL;
> > > }
> > > + if (page)
> > > + folio = page_folio(page);
> > > +
> > > /*
> > > - * The memory barrier inside __SetPageUptodate makes sure that
> > > + * The memory barrier inside __folio_mark_uptodate makes sure that
> > > * preceding stores to the page contents become visible before
> > > * the set_pte_at() write.
> > > */
> > > - __SetPageUptodate(page);
> > > + __folio_mark_uptodate(folio);
> >
>
> Hi Matthew,
>
> In the snippet:
>
> page = alloc_huge_page(dst_vma, dst_addr, 0);
> if (IS_ERR(page)) {
> put_page(*pagep);
> ret = -ENOMEM;
> *pagep = NULL;
> goto out;
> }
> copy_user_huge_page(page, *pagep, dst_addr, dst_vma,
> pages_per_huge_page(h));
>
> I thought the IS_ERR() call does not handle the NULL case (it only checks
> for high error addresses), and the copy_user_huge_page() path does not seem
> to handle NULL either, but alloc_huge_page() can possibly return NULL, so I
> was unsure how to handle the folio conversion.

I'm not sure how alloc_huge_page() can return NULL. It seems like it
returns ERR_PTR(-ENOSPC) or ERR_PTR(-ENOMEM) if it cannot allocate memory?
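
If so, IS_ERR() is the complete failure check at the call site, something
like this (a sketch, not the exact mm/hugetlb.c code):

	page = alloc_huge_page(dst_vma, dst_addr, 0);
	if (IS_ERR(page)) {
		ret = PTR_ERR(page);	/* -ENOSPC or -ENOMEM */
		goto out;
	}
	folio = page_folio(page);	/* never NULL on this path */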

2023-01-20 22:40:34

by Sidhartha Kumar

Subject: Re: [PATCH 4/9] mm/rmap: change hugepage_add_new_anon_rmap to take in a folio

On 1/20/23 1:17 PM, Matthew Wilcox wrote:
> On Fri, Jan 20, 2023 at 12:45:38PM -0800, Sidhartha Kumar wrote:
>>>> @@ -6176,6 +6186,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
>>>> spinlock_t *ptl;
>>>> int ret = -ENOMEM;
>>>> struct page *page;
>>>> + struct folio *folio = NULL;
>>>> int writable;
>>>> bool page_in_pagecache = false;
>>>> @@ -6251,12 +6262,15 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
>>>> *pagep = NULL;
>>>> }
>>>> + if (page)
>>>> + folio = page_folio(page);
>>>> +
>>>> /*
>>>> - * The memory barrier inside __SetPageUptodate makes sure that
>>>> + * The memory barrier inside __folio_mark_uptodate makes sure that
>>>> * preceding stores to the page contents become visible before
>>>> * the set_pte_at() write.
>>>> */
>>>> - __SetPageUptodate(page);
>>>> + __folio_mark_uptodate(folio);
>>>
>>
>> Hi Matthew,
>>
>> In the snippet:
>>
>> page = alloc_huge_page(dst_vma, dst_addr, 0);
>> if (IS_ERR(page)) {
>> put_page(*pagep);
>> ret = -ENOMEM;
>> *pagep = NULL;
>> goto out;
>> }
>> copy_user_huge_page(page, *pagep, dst_addr, dst_vma,
>> pages_per_huge_page(h));
>>
>> I thought the IS_ERR() call does not handle the NULL case (it only checks
>> for high error addresses), and the copy_user_huge_page() path does not seem
>> to handle NULL either, but alloc_huge_page() can possibly return NULL, so I
>> was unsure how to handle the folio conversion.
>
> I'm not sure how alloc_huge_page() can return NULL. It seems like it
> returns ERR_PTR(-ENOSPC) or ERR_PTR(-ENOMEM) if it cannot allocate memory?
>
I see now. I agree that page cannot be NULL on return from
alloc_huge_page(), so I will make that change in v2.
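
Roughly this on top of the patch (untested sketch):

-	if (page)
-		folio = page_folio(page);
+	folio = page_folio(page);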

Thanks,
Sidhartha Kumar