LinuxLists.cc - [PATCH] Convert __unmap_hugepage

2024-05-20 19:49:09

Subject: [PATCH] Convert __unmap_hugepage_range() to folios

Replaces 4 calls to compound_head() with one. Also converts
unmap_hugepage_range() and unmap_ref_private() to take in folios.

Signed-off-by: Vishal Moola (Oracle) <[email protected]>
---
include/linux/hugetlb.h | 6 ++---
mm/hugetlb.c | 50 ++++++++++++++++++++---------------------
2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 68244bb3637a..6c7ce8679950 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -137,12 +137,12 @@ struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
unsigned int *page_mask);
void unmap_hugepage_range(struct vm_area_struct *,
- unsigned long, unsigned long, struct page *,
+ unsigned long, unsigned long, struct folio *,
zap_flags_t);
void __unmap_hugepage_range(struct mmu_gather *tlb,
struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- struct page *ref_page, zap_flags_t zap_flags);
+ struct folio *ref_folio, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
void hugetlb_show_meminfo_node(int nid);
@@ -458,7 +458,7 @@ static inline long hugetlb_change_protection(

static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
struct vm_area_struct *vma, unsigned long start,
- unsigned long end, struct page *ref_page,
+ unsigned long end, struct folio *ref_folio,
zap_flags_t zap_flags)
{
BUG();
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6be78e7d4f6e..02f40bfa3686 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5643,14 +5643,14 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,

void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
- struct page *ref_page, zap_flags_t zap_flags)
+ struct folio *ref_folio, zap_flags_t zap_flags)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long address;
pte_t *ptep;
pte_t pte;
spinlock_t *ptl;
- struct page *page;
+ struct folio *folio;
struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h);
bool adjust_reservation = false;
@@ -5663,7 +5663,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,

/*
* This is a hugetlb vma, all the pte entries should point
- * to huge page.
+ * to huge folio.
*/
tlb_change_page_size(tlb, sz);
tlb_start_vma(tlb, vma);
@@ -5714,19 +5714,19 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
continue;
}

- page = pte_page(pte);
+ folio = page_folio(pte_page(pte));
/*
- * If a reference page is supplied, it is because a specific
- * page is being unmapped, not a range. Ensure the page we
- * are about to unmap is the actual page of interest.
+ * If a reference folio is supplied, it is because a specific
+ * folio is being unmapped, not a range. Ensure the folio we
+ * are about to unmap is the actual folio of interest.
*/
- if (ref_page) {
- if (page != ref_page) {
+ if (ref_folio) {
+ if (folio != ref_folio) {
spin_unlock(ptl);
continue;
}
/*
- * Mark the VMA as having unmapped its page so that
+ * Mark the VMA as having unmapped its folio so that
* future faults in this VMA will fail rather than
* looking like data was lost
*/
@@ -5736,7 +5736,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
pte = huge_ptep_get_and_clear(mm, address, ptep);
tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
if (huge_pte_dirty(pte))
- set_page_dirty(page);
+ folio_mark_dirty(folio);
/* Leave a uffd-wp pte marker if needed */
if (huge_pte_uffd_wp(pte) &&
!(zap_flags & ZAP_FLAG_DROP_MARKER))
@@ -5744,17 +5744,17 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
make_pte_marker(PTE_MARKER_UFFD_WP),
sz);
hugetlb_count_sub(pages_per_huge_page(h), mm);
- hugetlb_remove_rmap(page_folio(page));
+ hugetlb_remove_rmap(folio);

/*
- * Restore the reservation for anonymous page, otherwise the
- * backing page could be stolen by someone.
+ * Restore the reservation for anonymous folio, otherwise the
+ * backing folio could be stolen by someone.
* If there we are freeing a surplus, do not set the restore
* reservation bit.
*/
if (!h->surplus_huge_pages && __vma_private_lock(vma) &&
- folio_test_anon(page_folio(page))) {
- folio_set_hugetlb_restore_reserve(page_folio(page));
+ folio_test_anon(folio)) {
+ folio_set_hugetlb_restore_reserve(folio);
/* Reservation to be adjusted after the spin lock */
adjust_reservation = true;
}
@@ -5771,11 +5771,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (adjust_reservation && vma_needs_reservation(h, vma, address))
vma_add_reservation(h, vma, address);

- tlb_remove_page_size(tlb, page, huge_page_size(h));
+ tlb_remove_page_size(tlb, &folio->page, huge_page_size(h));
/*
- * Bail out after unmapping reference page if supplied
+ * Bail out after unmapping reference folio if supplied
*/
- if (ref_page)
+ if (ref_folio)
break;
}
tlb_end_vma(tlb, vma);
@@ -5837,7 +5837,7 @@ void __hugetlb_zap_end(struct vm_area_struct *vma,
}

void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end, struct page *ref_page,
+ unsigned long end, struct folio *ref_folio,
zap_flags_t zap_flags)
{
struct mmu_notifier_range range;
@@ -5849,7 +5849,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
mmu_notifier_invalidate_range_start(&range);
tlb_gather_mmu(&tlb, vma->vm_mm);

- __unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+ __unmap_hugepage_range(&tlb, vma, start, end, ref_folio, zap_flags);

mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb);
@@ -5862,7 +5862,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
* same region.
*/
static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
- struct page *page, unsigned long address)
+ struct folio *folio, unsigned long address)
{
struct hstate *h = hstate_vma(vma);
struct vm_area_struct *iter_vma;
@@ -5898,7 +5898,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
continue;

/*
- * Unmap the page from other VMAs without their own reserves.
+ * Unmap the folio from other VMAs without their own reserves.
* They get marked to be SIGKILLed if they fault in these
* areas. This is because a future no-page fault on this VMA
* could insert a zeroed page instead of the data existing
@@ -5906,7 +5906,7 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
*/
if (!is_vma_resv_set(iter_vma, HPAGE_RESV_OWNER))
unmap_hugepage_range(iter_vma, address,
- address + huge_page_size(h), page, 0);
+ address + huge_page_size(h), folio, 0);
}
i_mmap_unlock_write(mapping);
}
@@ -6035,7 +6035,7 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
hugetlb_vma_unlock_read(vma);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);

- unmap_ref_private(mm, vma, &old_folio->page,
+ unmap_ref_private(mm, vma, old_folio,
vmf->address);

mutex_lock(&hugetlb_fault_mutex_table[hash]);
--
2.45.0

2024-05-20 22:53:33

by Matthew Wilcox

[permalink] [raw]

Subject: Re: [PATCH] Convert __unmap_hugepage_range() to folios

On Mon, May 20, 2024 at 12:47:49PM -0700, Vishal Moola (Oracle) wrote:
> Replaces 4 calls to compound_head() with one. Also converts
> unmap_hugepage_range() and unmap_ref_private() to take in folios.

This is great!

> void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> unsigned long start, unsigned long end,
> - struct page *ref_page, zap_flags_t zap_flags)
> + struct folio *ref_folio, zap_flags_t zap_flags)
> {
> struct mm_struct *mm = vma->vm_mm;
> unsigned long address;
> pte_t *ptep;
> pte_t pte;
> spinlock_t *ptl;
> - struct page *page;
> + struct folio *folio;
> struct hstate *h = hstate_vma(vma);
> unsigned long sz = huge_page_size(h);

I would appreciate some further cleanup ...

size_t sz = folio_size(folio);

I think there are further cleanups along those lines, eg
pages_per_huge_page(), hugetlb_mask_last_page(), huge_page_mask().

2024-05-21 21:18:42

by Vishal Moola

[permalink] [raw]

Subject: Re: [PATCH] Convert __unmap_hugepage_range() to folios

On Mon, May 20, 2024 at 2:46 PM Matthew Wilcox <[email protected]> wrote:
>
> On Mon, May 20, 2024 at 12:47:49PM -0700, Vishal Moola (Oracle) wrote:
> > Replaces 4 calls to compound_head() with one. Also converts
> > unmap_hugepage_range() and unmap_ref_private() to take in folios.
>
> This is great!
>
> > void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> > unsigned long start, unsigned long end,
> > - struct page *ref_page, zap_flags_t zap_flags)
> > + struct folio *ref_folio, zap_flags_t zap_flags)
> > {
> > struct mm_struct *mm = vma->vm_mm;
> > unsigned long address;
> > pte_t *ptep;
> > pte_t pte;
> > spinlock_t *ptl;
> > - struct page *page;
> > + struct folio *folio;
> > struct hstate *h = hstate_vma(vma);
> > unsigned long sz = huge_page_size(h);
>
> I would appreciate some further cleanup ...
>
> size_t sz = folio_size(folio);
>
> I think there are further cleanups along those lines, eg
> pages_per_huge_page(), hugetlb_mask_last_page(), huge_page_mask().
>

Gotcha, I'll look into those and change them in v2.

2024-06-03 19:39:12

by Vishal Moola

[permalink] [raw]

Subject: Re: [PATCH] Convert __unmap_hugepage_range() to folios

On Tue, May 21, 2024 at 2:13 PM Vishal Moola <[email protected]> wrote:
>
> On Mon, May 20, 2024 at 2:46 PM Matthew Wilcox <[email protected]> wrote:
> >
> > On Mon, May 20, 2024 at 12:47:49PM -0700, Vishal Moola (Oracle) wrote:
> > > Replaces 4 calls to compound_head() with one. Also converts
> > > unmap_hugepage_range() and unmap_ref_private() to take in folios.
> >
> > This is great!
> >
> > > void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> > > unsigned long start, unsigned long end,
> > > - struct page *ref_page, zap_flags_t zap_flags)
> > > + struct folio *ref_folio, zap_flags_t zap_flags)
> > > {
> > > struct mm_struct *mm = vma->vm_mm;
> > > unsigned long address;
> > > pte_t *ptep;
> > > pte_t pte;
> > > spinlock_t *ptl;
> > > - struct page *page;
> > > + struct folio *folio;
> > > struct hstate *h = hstate_vma(vma);
> > > unsigned long sz = huge_page_size(h);
> >
> > I would appreciate some further cleanup ...
> >
> > size_t sz = folio_size(folio);
> >
> > I think there are further cleanups along those lines, eg
> > pages_per_huge_page(), hugetlb_mask_last_page(), huge_page_mask().
> >
>
> Gotcha, I'll look into those and change them in v2.

I took a closer look at your suggestions for cleanups here. Most callers of
unmap_hugepage_range() pass NULL as ref_folio - meaning we want to unmap
all pages in the range. This means a lot of the preparatory work is likely
done without a reference to a folio, so using folio_size() is unsafe.

For the places where we do later have a reference to a folio, I think we
should continue to use the hstate-defined values, since using the folio in
one place and the hstate in another makes things harder to change (if we
ever want to).