2022-12-26 09:04:39

by Lorenzo Stoakes

[permalink] [raw]
Subject: [PATCH v3 4/5] mm: mlock: update the interface to use folios

This patch updates the mlock interface to accept folios rather than pages,
bringing the interface in line with the internal implementation.

munlock_vma_page() still requires a page_folio() conversion, however this
is consistent with the existing mlock_vma_page() implementation and is a
product of rmap still dealing in pages rather than folios.

Signed-off-by: Lorenzo Stoakes <[email protected]>
---
mm/internal.h | 26 ++++++++++++++++----------
mm/mlock.c | 32 +++++++++++++++-----------------
mm/swap.c | 2 +-
3 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 1d6f4e168510..8a6e83315369 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -515,10 +515,9 @@ extern int mlock_future_check(struct mm_struct *mm, unsigned long flags,
* should be called with vma's mmap_lock held for read or write,
* under page table lock for the pte/pmd being added or removed.
*
- * mlock is usually called at the end of page_add_*_rmap(),
- * munlock at the end of page_remove_rmap(); but new anon
- * pages are managed by lru_cache_add_inactive_or_unevictable()
- * calling mlock_new_page().
+ * mlock is usually called at the end of page_add_*_rmap(), munlock at
+ * the end of page_remove_rmap(); but new anon folios are managed by
+ * folio_add_lru_vma() calling mlock_new_folio().
*
* @compound is used to include pmd mappings of THPs, but filter out
* pte mappings of THPs, which cannot be consistently counted: a pte
@@ -547,15 +546,22 @@ static inline void mlock_vma_page(struct page *page,
mlock_vma_folio(page_folio(page), vma, compound);
}

-void munlock_page(struct page *page);
-static inline void munlock_vma_page(struct page *page,
+void munlock_folio(struct folio *folio);
+
+static inline void munlock_vma_folio(struct folio *folio,
struct vm_area_struct *vma, bool compound)
{
if (unlikely(vma->vm_flags & VM_LOCKED) &&
- (compound || !PageTransCompound(page)))
- munlock_page(page);
+ (compound || !folio_test_large(folio)))
+ munlock_folio(folio);
+}
+
+static inline void munlock_vma_page(struct page *page,
+ struct vm_area_struct *vma, bool compound)
+{
+ munlock_vma_folio(page_folio(page), vma, compound);
}
-void mlock_new_page(struct page *page);
+void mlock_new_folio(struct folio *folio);
bool need_mlock_page_drain(int cpu);
void mlock_page_drain_local(void);
void mlock_page_drain_remote(int cpu);
@@ -647,7 +653,7 @@ static inline void mlock_vma_page(struct page *page,
struct vm_area_struct *vma, bool compound) { }
static inline void munlock_vma_page(struct page *page,
struct vm_area_struct *vma, bool compound) { }
-static inline void mlock_new_page(struct page *page) { }
+static inline void mlock_new_folio(struct folio *folio) { }
static inline bool need_mlock_page_drain(int cpu) { return false; }
static inline void mlock_page_drain_local(void) { }
static inline void mlock_page_drain_remote(int cpu) { }
diff --git a/mm/mlock.c b/mm/mlock.c
index e9ba47fe67ed..3982ef4d1632 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -262,13 +262,12 @@ void mlock_folio(struct folio *folio)
}

/**
- * mlock_new_page - mlock a newly allocated page not yet on LRU
- * @page: page to be mlocked, either a normal page or a THP head.
+ * mlock_new_folio - mlock a newly allocated folio not yet on LRU
+ * @folio: folio to be mlocked, either normal or a THP head.
*/
-void mlock_new_page(struct page *page)
+void mlock_new_folio(struct folio *folio)
{
struct folio_batch *fbatch;
- struct folio *folio = page_folio(page);
int nr_pages = folio_nr_pages(folio);

local_lock(&mlock_fbatch.lock);
@@ -286,13 +285,12 @@ void mlock_new_page(struct page *page)
}

/**
- * munlock_page - munlock a page
- * @page: page to be munlocked, either a normal page or a THP head.
+ * munlock_folio - munlock a folio
+ * @folio: folio to be munlocked, either normal or a THP head.
*/
-void munlock_page(struct page *page)
+void munlock_folio(struct folio *folio)
{
struct folio_batch *fbatch;
- struct folio *folio = page_folio(page);

local_lock(&mlock_fbatch.lock);
fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
@@ -314,7 +312,7 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
spinlock_t *ptl;
pte_t *start_pte, *pte;
- struct page *page;
+ struct folio *folio;

ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
@@ -322,11 +320,11 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
goto out;
if (is_huge_zero_pmd(*pmd))
goto out;
- page = pmd_page(*pmd);
+ folio = page_folio(pmd_page(*pmd));
if (vma->vm_flags & VM_LOCKED)
- mlock_folio(page_folio(page));
+ mlock_folio(folio);
else
- munlock_page(page);
+ munlock_folio(folio);
goto out;
}

@@ -334,15 +332,15 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
if (!pte_present(*pte))
continue;
- page = vm_normal_page(vma, addr, *pte);
- if (!page || is_zone_device_page(page))
+ folio = vm_normal_folio(vma, addr, *pte);
+ if (!folio || folio_is_zone_device(folio))
continue;
- if (PageTransCompound(page))
+ if (folio_test_large(folio))
continue;
if (vma->vm_flags & VM_LOCKED)
- mlock_folio(page_folio(page));
+ mlock_folio(folio);
else
- munlock_page(page);
+ munlock_folio(folio);
}
pte_unmap(start_pte);
out:
diff --git a/mm/swap.c b/mm/swap.c
index e54e2a252e27..7df297b143f9 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -562,7 +562,7 @@ void folio_add_lru_vma(struct folio *folio, struct vm_area_struct *vma)
VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);

if (unlikely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED))
- mlock_new_page(&folio->page);
+ mlock_new_folio(folio);
else
folio_add_lru(folio);
}
--
2.39.0


2023-01-12 11:36:16

by Vlastimil Babka

[permalink] [raw]
Subject: Re: [PATCH v3 4/5] mm: mlock: update the interface to use folios

On 12/26/22 09:44, Lorenzo Stoakes wrote:
> This patch updates the mlock interface to accept folios rather than pages,
> bringing the interface in line with the internal implementation.
>
> munlock_vma_page() still requires a page_folio() conversion, however this
> is consistent with the existent mlock_vma_page() implementation and a
> product of rmap still dealing in pages rather than folios.
>
> Signed-off-by: Lorenzo Stoakes <[email protected]>

Acked-by: Vlastimil Babka <[email protected]>

With one suggestion:

> ---
> mm/internal.h | 26 ++++++++++++++++----------
> mm/mlock.c | 32 +++++++++++++++-----------------
> mm/swap.c | 2 +-
> 3 files changed, 32 insertions(+), 28 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 1d6f4e168510..8a6e83315369 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -515,10 +515,9 @@ extern int mlock_future_check(struct mm_struct *mm, unsigned long flags,
> * should be called with vma's mmap_lock held for read or write,
> * under page table lock for the pte/pmd being added or removed.
> *
> - * mlock is usually called at the end of page_add_*_rmap(),
> - * munlock at the end of page_remove_rmap(); but new anon
> - * pages are managed by lru_cache_add_inactive_or_unevictable()
> - * calling mlock_new_page().
> + * mlock is usually called at the end of page_add_*_rmap(), munlock at
> + * the end of page_remove_rmap(); but new anon folios are managed by
> + * folio_add_lru_vma() calling mlock_new_folio().
> *
> * @compound is used to include pmd mappings of THPs, but filter out
> * pte mappings of THPs, which cannot be consistently counted: a pte
> @@ -547,15 +546,22 @@ static inline void mlock_vma_page(struct page *page,
> mlock_vma_folio(page_folio(page), vma, compound);
> }
>
> -void munlock_page(struct page *page);
> -static inline void munlock_vma_page(struct page *page,
> +void munlock_folio(struct folio *folio);
> +
> +static inline void munlock_vma_folio(struct folio *folio,
> struct vm_area_struct *vma, bool compound)
> {
> if (unlikely(vma->vm_flags & VM_LOCKED) &&
> - (compound || !PageTransCompound(page)))
> - munlock_page(page);
> + (compound || !folio_test_large(folio)))
> + munlock_folio(folio);
> +}
> +
> +static inline void munlock_vma_page(struct page *page,
> + struct vm_area_struct *vma, bool compound)
> +{
> + munlock_vma_folio(page_folio(page), vma, compound);
> }
> -void mlock_new_page(struct page *page);
> +void mlock_new_folio(struct folio *folio);
> bool need_mlock_page_drain(int cpu);
> void mlock_page_drain_local(void);
> void mlock_page_drain_remote(int cpu);

I think these drain-related functions could use a rename as well?
Maybe replace "page" with "fbatch" or "folio_batch"? Even the old name isn't
great; it should have been "pagevec".
But maybe it would fit patch 2/5 rather than 4/5, as it's logically internal
even if in a .h file.


2023-01-12 12:35:10

by Lorenzo Stoakes

[permalink] [raw]
Subject: Re: [PATCH v3 4/5] mm: mlock: update the interface to use folios

On Thu, Jan 12, 2023 at 11:55:13AM +0100, Vlastimil Babka wrote:
> On 12/26/22 09:44, Lorenzo Stoakes wrote:
> > This patch updates the mlock interface to accept folios rather than pages,
> > bringing the interface in line with the internal implementation.
> >
> > munlock_vma_page() still requires a page_folio() conversion, however this
> > is consistent with the existent mlock_vma_page() implementation and a
> > product of rmap still dealing in pages rather than folios.
> >
> > Signed-off-by: Lorenzo Stoakes <[email protected]>
>
> Acked-by: Vlastimil Babka <[email protected]>
>
> With some suggestion:
>
> > ---
> > mm/internal.h | 26 ++++++++++++++++----------
> > mm/mlock.c | 32 +++++++++++++++-----------------
> > mm/swap.c | 2 +-
> > 3 files changed, 32 insertions(+), 28 deletions(-)
> >
> > diff --git a/mm/internal.h b/mm/internal.h
> > index 1d6f4e168510..8a6e83315369 100644
> > --- a/mm/internal.h
> > +++ b/mm/internal.h
> > @@ -515,10 +515,9 @@ extern int mlock_future_check(struct mm_struct *mm, unsigned long flags,
> > * should be called with vma's mmap_lock held for read or write,
> > * under page table lock for the pte/pmd being added or removed.
> > *
> > - * mlock is usually called at the end of page_add_*_rmap(),
> > - * munlock at the end of page_remove_rmap(); but new anon
> > - * pages are managed by lru_cache_add_inactive_or_unevictable()
> > - * calling mlock_new_page().
> > + * mlock is usually called at the end of page_add_*_rmap(), munlock at
> > + * the end of page_remove_rmap(); but new anon folios are managed by
> > + * folio_add_lru_vma() calling mlock_new_folio().
> > *
> > * @compound is used to include pmd mappings of THPs, but filter out
> > * pte mappings of THPs, which cannot be consistently counted: a pte
> > @@ -547,15 +546,22 @@ static inline void mlock_vma_page(struct page *page,
> > mlock_vma_folio(page_folio(page), vma, compound);
> > }
> >
> > -void munlock_page(struct page *page);
> > -static inline void munlock_vma_page(struct page *page,
> > +void munlock_folio(struct folio *folio);
> > +
> > +static inline void munlock_vma_folio(struct folio *folio,
> > struct vm_area_struct *vma, bool compound)
> > {
> > if (unlikely(vma->vm_flags & VM_LOCKED) &&
> > - (compound || !PageTransCompound(page)))
> > - munlock_page(page);
> > + (compound || !folio_test_large(folio)))
> > + munlock_folio(folio);
> > +}
> > +
> > +static inline void munlock_vma_page(struct page *page,
> > + struct vm_area_struct *vma, bool compound)
> > +{
> > + munlock_vma_folio(page_folio(page), vma, compound);
> > }
> > -void mlock_new_page(struct page *page);
> > +void mlock_new_folio(struct folio *folio);
> > bool need_mlock_page_drain(int cpu);
> > void mlock_page_drain_local(void);
> > void mlock_page_drain_remote(int cpu);
>
> I think these drain related functions could use a rename as well?
> Maybe replace "page" with "fbatch" or "folio_batch"? Even the old name isn't
> great, should have been "pagevec".

Agreed, though I feel it's more readable if we just drop the "page" part of
the names altogether, which is also more consistent with the core batch drain
functions, e.g. lru_add_drain().

In that case the names would become need_mlock_drain(), mlock_drain_local()
and mlock_drain_remote().

> But maybe it would fit patch 2/5 rather than 4/5 as it's logically internal
> even if in a .h file.
>
>

Even though it is an internal interface across the board, I feel that keeping
this separate makes the patch series a little easier to read, so I think it
makes sense to keep it here so that we have a separation between
internal-to-mlock changes and internal-to-mm ones :)