2023-05-17 02:30:13

by Peter Collingbourne

[permalink] [raw]
Subject: [PATCH v3 0/3] mm: Fix bug affecting swapping in MTE tagged pages

This patch series reworks the logic that handles swapping in page
metadata to fix a reported bug [1] where metadata can sometimes not
be swapped in correctly after commit c145e0b47c77 ("mm: streamline COW
logic in do_swap_page()").

- Patch 1 fixes the bug itself, but still requires architectures
to restore metadata in both arch_swap_restore() and set_pte_at().

- Patch 2 makes it so that architectures only need to restore metadata
in arch_swap_restore().

- Patch 3 changes arm64 to remove support for restoring metadata
in set_pte_at().

[1] https://lore.kernel.org/all/[email protected]/

v3:
- Added patch to call arch_swap_restore() from unuse_pte()
- Rebased onto arm64/for-next/fixes

v2:
- Call arch_swap_restore() directly instead of via arch_do_swap_page()

Peter Collingbourne (3):
mm: Call arch_swap_restore() from do_swap_page()
mm: Call arch_swap_restore() from unuse_pte()
arm64: mte: Simplify swap tag restoration logic

arch/arm64/include/asm/mte.h | 4 ++--
arch/arm64/include/asm/pgtable.h | 14 ++----------
arch/arm64/kernel/mte.c | 37 ++++++--------------------------
arch/arm64/mm/mteswap.c | 7 +++---
mm/memory.c | 7 ++++++
mm/swapfile.c | 7 ++++++
6 files changed, 28 insertions(+), 48 deletions(-)

--
2.40.1.606.ga4b1b128d6-goog



2023-05-17 02:51:18

by Peter Collingbourne

[permalink] [raw]
Subject: [PATCH v3 3/3] arm64: mte: Simplify swap tag restoration logic

As a result of the previous two patches, there are no circumstances
in which a swapped-in page is installed in a page table without first
having arch_swap_restore() called on it. Therefore, we no longer need
the logic in set_pte_at() that restores the tags, so remove it.

Because we can now rely on the page being locked, we no longer need to
handle the case where a page is having its tags restored by multiple tasks
concurrently, so we can slightly simplify the logic in mte_restore_tags().

Signed-off-by: Peter Collingbourne <[email protected]>
Link: https://linux-review.googlesource.com/id/I8ad54476f3b2d0144ccd8ce0c1d7a2963e5ff6f3
---
v3:
- Rebased onto arm64/for-next/fixes, which already has a fix
for the issue previously tagged, therefore removed Fixes:
tag

arch/arm64/include/asm/mte.h | 4 ++--
arch/arm64/include/asm/pgtable.h | 14 ++----------
arch/arm64/kernel/mte.c | 37 ++++++--------------------------
arch/arm64/mm/mteswap.c | 7 +++---
4 files changed, 14 insertions(+), 48 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index c028afb1cd0b..4cedbaa16f41 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page)
}

void mte_zero_clear_page_tags(void *addr);
-void mte_sync_tags(pte_t old_pte, pte_t pte);
+void mte_sync_tags(pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
@@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page)
static inline void mte_zero_clear_page_tags(void *addr)
{
}
-static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
+static inline void mte_sync_tags(pte_t pte)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0bd18de9fd97..e8a252e62b12 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -337,18 +337,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
* don't expose tags (instruction fetches don't check tags).
*/
if (system_supports_mte() && pte_access_permitted(pte, false) &&
- !pte_special(pte)) {
- pte_t old_pte = READ_ONCE(*ptep);
- /*
- * We only need to synchronise if the new PTE has tags enabled
- * or if swapping in (in which case another mapping may have
- * set tags in the past even if this PTE isn't tagged).
- * (!pte_none() && !pte_present()) is an open coded version of
- * is_swap_pte()
- */
- if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
- mte_sync_tags(old_pte, pte);
- }
+ !pte_special(pte) && pte_tagged(pte))
+ mte_sync_tags(pte);

__check_safe_pte_update(mm, ptep, pte);

diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 7e89968bd282..c40728046fed 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -35,41 +35,18 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif

-static void mte_sync_page_tags(struct page *page, pte_t old_pte,
- bool check_swap, bool pte_is_tagged)
-{
- if (check_swap && is_swap_pte(old_pte)) {
- swp_entry_t entry = pte_to_swp_entry(old_pte);
-
- if (!non_swap_entry(entry))
- mte_restore_tags(entry, page);
- }
-
- if (!pte_is_tagged)
- return;
-
- if (try_page_mte_tagging(page)) {
- mte_clear_page_tags(page_address(page));
- set_page_mte_tagged(page);
- }
-}
-
-void mte_sync_tags(pte_t old_pte, pte_t pte)
+void mte_sync_tags(pte_t pte)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
- bool check_swap = nr_pages == 1;
- bool pte_is_tagged = pte_tagged(pte);
-
- /* Early out if there's nothing to do */
- if (!check_swap && !pte_is_tagged)
- return;

/* if PG_mte_tagged is set, tags have already been initialised */
- for (i = 0; i < nr_pages; i++, page++)
- if (!page_mte_tagged(page))
- mte_sync_page_tags(page, old_pte, check_swap,
- pte_is_tagged);
+ for (i = 0; i < nr_pages; i++, page++) {
+ if (try_page_mte_tagging(page)) {
+ mte_clear_page_tags(page_address(page));
+ set_page_mte_tagged(page);
+ }
+ }

/* ensure the tags are visible before the PTE is set */
smp_wmb();
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index cd508ba80ab1..3a78bf1b1364 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -53,10 +53,9 @@ void mte_restore_tags(swp_entry_t entry, struct page *page)
if (!tags)
return;

- if (try_page_mte_tagging(page)) {
- mte_restore_page_tags(page_address(page), tags);
- set_page_mte_tagged(page);
- }
+ WARN_ON_ONCE(!try_page_mte_tagging(page));
+ mte_restore_page_tags(page_address(page), tags);
+ set_page_mte_tagged(page);
}

void mte_invalidate_tags(int type, pgoff_t offset)
--
2.40.1.606.ga4b1b128d6-goog


2023-05-17 03:21:06

by Peter Collingbourne

[permalink] [raw]
Subject: [PATCH v3 2/3] mm: Call arch_swap_restore() from unuse_pte()

We would like to move away from requiring architectures to restore
metadata from swap in the set_pte_at() implementation, as this is not only
error-prone but adds complexity to the arch-specific code. This requires
us to call arch_swap_restore() before calling swap_free() whenever pages
are restored from swap. We are currently doing so everywhere except in
unuse_pte(); do so there as well.

Signed-off-by: Peter Collingbourne <[email protected]>
Link: https://linux-review.googlesource.com/id/I68276653e612d64cde271ce1b5a99ae05d6bbc4f
---
mm/swapfile.c | 7 +++++++
1 file changed, 7 insertions(+)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 274bbf797480..e9843fadecd6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1794,6 +1794,13 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
goto setpte;
}

+ /*
+ * Some architectures may have to restore extra metadata to the page
+ * when reading from swap. This metadata may be indexed by swap entry
+ * so this must be called before swap_free().
+ */
+ arch_swap_restore(entry, page_folio(page));
+
/* See do_swap_page() */
BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
BUG_ON(PageAnon(page) && PageAnonExclusive(page));
--
2.40.1.606.ga4b1b128d6-goog


2023-05-17 09:27:23

by David Hildenbrand

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] mm: Call arch_swap_restore() from unuse_pte()

On 17.05.23 04:21, Peter Collingbourne wrote:
> We would like to move away from requiring architectures to restore
> metadata from swap in the set_pte_at() implementation, as this is not only
> error-prone but adds complexity to the arch-specific code. This requires
> us to call arch_swap_restore() before calling swap_free() whenever pages
> are restored from swap. We are currently doing so everywhere except in
> unuse_pte(); do so there as well.
>
> Signed-off-by: Peter Collingbourne <[email protected]>
> Link: https://linux-review.googlesource.com/id/I68276653e612d64cde271ce1b5a99ae05d6bbc4f
> ---
> mm/swapfile.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 274bbf797480..e9843fadecd6 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -1794,6 +1794,13 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
> goto setpte;
> }
>
> + /*
> + * Some architectures may have to restore extra metadata to the page
> + * when reading from swap. This metadata may be indexed by swap entry
> + * so this must be called before swap_free().
> + */
> + arch_swap_restore(entry, page_folio(page));
> +
> /* See do_swap_page() */
> BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
> BUG_ON(PageAnon(page) && PageAnonExclusive(page));

Acked-by: David Hildenbrand <[email protected]>

--
Thanks,

David / dhildenb


2023-05-17 15:05:59

by Steven Price

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] mm: Call arch_swap_restore() from unuse_pte()

On 17/05/2023 03:21, Peter Collingbourne wrote:
> We would like to move away from requiring architectures to restore
> metadata from swap in the set_pte_at() implementation, as this is not only
> error-prone but adds complexity to the arch-specific code. This requires
> us to call arch_swap_restore() before calling swap_free() whenever pages
> are restored from swap. We are currently doing so everywhere except in
> unuse_pte(); do so there as well.
>
> Signed-off-by: Peter Collingbourne <[email protected]>
> Link: https://linux-review.googlesource.com/id/I68276653e612d64cde271ce1b5a99ae05d6bbc4f

Reviewed-by: Steven Price <[email protected]>

> ---
> mm/swapfile.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 274bbf797480..e9843fadecd6 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -1794,6 +1794,13 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
> goto setpte;
> }
>
> + /*
> + * Some architectures may have to restore extra metadata to the page
> + * when reading from swap. This metadata may be indexed by swap entry
> + * so this must be called before swap_free().
> + */
> + arch_swap_restore(entry, page_folio(page));
> +
> /* See do_swap_page() */
> BUG_ON(!PageAnon(page) && PageMappedToDisk(page));
> BUG_ON(PageAnon(page) && PageAnonExclusive(page));


2023-05-17 15:29:19

by Steven Price

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] arm64: mte: Simplify swap tag restoration logic

On 17/05/2023 03:21, Peter Collingbourne wrote:
> As a result of the previous two patches, there are no circumstances
> in which a swapped-in page is installed in a page table without first
> having arch_swap_restore() called on it. Therefore, we no longer need
> the logic in set_pte_at() that restores the tags, so remove it.
>
> Because we can now rely on the page being locked, we no longer need to
> handle the case where a page is having its tags restored by multiple tasks
> concurrently, so we can slightly simplify the logic in mte_restore_tags().
>
> Signed-off-by: Peter Collingbourne <[email protected]>
> Link: https://linux-review.googlesource.com/id/I8ad54476f3b2d0144ccd8ce0c1d7a2963e5ff6f3

This is much neater, thanks for figuring out a better way of
implementing it. The set_pte_at() thing always felt like a hack, but it
was always there for the non-swap case and I obviously never figured out
a better solution.

Reviewed-by: Steven Price <[email protected]>

> ---
> v3:
> - Rebased onto arm64/for-next/fixes, which already has a fix
> for the issue previously tagged, therefore removed Fixes:
> tag
>
> arch/arm64/include/asm/mte.h | 4 ++--
> arch/arm64/include/asm/pgtable.h | 14 ++----------
> arch/arm64/kernel/mte.c | 37 ++++++--------------------------
> arch/arm64/mm/mteswap.c | 7 +++---
> 4 files changed, 14 insertions(+), 48 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index c028afb1cd0b..4cedbaa16f41 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -90,7 +90,7 @@ static inline bool try_page_mte_tagging(struct page *page)
> }
>
> void mte_zero_clear_page_tags(void *addr);
> -void mte_sync_tags(pte_t old_pte, pte_t pte);
> +void mte_sync_tags(pte_t pte);
> void mte_copy_page_tags(void *kto, const void *kfrom);
> void mte_thread_init_user(void);
> void mte_thread_switch(struct task_struct *next);
> @@ -122,7 +122,7 @@ static inline bool try_page_mte_tagging(struct page *page)
> static inline void mte_zero_clear_page_tags(void *addr)
> {
> }
> -static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
> +static inline void mte_sync_tags(pte_t pte)
> {
> }
> static inline void mte_copy_page_tags(void *kto, const void *kfrom)
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 0bd18de9fd97..e8a252e62b12 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -337,18 +337,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
> * don't expose tags (instruction fetches don't check tags).
> */
> if (system_supports_mte() && pte_access_permitted(pte, false) &&
> - !pte_special(pte)) {
> - pte_t old_pte = READ_ONCE(*ptep);
> - /*
> - * We only need to synchronise if the new PTE has tags enabled
> - * or if swapping in (in which case another mapping may have
> - * set tags in the past even if this PTE isn't tagged).
> - * (!pte_none() && !pte_present()) is an open coded version of
> - * is_swap_pte()
> - */
> - if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
> - mte_sync_tags(old_pte, pte);
> - }
> + !pte_special(pte) && pte_tagged(pte))
> + mte_sync_tags(pte);
>
> __check_safe_pte_update(mm, ptep, pte);
>
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 7e89968bd282..c40728046fed 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -35,41 +35,18 @@ DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
> EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
> #endif
>
> -static void mte_sync_page_tags(struct page *page, pte_t old_pte,
> - bool check_swap, bool pte_is_tagged)
> -{
> - if (check_swap && is_swap_pte(old_pte)) {
> - swp_entry_t entry = pte_to_swp_entry(old_pte);
> -
> - if (!non_swap_entry(entry))
> - mte_restore_tags(entry, page);
> - }
> -
> - if (!pte_is_tagged)
> - return;
> -
> - if (try_page_mte_tagging(page)) {
> - mte_clear_page_tags(page_address(page));
> - set_page_mte_tagged(page);
> - }
> -}
> -
> -void mte_sync_tags(pte_t old_pte, pte_t pte)
> +void mte_sync_tags(pte_t pte)
> {
> struct page *page = pte_page(pte);
> long i, nr_pages = compound_nr(page);
> - bool check_swap = nr_pages == 1;
> - bool pte_is_tagged = pte_tagged(pte);
> -
> - /* Early out if there's nothing to do */
> - if (!check_swap && !pte_is_tagged)
> - return;
>
> /* if PG_mte_tagged is set, tags have already been initialised */
> - for (i = 0; i < nr_pages; i++, page++)
> - if (!page_mte_tagged(page))
> - mte_sync_page_tags(page, old_pte, check_swap,
> - pte_is_tagged);
> + for (i = 0; i < nr_pages; i++, page++) {
> + if (try_page_mte_tagging(page)) {
> + mte_clear_page_tags(page_address(page));
> + set_page_mte_tagged(page);
> + }
> + }
>
> /* ensure the tags are visible before the PTE is set */
> smp_wmb();
> diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> index cd508ba80ab1..3a78bf1b1364 100644
> --- a/arch/arm64/mm/mteswap.c
> +++ b/arch/arm64/mm/mteswap.c
> @@ -53,10 +53,9 @@ void mte_restore_tags(swp_entry_t entry, struct page *page)
> if (!tags)
> return;
>
> - if (try_page_mte_tagging(page)) {
> - mte_restore_page_tags(page_address(page), tags);
> - set_page_mte_tagged(page);
> - }
> + WARN_ON_ONCE(!try_page_mte_tagging(page));
> + mte_restore_page_tags(page_address(page), tags);
> + set_page_mte_tagged(page);
> }
>
> void mte_invalidate_tags(int type, pgoff_t offset)


2023-05-19 17:05:16

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] arm64: mte: Simplify swap tag restoration logic

On Tue, May 16, 2023 at 07:21:13PM -0700, Peter Collingbourne wrote:
> As a result of the previous two patches, there are no circumstances
> in which a swapped-in page is installed in a page table without first
> having arch_swap_restore() called on it. Therefore, we no longer need
> the logic in set_pte_at() that restores the tags, so remove it.
>
> Because we can now rely on the page being locked, we no longer need to
> handle the case where a page is having its tags restored by multiple tasks
> concurrently, so we can slightly simplify the logic in mte_restore_tags().
[...]
> diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> index cd508ba80ab1..3a78bf1b1364 100644
> --- a/arch/arm64/mm/mteswap.c
> +++ b/arch/arm64/mm/mteswap.c
> @@ -53,10 +53,9 @@ void mte_restore_tags(swp_entry_t entry, struct page *page)
> if (!tags)
> return;
>
> - if (try_page_mte_tagging(page)) {
> - mte_restore_page_tags(page_address(page), tags);
> - set_page_mte_tagged(page);
> - }
> + WARN_ON_ONCE(!try_page_mte_tagging(page));
> + mte_restore_page_tags(page_address(page), tags);
> + set_page_mte_tagged(page);
> }

Can we have a situation where two processes share the same swap pte
(CoW) and they both enter the do_swap_page() or the unuse_pte() paths
triggering this warning?

Other than that, the looks nice, it simplifies the logic and probably
saves a few cycles as well on the set_pte_at() path.

Reviewed-by: Catalin Marinas <[email protected]>

2023-05-19 17:05:22

by Catalin Marinas

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] mm: Call arch_swap_restore() from unuse_pte()

On Tue, May 16, 2023 at 07:21:12PM -0700, Peter Collingbourne wrote:
> We would like to move away from requiring architectures to restore
> metadata from swap in the set_pte_at() implementation, as this is not only
> error-prone but adds complexity to the arch-specific code. This requires
> us to call arch_swap_restore() before calling swap_free() whenever pages
> are restored from swap. We are currently doing so everywhere except in
> unuse_pte(); do so there as well.
>
> Signed-off-by: Peter Collingbourne <[email protected]>
> Link: https://linux-review.googlesource.com/id/I68276653e612d64cde271ce1b5a99ae05d6bbc4f

Acked-by: Catalin Marinas <[email protected]>

2023-05-23 00:39:21

by Peter Collingbourne

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] arm64: mte: Simplify swap tag restoration logic

Hi Catalin,

On Fri, May 19, 2023 at 9:54 AM Catalin Marinas <[email protected]> wrote:
>
> On Tue, May 16, 2023 at 07:21:13PM -0700, Peter Collingbourne wrote:
> > As a result of the previous two patches, there are no circumstances
> > in which a swapped-in page is installed in a page table without first
> > having arch_swap_restore() called on it. Therefore, we no longer need
> > the logic in set_pte_at() that restores the tags, so remove it.
> >
> > Because we can now rely on the page being locked, we no longer need to
> > handle the case where a page is having its tags restored by multiple tasks
> > concurrently, so we can slightly simplify the logic in mte_restore_tags().
> [...]
> > diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
> > index cd508ba80ab1..3a78bf1b1364 100644
> > --- a/arch/arm64/mm/mteswap.c
> > +++ b/arch/arm64/mm/mteswap.c
> > @@ -53,10 +53,9 @@ void mte_restore_tags(swp_entry_t entry, struct page *page)
> > if (!tags)
> > return;
> >
> > - if (try_page_mte_tagging(page)) {
> > - mte_restore_page_tags(page_address(page), tags);
> > - set_page_mte_tagged(page);
> > - }
> > + WARN_ON_ONCE(!try_page_mte_tagging(page));
> > + mte_restore_page_tags(page_address(page), tags);
> > + set_page_mte_tagged(page);
> > }
>
> Can we have a situation where two processes share the same swap pte
> (CoW) and they both enter the do_swap_page() or the unuse_pte() paths
> triggering this warning?

Having examined the code more closely, I realized that this is
possible with two do_swap_page() calls on CoW shared pages (or
do_swap_page() followed by unuse_pte()), because the swapcache page
will be shared between the tasks and so they will both call
arch_swap_restore() on the same page. I was able to provoke the
warning with the following program:

#include <sys/mman.h>
#include <unistd.h>

int main() {
char *p = mmap(0, 4096, PROT_READ|PROT_WRITE|PROT_MTE,
MAP_ANON|MAP_PRIVATE, -1, 0);
p[0] = 1;
madvise(p, 4096, MADV_PAGEOUT);
fork();
return p[0];
}

I will send a v4 with this hunk removed.

> Other than that, the looks nice, it simplifies the logic and probably
> saves a few cycles as well on the set_pte_at() path.
>
> Reviewed-by: Catalin Marinas <[email protected]>

Thanks for the review!

Peter