2023-11-06 22:10:40

by Andrei Vagin

[permalink] [raw]
Subject: [PATCH 1/2 v2] fs/proc/task_mmu: report SOFT_DIRTY bits through the PAGEMAP_SCAN ioctl

The PAGEMAP_SCAN ioctl returns information regarding page table entries.
It is more efficient compared to reading pagemap files. CRIU can start
to utilize this ioctl, but it needs info about soft-dirty bits to track
memory changes.

We are aware of a new method for tracking memory changes implemented in
the PAGEMAP_SCAN ioctl. For CRIU, the primary advantage of this method
is its usability by unprivileged users. However, it is not feasible to
transparently replace the soft-dirty tracker with the new one. The main
problem here is userfault descriptors that have to be preserved between
pre-dump iterations. This means CRIU will continue supporting the soft-dirty
method to avoid breakage for current users. The new method will be
implemented as a separate feature.

Cc: Muhammad Usama Anjum <[email protected]>
Cc: Michał Mirosław <[email protected]>
Signed-off-by: Andrei Vagin <[email protected]>
---
v2: check the soft-dirty bit in pagemap_page_category

Documentation/admin-guide/mm/pagemap.rst | 1 +
fs/proc/task_mmu.c | 17 ++++++++++++++++-
include/uapi/linux/fs.h | 1 +
3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index fe17cf210426..f5f065c67615 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -253,6 +253,7 @@ Following flags about pages are currently supported:
- ``PAGE_IS_SWAPPED`` - Page is in swapped
- ``PAGE_IS_PFNZERO`` - Page has zero PFN
- ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed
+- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty

The ``struct pm_scan_arg`` is used as the argument of the IOCTL.

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ef2eb12906da..51e0ec658457 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1761,7 +1761,7 @@ static int pagemap_release(struct inode *inode, struct file *file)
#define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
PAGE_IS_FILE | PAGE_IS_PRESENT | \
PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
- PAGE_IS_HUGE)
+ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY)
#define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)

struct pagemap_scan_private {
@@ -1793,6 +1793,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,

if (is_zero_pfn(pte_pfn(pte)))
categories |= PAGE_IS_PFNZERO;
+ if (pte_soft_dirty(pte))
+ categories |= PAGE_IS_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
swp_entry_t swp;

@@ -1806,6 +1808,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
!PageAnon(pfn_swap_entry_to_page(swp)))
categories |= PAGE_IS_FILE;
}
+ if (pte_swp_soft_dirty(pte))
+ categories |= PAGE_IS_SOFT_DIRTY;
}

return categories;
@@ -1853,12 +1857,16 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,

if (is_zero_pfn(pmd_pfn(pmd)))
categories |= PAGE_IS_PFNZERO;
+ if (pmd_soft_dirty(pmd))
+ categories |= PAGE_IS_SOFT_DIRTY;
} else if (is_swap_pmd(pmd)) {
swp_entry_t swp;

categories |= PAGE_IS_SWAPPED;
if (!pmd_swp_uffd_wp(pmd))
categories |= PAGE_IS_WRITTEN;
+ if (pmd_swp_soft_dirty(pmd))
+ categories |= PAGE_IS_SOFT_DIRTY;

if (p->masks_of_interest & PAGE_IS_FILE) {
swp = pmd_to_swp_entry(pmd);
@@ -1905,10 +1913,14 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
categories |= PAGE_IS_FILE;
if (is_zero_pfn(pte_pfn(pte)))
categories |= PAGE_IS_PFNZERO;
+ if (pte_soft_dirty(pte))
+ categories |= PAGE_IS_SOFT_DIRTY;
} else if (is_swap_pte(pte)) {
categories |= PAGE_IS_SWAPPED;
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
+ if (pte_swp_soft_dirty(pte))
+ categories |= PAGE_IS_SOFT_DIRTY;
}

return categories;
@@ -1991,6 +2003,9 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
if (vma->vm_flags & VM_PFNMAP)
return 1;

+ if (vma->vm_flags & VM_SOFTDIRTY)
+ vma_category |= PAGE_IS_SOFT_DIRTY;
+
if (!pagemap_scan_is_interesting_vma(vma_category, p))
return 1;

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index da43810b7485..48ad69f7722e 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t;
#define PAGE_IS_SWAPPED (1 << 4)
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
+#define PAGE_IS_SOFT_DIRTY (1 << 7)

/*
* struct page_region - Page region with flags
--
2.42.0.869.gea05f2083d-goog


2023-11-06 22:11:07

by Andrei Vagin

[permalink] [raw]
Subject: [PATCH 2/2] selftests/mm: check that PAGEMAP_SCAN returns correct categories

Right now, tests read page flags from /proc/pid/pagemap files. With this
change, tests will check that PAGEMAP_SCAN returns correct information
too.

Signed-off-by: Andrei Vagin <[email protected]>
---
tools/testing/selftests/mm/vm_util.c | 53 ++++++++++++++++++++++++++--
1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 3082b40492dd..ec3478b96e4c 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -4,6 +4,7 @@
#include <dirent.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
+#include <linux/fs.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "../kselftest.h"
@@ -28,19 +29,65 @@ uint64_t pagemap_get_entry(int fd, char *start)
return entry;
}

+static uint64_t pagemap_scan_get_categories(int fd, char *start)
+{
+ struct pm_scan_arg arg;
+ struct page_region r;
+ long ret;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + psize());
+ arg.vec = (uintptr_t)&r;
+ arg.vec_len = 1;
+ arg.flags = 0;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = 0;
+ arg.category_inverted = 0;
+ arg.category_mask = 0;
+ arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
+ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
+ arg.return_mask = arg.category_anyof_mask;
+
+ ret = ioctl(fd, PAGEMAP_SCAN, &arg);
+ if (ret < 0)
+ ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
+ if (ret == 0)
+ return 0;
+ return r.categories;
+}
+
+static bool page_entry_is(int fd, char *start, char *desc,
+ uint64_t pagemap_flags, uint64_t pagescan_flags)
+{
+ bool m, s;
+
+ m = pagemap_get_entry(fd, start) & pagemap_flags;
+ s = pagemap_scan_get_categories(fd, start) & pagescan_flags;
+ if (m == s)
+ return m;
+
+ ksft_exit_fail_msg(
+ "read and ioctl return unmatched results for %s: %d %d", desc, m, s);
+ return m;
+}
+
bool pagemap_is_softdirty(int fd, char *start)
{
- return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY;
+ return page_entry_is(fd, start, "soft-dirty",
+ PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
}

bool pagemap_is_swapped(int fd, char *start)
{
- return pagemap_get_entry(fd, start) & PM_SWAP;
+ return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
}

bool pagemap_is_populated(int fd, char *start)
{
- return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP);
+ return page_entry_is(fd, start, "populated",
+ PM_PRESENT | PM_SWAP,
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
}

unsigned long pagemap_get_pfn(int fd, char *start)
--
2.42.0.869.gea05f2083d-goog

2023-11-07 05:50:27

by Muhammad Usama Anjum

[permalink] [raw]
Subject: Re: [PATCH 1/2 v2] fs/proc/task_mmu: report SOFT_DIRTY bits through the PAGEMAP_SCAN ioctl

On 11/7/23 3:09 AM, Andrei Vagin wrote:
> The PAGEMAP_SCAN ioctl returns information regarding page table entries.
> It is more efficient compared to reading pagemap files. CRIU can start
> to utilize this ioctl, but it needs info about soft-dirty bits to track
> memory changes.
>
> We are aware of a new method for tracking memory changes implemented in
> the PAGEMAP_SCAN ioctl. For CRIU, the primary advantage of this method
> is its usability by unprivileged users. However, it is not feasible to
> transparently replace the soft-dirty tracker with the new one. The main
> problem here is userfault descriptors that have to be preserved between
> pre-dump iterations. It means criu continues supporting the soft-dirty
> method to avoid breakage for current users. The new method will be
> implemented as a separate feature.
>
> Cc: Muhammad Usama Anjum <[email protected]>
> Cc: Michał Mirosław <[email protected]>
> Signed-off-by: Andrei Vagin <[email protected]>
Reviewed-by: Muhammad Usama Anjum <[email protected]>

> ---
> v2: check the soft-dirty bit in pagemap_page_category
>
> Documentation/admin-guide/mm/pagemap.rst | 1 +
> fs/proc/task_mmu.c | 17 ++++++++++++++++-
> include/uapi/linux/fs.h | 1 +
> 3 files changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
> index fe17cf210426..f5f065c67615 100644
> --- a/Documentation/admin-guide/mm/pagemap.rst
> +++ b/Documentation/admin-guide/mm/pagemap.rst
> @@ -253,6 +253,7 @@ Following flags about pages are currently supported:
> - ``PAGE_IS_SWAPPED`` - Page is in swapped
> - ``PAGE_IS_PFNZERO`` - Page has zero PFN
> - ``PAGE_IS_HUGE`` - Page is THP or Hugetlb backed
> +- ``PAGE_IS_SOFT_DIRTY`` - Page is soft-dirty
>
> The ``struct pm_scan_arg`` is used as the argument of the IOCTL.
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index ef2eb12906da..51e0ec658457 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1761,7 +1761,7 @@ static int pagemap_release(struct inode *inode, struct file *file)
> #define PM_SCAN_CATEGORIES (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
> PAGE_IS_FILE | PAGE_IS_PRESENT | \
> PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
> - PAGE_IS_HUGE)
> + PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY)
> #define PM_SCAN_FLAGS (PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC)
>
> struct pagemap_scan_private {
> @@ -1793,6 +1793,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
>
> if (is_zero_pfn(pte_pfn(pte)))
> categories |= PAGE_IS_PFNZERO;
> + if (pte_soft_dirty(pte))
> + categories |= PAGE_IS_SOFT_DIRTY;
> } else if (is_swap_pte(pte)) {
> swp_entry_t swp;
>
> @@ -1806,6 +1808,8 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
> !PageAnon(pfn_swap_entry_to_page(swp)))
> categories |= PAGE_IS_FILE;
> }
> + if (pte_swp_soft_dirty(pte))
> + categories |= PAGE_IS_SOFT_DIRTY;
> }
>
> return categories;
> @@ -1853,12 +1857,16 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
>
> if (is_zero_pfn(pmd_pfn(pmd)))
> categories |= PAGE_IS_PFNZERO;
> + if (pmd_soft_dirty(pmd))
> + categories |= PAGE_IS_SOFT_DIRTY;
> } else if (is_swap_pmd(pmd)) {
> swp_entry_t swp;
>
> categories |= PAGE_IS_SWAPPED;
> if (!pmd_swp_uffd_wp(pmd))
> categories |= PAGE_IS_WRITTEN;
> + if (pmd_swp_soft_dirty(pmd))
> + categories |= PAGE_IS_SOFT_DIRTY;
>
> if (p->masks_of_interest & PAGE_IS_FILE) {
> swp = pmd_to_swp_entry(pmd);
> @@ -1905,10 +1913,14 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
> categories |= PAGE_IS_FILE;
> if (is_zero_pfn(pte_pfn(pte)))
> categories |= PAGE_IS_PFNZERO;
> + if (pte_soft_dirty(pte))
> + categories |= PAGE_IS_SOFT_DIRTY;
> } else if (is_swap_pte(pte)) {
> categories |= PAGE_IS_SWAPPED;
> if (!pte_swp_uffd_wp_any(pte))
> categories |= PAGE_IS_WRITTEN;
> + if (pte_swp_soft_dirty(pte))
> + categories |= PAGE_IS_SOFT_DIRTY;
> }
>
> return categories;
> @@ -1991,6 +2003,9 @@ static int pagemap_scan_test_walk(unsigned long start, unsigned long end,
> if (vma->vm_flags & VM_PFNMAP)
> return 1;
>
> + if (vma->vm_flags & VM_SOFTDIRTY)
> + vma_category |= PAGE_IS_SOFT_DIRTY;
> +
> if (!pagemap_scan_is_interesting_vma(vma_category, p))
> return 1;
>
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index da43810b7485..48ad69f7722e 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -316,6 +316,7 @@ typedef int __bitwise __kernel_rwf_t;
> #define PAGE_IS_SWAPPED (1 << 4)
> #define PAGE_IS_PFNZERO (1 << 5)
> #define PAGE_IS_HUGE (1 << 6)
> +#define PAGE_IS_SOFT_DIRTY (1 << 7)
LGTM, other than the missing identical change in
tools/include/uapi/linux/fs.h.

>
> /*
> * struct page_region - Page region with flags

--
BR,
Muhammad Usama Anjum

2023-11-07 06:16:21

by Muhammad Usama Anjum

[permalink] [raw]
Subject: Re: [PATCH 2/2] selftests/mm: check that PAGEMAP_SCAN returns correct categories

On 11/7/23 3:09 AM, Andrei Vagin wrote:
> Right now, tests read page flags from /proc/pid/pagemap files. With this
> change, tests will check that PAGEMAP_SCAN return correct information
> too.
>
> Signed-off-by: Andrei Vagin <[email protected]>
Reviewed-by: Muhammad Usama Anjum <[email protected]>
Tested-by: Muhammad Usama Anjum <[email protected]>

> ---
> tools/testing/selftests/mm/vm_util.c | 53 ++++++++++++++++++++++++++--
> 1 file changed, 50 insertions(+), 3 deletions(-)
>
> diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
> index 3082b40492dd..ec3478b96e4c 100644
> --- a/tools/testing/selftests/mm/vm_util.c
> +++ b/tools/testing/selftests/mm/vm_util.c
> @@ -4,6 +4,7 @@
> #include <dirent.h>
> #include <sys/ioctl.h>
> #include <linux/userfaultfd.h>
> +#include <linux/fs.h>
> #include <sys/syscall.h>
> #include <unistd.h>
> #include "../kselftest.h"
> @@ -28,19 +29,65 @@ uint64_t pagemap_get_entry(int fd, char *start)
> return entry;
> }
>
> +static uint64_t pagemap_scan_get_categories(int fd, char *start)
> +{
> + struct pm_scan_arg arg;
> + struct page_region r;
> + long ret;
> +
> + arg.start = (uintptr_t)start;
> + arg.end = (uintptr_t)(start + psize());
> + arg.vec = (uintptr_t)&r;
> + arg.vec_len = 1;
> + arg.flags = 0;
> + arg.size = sizeof(struct pm_scan_arg);
> + arg.max_pages = 0;
> + arg.category_inverted = 0;
> + arg.category_mask = 0;
> + arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
> + PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
> + PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
> + arg.return_mask = arg.category_anyof_mask;
> +
> + ret = ioctl(fd, PAGEMAP_SCAN, &arg);
> + if (ret < 0)
> + ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
> + if (ret == 0)
> + return 0;
> + return r.categories;
> +}
> +
> +static bool page_entry_is(int fd, char *start, char *desc,
> + uint64_t pagemap_flags, uint64_t pagescan_flags)
> +{
> + bool m, s;
> +
> + m = pagemap_get_entry(fd, start) & pagemap_flags;
> + s = pagemap_scan_get_categories(fd, start) & pagescan_flags;
> + if (m == s)
> + return m;
> +
> + ksft_exit_fail_msg(
> + "read and ioctl return unmatched results for %s: %d %d", desc, m, s);
> + return m;
> +}
> +
> bool pagemap_is_softdirty(int fd, char *start)
> {
> - return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY;
> + return page_entry_is(fd, start, "soft-dirty",
> + PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
> }
>
> bool pagemap_is_swapped(int fd, char *start)
> {
> - return pagemap_get_entry(fd, start) & PM_SWAP;
> + return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
> }
>
> bool pagemap_is_populated(int fd, char *start)
> {
> - return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP);
> + return page_entry_is(fd, start, "populated",
> + PM_PRESENT | PM_SWAP,
> + PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
> }
>
> unsigned long pagemap_get_pfn(int fd, char *start)

--
BR,
Muhammad Usama Anjum