2021-11-04 01:23:34

by Alistair Popple

[permalink] [raw]
Subject: [PATCH] mm/hmm.c: Allow VM_MIXEDMAP to work with hmm_range_fault

hmm_range_fault() can be used instead of get_user_pages() for devices
which allow faulting. However, unlike get_user_pages(), it will return an
error when used on a VM_MIXEDMAP range.

To make hmm_range_fault() more closely match get_user_pages(), remove
this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
case in hmm_vma_handle_pte(). Rather than replicating the logic of
vm_normal_page(), call it directly and do a check for the zero pfn
similar to what get_user_pages() currently does.

Also add a test to hmm selftest to verify functionality.

Signed-off-by: Alistair Popple <[email protected]>
---
lib/test_hmm.c | 24 +++++++++++++++
mm/hmm.c | 5 +--
tools/testing/selftests/vm/hmm-tests.c | 42 ++++++++++++++++++++++++++
3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index c259842f6d44..ac794e354069 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -1087,9 +1087,33 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
return 0;
}

+static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long addr;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ struct page *page;
+ int ret;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+
+ ret = vm_insert_page(vma, addr, page);
+ if (ret) {
+ __free_page(page);
+ return ret;
+ }
+ put_page(page);
+ }
+
+ return 0;
+}
+
static const struct file_operations dmirror_fops = {
.open = dmirror_fops_open,
.release = dmirror_fops_release,
+ .mmap = dmirror_fops_mmap,
.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
.llseek = default_llseek,
.owner = THIS_MODULE,
diff --git a/mm/hmm.c b/mm/hmm.c
index fad6be2bf072..70fa81a3b629 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -298,7 +298,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
* Since each architecture defines a struct page for the zero page, just
* fall through and treat it like a normal page.
*/
- if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
+ if (!vm_normal_page(walk->vma, addr, pte) &&
+ !is_zero_pfn(pte_pfn(pte))) {
if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
pte_unmap(ptep);
return -EFAULT;
@@ -515,7 +516,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;

- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
vma->vm_flags & VM_READ)
return 0;

diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..203323967b50 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1248,6 +1248,48 @@ TEST_F(hmm, anon_teardown)
}
}

+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
--
2.30.2


2021-11-05 16:07:06

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH] mm/hmm.c: Allow VM_MIXEDMAP to work with hmm_range_fault

On Thu, Nov 04, 2021 at 12:20:01PM +1100, Alistair Popple wrote:
> hmm_range_fault() can be used instead of get_user_pages() for devices
> which allow faulting however unlike get_user_pages() it will return an
> error when used on a VM_MIXEDMAP range.
>
> To make hmm_range_fault() more closely match get_user_pages() remove
> this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
> case in hmm_vma_handle_pte(). Rather than replicating the logic of
> vm_normal_page() call it directly and do a check for the zero pfn
> similar to what get_user_pages() currently does.
>
> Also add a test to hmm selftest to verify functionality.

Please add a fixes line

> Signed-off-by: Alistair Popple <[email protected]>
> ---
> lib/test_hmm.c | 24 +++++++++++++++
> mm/hmm.c | 5 +--
> tools/testing/selftests/vm/hmm-tests.c | 42 ++++++++++++++++++++++++++
> 3 files changed, 69 insertions(+), 2 deletions(-)

Reviewed-by: Jason Gunthorpe <[email protected]>

Jason

2021-11-08 11:33:36

by Alistair Popple

[permalink] [raw]
Subject: Re: [PATCH] mm/hmm.c: Allow VM_MIXEDMAP to work with hmm_range_fault

On Friday, 5 November 2021 11:25:57 PM AEDT Jason Gunthorpe wrote:
> On Thu, Nov 04, 2021 at 12:20:01PM +1100, Alistair Popple wrote:
> > hmm_range_fault() can be used instead of get_user_pages() for devices
> > which allow faulting however unlike get_user_pages() it will return an
> > error when used on a VM_MIXEDMAP range.
> >
> > To make hmm_range_fault() more closely match get_user_pages() remove
> > this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
> > case in hmm_vma_handle_pte(). Rather than replicating the logic of
> > vm_normal_page() call it directly and do a check for the zero pfn
> > similar to what get_user_pages() currently does.
> >
> > Also add a test to hmm selftest to verify functionality.
>
> Please add a fixes line

This has been the case since hmm_vma_get_pfns() was first introduced. Therefore
the fixes line is:

Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")

Should I send a v2 adding that tag or does Andrew normally pick these up along
with the Reviewed-by?

Thanks.

> > Signed-off-by: Alistair Popple <[email protected]>
> > ---
> > lib/test_hmm.c | 24 +++++++++++++++
> > mm/hmm.c | 5 +--
> > tools/testing/selftests/vm/hmm-tests.c | 42 ++++++++++++++++++++++++++
> > 3 files changed, 69 insertions(+), 2 deletions(-)
>
> Reviewed-by: Jason Gunthorpe <[email protected]>
>
> Jason
>




2021-11-16 04:49:13

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] mm/hmm.c: Allow VM_MIXEDMAP to work with hmm_range_fault

On Mon, 8 Nov 2021 18:58:01 +1100 Alistair Popple <[email protected]> wrote:

> On Friday, 5 November 2021 11:25:57 PM AEDT Jason Gunthorpe wrote:
> > On Thu, Nov 04, 2021 at 12:20:01PM +1100, Alistair Popple wrote:
> > > hmm_range_fault() can be used instead of get_user_pages() for devices
> > > which allow faulting however unlike get_user_pages() it will return an
> > > error when used on a VM_MIXEDMAP range.
> > >
> > > To make hmm_range_fault() more closely match get_user_pages() remove
> > > this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
> > > case in hmm_vma_handle_pte(). Rather than replicating the logic of
> > > vm_normal_page() call it directly and do a check for the zero pfn
> > > similar to what get_user_pages() currently does.
> > >
> > > Also add a test to hmm selftest to verify functionality.
> >
> > Please add a fixes line
>
> This has been the case since hmm_vma_get_pfns() was first introduced. Therefore
> the fixes line is:
>
> Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
>
> Should I send a v2 adding that tag or does Andrew normally pick these up along
> with the Reviewed-by?

I got it.

It needed a bit of rework due to the newly-added !pte_devmap() change
in hmm_vma_handle_pte(). Please check carefully?



From: Alistair Popple <[email protected]>
Subject: mm/hmm.c: Allow VM_MIXEDMAP to work with hmm_range_fault

hmm_range_fault() can be used instead of get_user_pages() for devices
which allow faulting. However, unlike get_user_pages(), it will return an
error when used on a VM_MIXEDMAP range.

To make hmm_range_fault() more closely match get_user_pages(), remove
this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
case in hmm_vma_handle_pte(). Rather than replicating the logic of
vm_normal_page(), call it directly and do a check for the zero pfn
similar to what get_user_pages() currently does.

Also add a test to hmm selftest to verify functionality.

Link: https://lkml.kernel.org/r/[email protected]
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Alistair Popple <[email protected]>
Reviewed-by: Jason Gunthorpe <[email protected]>
Cc: Jerome Glisse <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Zi Yan <[email protected]>
Cc: Ralph Campbell <[email protected]>
Cc: Felix Kuehling <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
---

lib/test_hmm.c | 24 +++++++++++++
mm/hmm.c | 5 +-
tools/testing/selftests/vm/hmm-tests.c | 42 +++++++++++++++++++++++
3 files changed, 69 insertions(+), 2 deletions(-)

--- a/lib/test_hmm.c~mm-hmmc-allow-vm_mixedmap-to-work-with-hmm_range_fault
+++ a/lib/test_hmm.c
@@ -1086,9 +1086,33 @@ static long dmirror_fops_unlocked_ioctl(
return 0;
}

+static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ unsigned long addr;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ struct page *page;
+ int ret;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return -ENOMEM;
+
+ ret = vm_insert_page(vma, addr, page);
+ if (ret) {
+ __free_page(page);
+ return ret;
+ }
+ put_page(page);
+ }
+
+ return 0;
+}
+
static const struct file_operations dmirror_fops = {
.open = dmirror_fops_open,
.release = dmirror_fops_release,
+ .mmap = dmirror_fops_mmap,
.unlocked_ioctl = dmirror_fops_unlocked_ioctl,
.llseek = default_llseek,
.owner = THIS_MODULE,
--- a/mm/hmm.c~mm-hmmc-allow-vm_mixedmap-to-work-with-hmm_range_fault
+++ a/mm/hmm.c
@@ -300,7 +300,8 @@ static int hmm_vma_handle_pte(struct mm_
* Since each architecture defines a struct page for the zero page, just
* fall through and treat it like a normal page.
*/
- if (pte_special(pte) && !pte_devmap(pte) &&
+ if (!vm_normal_page(walk->vma, addr, pte) &&
+ !pte_devmap(pte) &&
!is_zero_pfn(pte_pfn(pte))) {
if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
pte_unmap(ptep);
@@ -518,7 +519,7 @@ static int hmm_vma_walk_test(unsigned lo
struct hmm_range *range = hmm_vma_walk->range;
struct vm_area_struct *vma = walk->vma;

- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
vma->vm_flags & VM_READ)
return 0;

--- a/tools/testing/selftests/vm/hmm-tests.c~mm-hmmc-allow-vm_mixedmap-to-work-with-hmm_range_fault
+++ a/tools/testing/selftests/vm/hmm-tests.c
@@ -1251,6 +1251,48 @@ TEST_F(hmm, anon_teardown)
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
TEST_F(hmm2, snapshot)
{
struct hmm_buffer *buffer;
_



2021-11-16 07:15:55

by Alistair Popple

[permalink] [raw]
Subject: Re: [PATCH] mm/hmm.c: Allow VM_MIXEDMAP to work with hmm_range_fault

On Tuesday, 16 November 2021 3:48:42 PM AEDT Andrew Morton wrote:
> On Mon, 8 Nov 2021 18:58:01 +1100 Alistair Popple <[email protected]> wrote:
>
> > On Friday, 5 November 2021 11:25:57 PM AEDT Jason Gunthorpe wrote:
> > > On Thu, Nov 04, 2021 at 12:20:01PM +1100, Alistair Popple wrote:
> > > > hmm_range_fault() can be used instead of get_user_pages() for devices
> > > > which allow faulting however unlike get_user_pages() it will return an
> > > > error when used on a VM_MIXEDMAP range.
> > > >
> > > > To make hmm_range_fault() more closely match get_user_pages() remove
> > > > this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
> > > > case in hmm_vma_handle_pte(). Rather than replicating the logic of
> > > > vm_normal_page() call it directly and do a check for the zero pfn
> > > > similar to what get_user_pages() currently does.
> > > >
> > > > Also add a test to hmm selftest to verify functionality.
> > >
> > > Please add a fixes line
> >
> > This has been the case since hmm_vma_get_pfns() was first introduced. Therefore
> > the fixes line is:
> >
> > Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
> >
> > Should I send a v2 adding that tag or does Andrew normally pick these up along
> > with the Reviewed-by?
>
> I got it.
>
> It needed a bit of rework due to the newly-added !pte_devmap() change
> in hmm_vma_handle_pte(). Please check carefully?

Thanks. The rework looks good to me - changing pte_special() to
!vm_normal_page() does not change the logic of the !pte_devmap() change. In
either case the value of pte_special() or vm_normal_page() is ignored when
pte_devmap()==True as intended by the !pte_devmap() change.

For good measure I reran the HMM selftests as well.

> From: Alistair Popple <[email protected]>
> Subject: mm/hmm.c: Allow VM_MIXEDMAP to work with hmm_range_fault
>
> hmm_range_fault() can be used instead of get_user_pages() for devices
> which allow faulting however unlike get_user_pages() it will return an
> error when used on a VM_MIXEDMAP range.
>
> To make hmm_range_fault() more closely match get_user_pages() remove
> this restriction. This requires dealing with the !ARCH_HAS_PTE_SPECIAL
> case in hmm_vma_handle_pte(). Rather than replicating the logic of
> vm_normal_page() call it directly and do a check for the zero pfn
> similar to what get_user_pages() currently does.
>
> Also add a test to hmm selftest to verify functionality.
>
> Link: https://lkml.kernel.org/r/[email protected]
> Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
> Signed-off-by: Alistair Popple <[email protected]>
> Reviewed-by: Jason Gunthorpe <[email protected]>
> Cc: Jerome Glisse <[email protected]>
> Cc: John Hubbard <[email protected]>
> Cc: Zi Yan <[email protected]>
> Cc: Ralph Campbell <[email protected]>
> Cc: Felix Kuehling <[email protected]>
> Signed-off-by: Andrew Morton <[email protected]>
> ---
>
> lib/test_hmm.c | 24 +++++++++++++
> mm/hmm.c | 5 +-
> tools/testing/selftests/vm/hmm-tests.c | 42 +++++++++++++++++++++++
> 3 files changed, 69 insertions(+), 2 deletions(-)
>
> --- a/lib/test_hmm.c~mm-hmmc-allow-vm_mixedmap-to-work-with-hmm_range_fault
> +++ a/lib/test_hmm.c
> @@ -1086,9 +1086,33 @@ static long dmirror_fops_unlocked_ioctl(
> return 0;
> }
>
> +static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + unsigned long addr;
> +
> + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
> + struct page *page;
> + int ret;
> +
> + page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> + if (!page)
> + return -ENOMEM;
> +
> + ret = vm_insert_page(vma, addr, page);
> + if (ret) {
> + __free_page(page);
> + return ret;
> + }
> + put_page(page);
> + }
> +
> + return 0;
> +}
> +
> static const struct file_operations dmirror_fops = {
> .open = dmirror_fops_open,
> .release = dmirror_fops_release,
> + .mmap = dmirror_fops_mmap,
> .unlocked_ioctl = dmirror_fops_unlocked_ioctl,
> .llseek = default_llseek,
> .owner = THIS_MODULE,
> --- a/mm/hmm.c~mm-hmmc-allow-vm_mixedmap-to-work-with-hmm_range_fault
> +++ a/mm/hmm.c
> @@ -300,7 +300,8 @@ static int hmm_vma_handle_pte(struct mm_
> * Since each architecture defines a struct page for the zero page, just
> * fall through and treat it like a normal page.
> */
> - if (pte_special(pte) && !pte_devmap(pte) &&
> + if (!vm_normal_page(walk->vma, addr, pte) &&
> + !pte_devmap(pte) &&
> !is_zero_pfn(pte_pfn(pte))) {
> if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
> pte_unmap(ptep);
> @@ -518,7 +519,7 @@ static int hmm_vma_walk_test(unsigned lo
> struct hmm_range *range = hmm_vma_walk->range;
> struct vm_area_struct *vma = walk->vma;
>
> - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
> + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
> vma->vm_flags & VM_READ)
> return 0;
>
> --- a/tools/testing/selftests/vm/hmm-tests.c~mm-hmmc-allow-vm_mixedmap-to-work-with-hmm_range_fault
> +++ a/tools/testing/selftests/vm/hmm-tests.c
> @@ -1251,6 +1251,48 @@ TEST_F(hmm, anon_teardown)
> /*
> * Test memory snapshot without faulting in pages accessed by the device.
> */
> +TEST_F(hmm, mixedmap)
> +{
> + struct hmm_buffer *buffer;
> + unsigned long npages;
> + unsigned long size;
> + unsigned char *m;
> + int ret;
> +
> + npages = 1;
> + size = npages << self->page_shift;
> +
> + buffer = malloc(sizeof(*buffer));
> + ASSERT_NE(buffer, NULL);
> +
> + buffer->fd = -1;
> + buffer->size = size;
> + buffer->mirror = malloc(npages);
> + ASSERT_NE(buffer->mirror, NULL);
> +
> +
> + /* Reserve a range of addresses. */
> + buffer->ptr = mmap(NULL, size,
> + PROT_READ | PROT_WRITE,
> + MAP_PRIVATE,
> + self->fd, 0);
> + ASSERT_NE(buffer->ptr, MAP_FAILED);
> +
> + /* Simulate a device snapshotting CPU pagetables. */
> + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
> + ASSERT_EQ(ret, 0);
> + ASSERT_EQ(buffer->cpages, npages);
> +
> + /* Check what the device saw. */
> + m = buffer->mirror;
> + ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
> +
> + hmm_buffer_free(buffer);
> +}
> +
> +/*
> + * Test memory snapshot without faulting in pages accessed by the device.
> + */
> TEST_F(hmm2, snapshot)
> {
> struct hmm_buffer *buffer;
> _
>
>
>