2019-11-04 22:24:48

by Ralph Campbell

[permalink] [raw]
Subject: [PATCH v4 1/2] mm/hmm: make full use of walk_page_range()

hmm_range_fault() calls find_vma() and walk_page_range() in a loop.
This is unnecessary duplication since walk_page_range() calls find_vma()
in a loop already.
Simplify hmm_range_fault() by defining a walk_test() callback function
to filter unhandled vmas.
This also fixes a bug where hmm_range_fault() was not checking
start >= vma->vm_start before checking vma->vm_flags so hmm_range_fault()
could return an error based on the wrong vma for the requested range.
It also fixes a bug when the vma has no read access and the caller did
not request a fault, there shouldn't be any error return code.

Signed-off-by: Ralph Campbell <[email protected]>
Cc: "Jérôme Glisse" <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Christoph Hellwig <[email protected]>
---
mm/hmm.c | 121 ++++++++++++++++++++++++++-----------------------------
1 file changed, 58 insertions(+), 63 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 9e9d3f4ea17c..d4984a08ed9b 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -65,18 +65,15 @@ static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
return -EFAULT;
}

-static int hmm_pfns_bad(unsigned long addr,
- unsigned long end,
- struct mm_walk *walk)
+static int hmm_pfns_fill(unsigned long addr, unsigned long end,
+ struct hmm_range *range, enum hmm_pfn_value_e value)
{
- struct hmm_vma_walk *hmm_vma_walk = walk->private;
- struct hmm_range *range = hmm_vma_walk->range;
uint64_t *pfns = range->pfns;
unsigned long i;

i = (addr - range->start) >> PAGE_SHIFT;
for (; addr < end; addr += PAGE_SIZE, i++)
- pfns[i] = range->values[HMM_PFN_ERROR];
+ pfns[i] = range->values[value];

return 0;
}
@@ -403,7 +400,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
}
return 0;
} else if (!pmd_present(pmd))
- return hmm_pfns_bad(start, end, walk);
+ return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
/*
@@ -431,7 +428,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
* recover.
*/
if (pmd_bad(pmd))
- return hmm_pfns_bad(start, end, walk);
+ return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

ptep = pte_offset_map(pmdp, addr);
i = (addr - range->start) >> PAGE_SHIFT;
@@ -589,13 +586,47 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
#define hmm_vma_walk_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

-static void hmm_pfns_clear(struct hmm_range *range,
- uint64_t *pfns,
- unsigned long addr,
- unsigned long end)
+static int hmm_vma_walk_test(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
{
- for (; addr < end; addr += PAGE_SIZE, pfns++)
- *pfns = range->values[HMM_PFN_NONE];
+ struct hmm_vma_walk *hmm_vma_walk = walk->private;
+ struct hmm_range *range = hmm_vma_walk->range;
+ struct vm_area_struct *vma = walk->vma;
+
+ /*
+ * Skip vma ranges that don't have struct page backing them or
+ * map I/O devices directly.
+ */
+ if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
+ return -EFAULT;
+
+ /*
+ * If the vma does not allow read access, then assume that it does not
+ * allow write access either. HMM does not support architectures
+ * that allow write without read.
+ */
+ if (!(vma->vm_flags & VM_READ)) {
+ bool fault, write_fault;
+
+ /*
+ * Check to see if a fault is requested for any page in the
+ * range.
+ */
+ hmm_range_need_fault(hmm_vma_walk, range->pfns +
+ ((start - range->start) >> PAGE_SHIFT),
+ (end - start) >> PAGE_SHIFT,
+ 0, &fault, &write_fault);
+ if (fault || write_fault)
+ return -EFAULT;
+
+ hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
+ hmm_vma_walk->last = end;
+
+ /* Skip this vma and continue processing the next vma. */
+ return 1;
+ }
+
+ return 0;
}

static const struct mm_walk_ops hmm_walk_ops = {
@@ -603,6 +634,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
.pmd_entry = hmm_vma_walk_pmd,
.pte_hole = hmm_vma_walk_hole,
.hugetlb_entry = hmm_vma_walk_hugetlb_entry,
+ .test_walk = hmm_vma_walk_test,
};

/**
@@ -635,11 +667,13 @@ static const struct mm_walk_ops hmm_walk_ops = {
*/
long hmm_range_fault(struct hmm_range *range, unsigned int flags)
{
- const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
- unsigned long start = range->start, end;
- struct hmm_vma_walk hmm_vma_walk;
+ unsigned long start = range->start;
+ struct hmm_vma_walk hmm_vma_walk = {
+ .range = range,
+ .last = start,
+ .flags = flags,
+ };
struct mm_struct *mm = range->notifier->mm;
- struct vm_area_struct *vma;
int ret;

lockdep_assert_held(&mm->mmap_sem);
@@ -648,52 +682,13 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
/* If range is no longer valid force retry. */
if (mmu_range_check_retry(range->notifier, range->notifier_seq))
return -EBUSY;
+ ret = walk_page_range(mm, start, range->end, &hmm_walk_ops,
+ &hmm_vma_walk);
+ start = hmm_vma_walk.last;
+ } while (ret == -EBUSY);

- vma = find_vma(mm, start);
- if (vma == NULL || (vma->vm_flags & device_vma))
- return -EFAULT;
-
- if (!(vma->vm_flags & VM_READ)) {
- /*
- * If vma do not allow read access, then assume that it
- * does not allow write access, either. HMM does not
- * support architecture that allow write without read.
- */
- hmm_pfns_clear(range, range->pfns,
- range->start, range->end);
- return -EPERM;
- }
-
- hmm_vma_walk.pgmap = NULL;
- hmm_vma_walk.last = start;
- hmm_vma_walk.flags = flags;
- hmm_vma_walk.range = range;
- end = min(range->end, vma->vm_end);
-
- walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops,
- &hmm_vma_walk);
-
- do {
- ret = walk_page_range(vma->vm_mm, start, end,
- &hmm_walk_ops, &hmm_vma_walk);
- start = hmm_vma_walk.last;
-
- /* Keep trying while the range is valid. */
- } while (ret == -EBUSY &&
- !mmu_range_check_retry(range->notifier,
- range->notifier_seq));
-
- if (ret) {
- unsigned long i;
-
- i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
- hmm_pfns_clear(range, &range->pfns[i],
- hmm_vma_walk.last, range->end);
- return ret;
- }
- start = end;
-
- } while (start < range->end);
+ if (ret)
+ return ret;

return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
}
--
2.20.1


2019-11-12 15:20:40

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] mm/hmm: make full use of walk_page_range()

Looks good,

Reviewed-by: Christoph Hellwig <[email protected]>

Although we could clean this up a tidbit more by removing the start
variable:

diff --git a/mm/hmm.c b/mm/hmm.c
index d4984a08ed9b..b5b1ed646c2f 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -667,10 +667,9 @@ static const struct mm_walk_ops hmm_walk_ops = {
*/
long hmm_range_fault(struct hmm_range *range, unsigned int flags)
{
- unsigned long start = range->start;
struct hmm_vma_walk hmm_vma_walk = {
.range = range,
- .last = start,
+ .last = range->start,
.flags = flags,
};
struct mm_struct *mm = range->notifier->mm;
@@ -682,9 +681,8 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
/* If range is no longer valid force retry. */
if (mmu_range_check_retry(range->notifier, range->notifier_seq))
return -EBUSY;
- ret = walk_page_range(mm, start, range->end, &hmm_walk_ops,
- &hmm_vma_walk);
- start = hmm_vma_walk.last;
+ ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
+ &hmm_walk_ops, &hmm_vma_walk);
} while (ret == -EBUSY);

if (ret)

2019-11-12 22:23:12

by Ralph Campbell

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] mm/hmm: make full use of walk_page_range()


On 11/12/19 7:18 AM, Christoph Hellwig wrote:
> Looks good,
>
> Reviewed-by: Christoph Hellwig <[email protected]>
>
> Although we could clean this up a tidbit more by removing the start
> variable:
>
> diff --git a/mm/hmm.c b/mm/hmm.c
> index d4984a08ed9b..b5b1ed646c2f 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -667,10 +667,9 @@ static const struct mm_walk_ops hmm_walk_ops = {
> */
> long hmm_range_fault(struct hmm_range *range, unsigned int flags)
> {
> - unsigned long start = range->start;
> struct hmm_vma_walk hmm_vma_walk = {
> .range = range,
> - .last = start,
> + .last = range->start,
> .flags = flags,
> };
> struct mm_struct *mm = range->notifier->mm;
> @@ -682,9 +681,8 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
> /* If range is no longer valid force retry. */
> if (mmu_range_check_retry(range->notifier, range->notifier_seq))
> return -EBUSY;
> - ret = walk_page_range(mm, start, range->end, &hmm_walk_ops,
> - &hmm_vma_walk);
> - start = hmm_vma_walk.last;
> + ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
> + &hmm_walk_ops, &hmm_vma_walk);
> } while (ret == -EBUSY);
>
> if (ret)
>

Thanks for the review.
I'll add this to patch 1 since I need to send a v5 for patch 2.

2019-11-14 14:25:20

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH v4 1/2] mm/hmm: make full use of walk_page_range()

On Mon, Nov 04, 2019 at 02:21:40PM -0800, Ralph Campbell wrote:
> hmm_range_fault() calls find_vma() and walk_page_range() in a loop.
> This is unnecessary duplication since walk_page_range() calls find_vma()
> in a loop already.
> Simplify hmm_range_fault() by defining a walk_test() callback function
> to filter unhandled vmas.
> This also fixes a bug where hmm_range_fault() was not checking
> start >= vma->vm_start before checking vma->vm_flags so hmm_range_fault()
> could return an error based on the wrong vma for the requested range.
> It also fixes a bug when the vma has no read access and the caller did
> not request a fault, there shouldn't be any error return code.
>
> Signed-off-by: Ralph Campbell <[email protected]>
> Cc: "Jérôme Glisse" <[email protected]>
> Cc: Jason Gunthorpe <[email protected]>
> Cc: Christoph Hellwig <[email protected]>
> Reviewed-by: Christoph Hellwig <[email protected]>
> ---
> mm/hmm.c | 121 ++++++++++++++++++++++++++-----------------------------
> 1 file changed, 58 insertions(+), 63 deletions(-)

Applied to hmm.git with Christoph's hunk merged in

Thanks,
Jason