2018-09-20 20:04:51

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 0/7] mm: faster get user pages

Changes since v1:

Fixed shift size for following huge PMD on powerpc

Updated change log with addition justification details

Keith Busch (7):
mm/gup_benchmark: Time put_page
mm/gup_benchmark: Add additional pinning methods
tools/gup_benchmark: Fix 'write' flag usage
tools/gup_benchmark: Allow user specified file
tools/gup_benchmark: Add parameter for hugetlb
mm/gup: Combine parameters into struct
mm/gup: Cache dev_pagemap while pinning pages

include/linux/huge_mm.h | 12 +-
include/linux/hugetlb.h | 2 +-
include/linux/mm.h | 27 ++-
mm/gup.c | 279 ++++++++++++++---------------
mm/gup_benchmark.c | 36 +++-
mm/huge_memory.c | 67 ++++---
mm/nommu.c | 6 +-
tools/testing/selftests/vm/gup_benchmark.c | 40 ++++-
8 files changed, 262 insertions(+), 207 deletions(-)

--
2.14.4



2018-09-20 20:03:27

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 6/7] mm/gup: Combine parameters into struct

This will make it easier to add new parameters that we may wish to
thread through these function calls.

Cc: Kirill Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
include/linux/huge_mm.h | 12 +--
include/linux/hugetlb.h | 2 +-
include/linux/mm.h | 21 ++++-
mm/gup.c | 238 +++++++++++++++++++++++-------------------------
mm/huge_memory.c | 32 +++----
mm/nommu.c | 6 +-
6 files changed, 151 insertions(+), 160 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 99c19b06d9a4..7d22e2c7f154 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -212,10 +212,8 @@ static inline int hpage_nr_pages(struct page *page)
return 1;
}

-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, int flags);
-struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, int flags);
+struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd);
+struct page *follow_devmap_pud(struct follow_page_context *ctx, pud_t *pud);

extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);

@@ -343,14 +341,12 @@ static inline void mm_put_huge_zero_page(struct mm_struct *mm)
return;
}

-static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
- unsigned long addr, pmd_t *pmd, int flags)
+static inline struct page *follow_devmap_pmd(struct gup_context *ctx, pmd_t *pmd)
{
return NULL;
}

-static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
- unsigned long addr, pud_t *pud, int flags)
+static inline struct page *follow_devmap_pud(struct gup_context *ctx, pud_t *pud)
{
return NULL;
}
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6b68e345f0ca..64b675863793 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -180,7 +180,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
static inline void hugetlb_show_meminfo(void)
{
}
-#define follow_huge_pd(vma, addr, hpd, flags, pdshift) NULL
+#define follow_huge_pd(ctx, hpd, pdshift) NULL
#define follow_huge_pmd(mm, addr, pmd, flags) NULL
#define follow_huge_pud(mm, addr, pud, flags) NULL
#define follow_huge_pgd(mm, addr, pgd, flags) NULL
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a61ebe8ad4ca..f1fd241c9071 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -378,6 +378,13 @@ struct vm_fault {
*/
};

+struct follow_page_context {
+ struct vm_area_struct *vma;
+ unsigned long address;
+ unsigned int page_mask;
+ unsigned int flags;
+};
+
/* page entry size for vm->huge_fault() */
enum page_entry_size {
PE_SIZE_PTE = 0,
@@ -2534,15 +2541,19 @@ static inline vm_fault_t vmf_error(int err)
return VM_FAULT_SIGBUS;
}

-struct page *follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int foll_flags,
- unsigned int *page_mask);
+struct page *follow_page_mask(struct follow_page_context *ctx);

static inline struct page *follow_page(struct vm_area_struct *vma,
unsigned long address, unsigned int foll_flags)
{
- unsigned int unused_page_mask;
- return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
+ struct follow_page_context ctx = {
+ .vma = vma,
+ .address = address,
+ .page_mask = 0,
+ .flags = foll_flags,
+ };
+
+ return follow_page_mask(&ctx);
}

#define FOLL_WRITE 0x01 /* check pte is writable */
diff --git a/mm/gup.c b/mm/gup.c
index 1abc8b4afff6..4c4da54f8dbe 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -20,8 +20,7 @@

#include "internal.h"

-static struct page *no_page_table(struct vm_area_struct *vma,
- unsigned int flags)
+static struct page *no_page_table(struct follow_page_context *ctx)
{
/*
* When core dumping an enormous anonymous area that nobody
@@ -31,28 +30,28 @@ static struct page *no_page_table(struct vm_area_struct *vma,
* But we can only make this optimization where a hole would surely
* be zero-filled if handle_mm_fault() actually did handle it.
*/
- if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
+ if ((ctx->flags & FOLL_DUMP) && (!ctx->vma->vm_ops ||
+ !ctx->vma->vm_ops->fault))
return ERR_PTR(-EFAULT);
return NULL;
}

-static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
- pte_t *pte, unsigned int flags)
+static int follow_pfn_pte(struct follow_page_context *ctx, pte_t *pte)
{
/* No page to get reference */
- if (flags & FOLL_GET)
+ if (ctx->flags & FOLL_GET)
return -EFAULT;

- if (flags & FOLL_TOUCH) {
+ if (ctx->flags & FOLL_TOUCH) {
pte_t entry = *pte;

- if (flags & FOLL_WRITE)
+ if (ctx->flags & FOLL_WRITE)
entry = pte_mkdirty(entry);
entry = pte_mkyoung(entry);

if (!pte_same(*pte, entry)) {
- set_pte_at(vma->vm_mm, address, pte, entry);
- update_mmu_cache(vma, address, pte);
+ set_pte_at(ctx->vma->vm_mm, ctx->address, pte, entry);
+ update_mmu_cache(ctx->vma, ctx->address, pte);
}
}

@@ -70,10 +69,9 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}

-static struct page *follow_page_pte(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmd, unsigned int flags)
+static struct page *follow_page_pte(struct follow_page_context *ctx, pmd_t *pmd)
{
- struct mm_struct *mm = vma->vm_mm;
+ struct mm_struct *mm = ctx->vma->vm_mm;
struct dev_pagemap *pgmap = NULL;
struct page *page;
spinlock_t *ptl;
@@ -81,9 +79,9 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,

retry:
if (unlikely(pmd_bad(*pmd)))
- return no_page_table(vma, flags);
+ return no_page_table(ctx);

- ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+ ptep = pte_offset_map_lock(mm, pmd, ctx->address, &ptl);
pte = *ptep;
if (!pte_present(pte)) {
swp_entry_t entry;
@@ -92,7 +90,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
* even while it is being migrated, so for that case we
* need migration_entry_wait().
*/
- if (likely(!(flags & FOLL_MIGRATION)))
+ if (likely(!(ctx->flags & FOLL_MIGRATION)))
goto no_page;
if (pte_none(pte))
goto no_page;
@@ -100,18 +98,18 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
if (!is_migration_entry(entry))
goto no_page;
pte_unmap_unlock(ptep, ptl);
- migration_entry_wait(mm, pmd, address);
+ migration_entry_wait(mm, pmd, ctx->address);
goto retry;
}
- if ((flags & FOLL_NUMA) && pte_protnone(pte))
+ if ((ctx->flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
+ if ((ctx->flags & FOLL_WRITE) && !can_follow_write_pte(pte, ctx->flags)) {
pte_unmap_unlock(ptep, ptl);
return NULL;
}

- page = vm_normal_page(vma, address, pte);
- if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
+ page = vm_normal_page(ctx->vma, ctx->address, pte);
+ if (!page && pte_devmap(pte) && (ctx->flags & FOLL_GET)) {
/*
* Only return device mapping pages in the FOLL_GET case since
* they are only valid while holding the pgmap reference.
@@ -122,7 +120,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
else
goto no_page;
} else if (unlikely(!page)) {
- if (flags & FOLL_DUMP) {
+ if (ctx->flags & FOLL_DUMP) {
/* Avoid special (like zero) pages in core dumps */
page = ERR_PTR(-EFAULT);
goto out;
@@ -133,13 +131,13 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
} else {
int ret;

- ret = follow_pfn_pte(vma, address, ptep, flags);
+ ret = follow_pfn_pte(ctx, ptep);
page = ERR_PTR(ret);
goto out;
}
}

- if (flags & FOLL_SPLIT && PageTransCompound(page)) {
+ if (ctx->flags & FOLL_SPLIT && PageTransCompound(page)) {
int ret;
get_page(page);
pte_unmap_unlock(ptep, ptl);
@@ -152,7 +150,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
goto retry;
}

- if (flags & FOLL_GET) {
+ if (ctx->flags & FOLL_GET) {
get_page(page);

/* drop the pgmap reference now that we hold the page */
@@ -161,8 +159,8 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
pgmap = NULL;
}
}
- if (flags & FOLL_TOUCH) {
- if ((flags & FOLL_WRITE) &&
+ if (ctx->flags & FOLL_TOUCH) {
+ if ((ctx->flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
set_page_dirty(page);
/*
@@ -172,7 +170,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
*/
mark_page_accessed(page);
}
- if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
+ if ((ctx->flags & FOLL_MLOCK) && (ctx->vma->vm_flags & VM_LOCKED)) {
/* Do not mlock pte-mapped THP */
if (PageTransCompound(page))
goto out;
@@ -205,44 +203,42 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
pte_unmap_unlock(ptep, ptl);
if (!pte_none(pte))
return NULL;
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}

-static struct page *follow_pmd_mask(struct vm_area_struct *vma,
- unsigned long address, pud_t *pudp,
- unsigned int flags, unsigned int *page_mask)
+static struct page *follow_pmd_mask(struct follow_page_context *ctx, pud_t *pudp)
{
pmd_t *pmd, pmdval;
spinlock_t *ptl;
struct page *page;
- struct mm_struct *mm = vma->vm_mm;
+ struct mm_struct *mm = ctx->vma->vm_mm;

- pmd = pmd_offset(pudp, address);
+ pmd = pmd_offset(pudp, ctx->address);
/*
* The READ_ONCE() will stabilize the pmdval in a register or
* on the stack so that it will stop changing under the code.
*/
pmdval = READ_ONCE(*pmd);
if (pmd_none(pmdval))
- return no_page_table(vma, flags);
- if (pmd_huge(pmdval) && vma->vm_flags & VM_HUGETLB) {
- page = follow_huge_pmd(mm, address, pmd, flags);
+ return no_page_table(ctx);
+ if (pmd_huge(pmdval) && ctx->vma->vm_flags & VM_HUGETLB) {
+ page = follow_huge_pmd(mm, ctx->address, pmd, ctx->flags);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}
if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
- page = follow_huge_pd(vma, address,
- __hugepd(pmd_val(pmdval)), flags,
- PMD_SHIFT);
+ page = follow_huge_pd(ctx->vma, ctx->address,
+ __hugepd(pmd_val(pmdval)),
+ ctx->flags, PMD_SHIFT);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}
retry:
if (!pmd_present(pmdval)) {
- if (likely(!(flags & FOLL_MIGRATION)))
- return no_page_table(vma, flags);
+ if (likely(!(ctx->flags & FOLL_MIGRATION)))
+ return no_page_table(ctx);
VM_BUG_ON(thp_migration_supported() &&
!is_pmd_migration_entry(pmdval));
if (is_pmd_migration_entry(pmdval))
@@ -253,46 +249,46 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
* mmap_sem is held in read mode
*/
if (pmd_none(pmdval))
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
goto retry;
}
if (pmd_devmap(pmdval)) {
ptl = pmd_lock(mm, pmd);
- page = follow_devmap_pmd(vma, address, pmd, flags);
+ page = follow_devmap_pmd(ctx, pmd);
spin_unlock(ptl);
if (page)
return page;
}
if (likely(!pmd_trans_huge(pmdval)))
- return follow_page_pte(vma, address, pmd, flags);
+ return follow_page_pte(ctx, pmd);

- if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
- return no_page_table(vma, flags);
+ if ((ctx->flags & FOLL_NUMA) && pmd_protnone(pmdval))
+ return no_page_table(ctx);

retry_locked:
ptl = pmd_lock(mm, pmd);
if (unlikely(pmd_none(*pmd))) {
spin_unlock(ptl);
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}
if (unlikely(!pmd_present(*pmd))) {
spin_unlock(ptl);
- if (likely(!(flags & FOLL_MIGRATION)))
- return no_page_table(vma, flags);
+ if (likely(!(ctx->flags & FOLL_MIGRATION)))
+ return no_page_table(ctx);
pmd_migration_entry_wait(mm, pmd);
goto retry_locked;
}
if (unlikely(!pmd_trans_huge(*pmd))) {
spin_unlock(ptl);
- return follow_page_pte(vma, address, pmd, flags);
+ return follow_page_pte(ctx, pmd);
}
- if (flags & FOLL_SPLIT) {
+ if (ctx->flags & FOLL_SPLIT) {
int ret;
page = pmd_page(*pmd);
if (is_huge_zero_page(page)) {
spin_unlock(ptl);
ret = 0;
- split_huge_pmd(vma, pmd, address);
+ split_huge_pmd(ctx->vma, pmd, ctx->address);
if (pmd_trans_unstable(pmd))
ret = -EBUSY;
} else {
@@ -303,82 +299,76 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
unlock_page(page);
put_page(page);
if (pmd_none(*pmd))
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}

return ret ? ERR_PTR(ret) :
- follow_page_pte(vma, address, pmd, flags);
+ follow_page_pte(ctx, pmd);
}
- page = follow_trans_huge_pmd(vma, address, pmd, flags);
+ page = follow_trans_huge_pmd(ctx->vma, ctx->address, pmd, ctx->flags);
spin_unlock(ptl);
- *page_mask = HPAGE_PMD_NR - 1;
+ ctx->page_mask = HPAGE_PMD_NR - 1;
return page;
}

-
-static struct page *follow_pud_mask(struct vm_area_struct *vma,
- unsigned long address, p4d_t *p4dp,
- unsigned int flags, unsigned int *page_mask)
+static struct page *follow_pud_mask(struct follow_page_context *ctx, p4d_t *p4dp)
{
pud_t *pud;
spinlock_t *ptl;
struct page *page;
- struct mm_struct *mm = vma->vm_mm;
+ struct mm_struct *mm = ctx->vma->vm_mm;

- pud = pud_offset(p4dp, address);
+ pud = pud_offset(p4dp, ctx->address);
if (pud_none(*pud))
- return no_page_table(vma, flags);
- if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
- page = follow_huge_pud(mm, address, pud, flags);
+ return no_page_table(ctx);
+ if (pud_huge(*pud) && ctx->vma->vm_flags & VM_HUGETLB) {
+ page = follow_huge_pud(mm, ctx->address, pud, ctx->flags);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}
if (is_hugepd(__hugepd(pud_val(*pud)))) {
- page = follow_huge_pd(vma, address,
- __hugepd(pud_val(*pud)), flags,
- PUD_SHIFT);
+ page = follow_huge_pd(ctx->vma, ctx->address,
+ __hugepd(pud_val(*pud)),
+ ctx->flags, PUD_SHIFT);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}
if (pud_devmap(*pud)) {
ptl = pud_lock(mm, pud);
- page = follow_devmap_pud(vma, address, pud, flags);
+ page = follow_devmap_pud(ctx, pud);
spin_unlock(ptl);
if (page)
return page;
}
if (unlikely(pud_bad(*pud)))
- return no_page_table(vma, flags);
+ return no_page_table(ctx);

- return follow_pmd_mask(vma, address, pud, flags, page_mask);
+ return follow_pmd_mask(ctx, pud);
}

-
-static struct page *follow_p4d_mask(struct vm_area_struct *vma,
- unsigned long address, pgd_t *pgdp,
- unsigned int flags, unsigned int *page_mask)
+static struct page *follow_p4d_mask(struct follow_page_context *ctx, pgd_t *pgdp)
{
p4d_t *p4d;
struct page *page;

- p4d = p4d_offset(pgdp, address);
+ p4d = p4d_offset(pgdp, ctx->address);
if (p4d_none(*p4d))
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
BUILD_BUG_ON(p4d_huge(*p4d));
if (unlikely(p4d_bad(*p4d)))
- return no_page_table(vma, flags);
+ return no_page_table(ctx);

if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
- page = follow_huge_pd(vma, address,
- __hugepd(p4d_val(*p4d)), flags,
- P4D_SHIFT);
+ page = follow_huge_pd(ctx->vma, ctx->address,
+ __hugepd(p4d_val(*p4d)),
+ ctx->flags, P4D_SHIFT);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}
- return follow_pud_mask(vma, address, p4d, flags, page_mask);
+ return follow_pud_mask(ctx, p4d);
}

/**
@@ -394,44 +384,40 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
* an error pointer if there is a mapping to something not represented
* by a page descriptor (see also vm_normal_page()).
*/
-struct page *follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags,
- unsigned int *page_mask)
+struct page *follow_page_mask(struct follow_page_context *ctx)
{
pgd_t *pgd;
struct page *page;
- struct mm_struct *mm = vma->vm_mm;
-
- *page_mask = 0;
+ struct mm_struct *mm = ctx->vma->vm_mm;

/* make this handle hugepd */
- page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
+ page = follow_huge_addr(mm, ctx->address, ctx->flags & FOLL_WRITE);
if (!IS_ERR(page)) {
- BUG_ON(flags & FOLL_GET);
+ BUG_ON(ctx->flags & FOLL_GET);
return page;
}

- pgd = pgd_offset(mm, address);
+ pgd = pgd_offset(mm, ctx->address);

if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- return no_page_table(vma, flags);
+ return no_page_table(ctx);

if (pgd_huge(*pgd)) {
- page = follow_huge_pgd(mm, address, pgd, flags);
+ page = follow_huge_pgd(mm, ctx->address, pgd, ctx->flags);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}
if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
- page = follow_huge_pd(vma, address,
- __hugepd(pgd_val(*pgd)), flags,
- PGDIR_SHIFT);
+ page = follow_huge_pd(ctx->vma, ctx->address,
+ __hugepd(pgd_val(*pgd)),
+ ctx->flags, PGDIR_SHIFT);
if (page)
return page;
- return no_page_table(vma, flags);
+ return no_page_table(ctx);
}

- return follow_p4d_mask(vma, address, pgd, flags, page_mask);
+ return follow_p4d_mask(ctx, pgd);
}

static int get_gate_page(struct mm_struct *mm, unsigned long address,
@@ -493,31 +479,31 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
* *@flags does not include FOLL_NOWAIT, the mmap_sem may be released.
* If it is, *@nonblocking will be set to 0 and -EBUSY returned.
*/
-static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
- unsigned long address, unsigned int *flags, int *nonblocking)
+static int faultin_page(struct task_struct *tsk, struct follow_page_context *ctx,
+ int *nonblocking)
{
unsigned int fault_flags = 0;
vm_fault_t ret;

/* mlock all present pages, but do not fault in new pages */
- if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
+ if ((ctx->flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
return -ENOENT;
- if (*flags & FOLL_WRITE)
+ if (ctx->flags & FOLL_WRITE)
fault_flags |= FAULT_FLAG_WRITE;
- if (*flags & FOLL_REMOTE)
+ if (ctx->flags & FOLL_REMOTE)
fault_flags |= FAULT_FLAG_REMOTE;
if (nonblocking)
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
- if (*flags & FOLL_NOWAIT)
+ if (ctx->flags & FOLL_NOWAIT)
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
- if (*flags & FOLL_TRIED) {
+ if (ctx->flags & FOLL_TRIED) {
VM_WARN_ON_ONCE(fault_flags & FAULT_FLAG_ALLOW_RETRY);
fault_flags |= FAULT_FLAG_TRIED;
}

- ret = handle_mm_fault(vma, address, fault_flags);
+ ret = handle_mm_fault(ctx->vma, ctx->address, fault_flags);
if (ret & VM_FAULT_ERROR) {
- int err = vm_fault_to_errno(ret, *flags);
+ int err = vm_fault_to_errno(ret, ctx->flags);

if (err)
return err;
@@ -546,8 +532,8 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
* which a read fault here might prevent (a readonly page might get
* reCOWed by userspace write).
*/
- if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
- *flags |= FOLL_COW;
+ if ((ret & VM_FAULT_WRITE) && !(ctx->vma->vm_flags & VM_WRITE))
+ ctx->flags |= FOLL_COW;
return 0;
}

@@ -660,8 +646,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
struct vm_area_struct **vmas, int *nonblocking)
{
long i = 0;
- unsigned int page_mask;
struct vm_area_struct *vma = NULL;
+ struct follow_page_context ctx = {};

if (!nr_pages)
return 0;
@@ -676,9 +662,9 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (!(gup_flags & FOLL_FORCE))
gup_flags |= FOLL_NUMA;

+ ctx.flags = gup_flags;
do {
struct page *page;
- unsigned int foll_flags = gup_flags;
unsigned int page_increm;

/* first iteration or cross vma bound */
@@ -691,7 +677,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
pages ? &pages[i] : NULL);
if (ret)
return i ? : ret;
- page_mask = 0;
+ ctx.page_mask = 0;
goto next_page;
}

@@ -704,6 +690,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
continue;
}
}
+ ctx.vma = vma;
+ ctx.address = start;
retry:
/*
* If we have a pending SIGKILL, don't keep faulting pages and
@@ -712,11 +700,11 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (unlikely(fatal_signal_pending(current)))
return i ? i : -ERESTARTSYS;
cond_resched();
- page = follow_page_mask(vma, start, foll_flags, &page_mask);
+
+ page = follow_page_mask(&ctx);
if (!page) {
int ret;
- ret = faultin_page(tsk, vma, start, &foll_flags,
- nonblocking);
+ ret = faultin_page(tsk, &ctx, nonblocking);
switch (ret) {
case 0:
goto retry;
@@ -743,14 +731,14 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
pages[i] = page;
flush_anon_page(vma, page, start);
flush_dcache_page(page);
- page_mask = 0;
+ ctx.page_mask = 0;
}
next_page:
if (vmas) {
vmas[i] = vma;
- page_mask = 0;
+ ctx.page_mask = 0;
}
- page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+ page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
if (page_increm > nr_pages)
page_increm = nr_pages;
i += page_increm;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 533f9b00147d..abd36e6afe2c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -851,11 +851,10 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
update_mmu_cache_pmd(vma, addr, pmd);
}

-struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, int flags)
+struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd)
{
unsigned long pfn = pmd_pfn(*pmd);
- struct mm_struct *mm = vma->vm_mm;
+ struct mm_struct *mm = ctx->vma->vm_mm;
struct dev_pagemap *pgmap;
struct page *page;

@@ -865,9 +864,9 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
* When we COW a devmap PMD entry, we split it into PTEs, so we should
* not be in this function with `flags & FOLL_COW` set.
*/
- WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
+ WARN_ONCE(ctx->flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");

- if (flags & FOLL_WRITE && !pmd_write(*pmd))
+ if (ctx->flags & FOLL_WRITE && !pmd_write(*pmd))
return NULL;

if (pmd_present(*pmd) && pmd_devmap(*pmd))
@@ -875,17 +874,17 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
else
return NULL;

- if (flags & FOLL_TOUCH)
- touch_pmd(vma, addr, pmd, flags);
+ if (ctx->flags & FOLL_TOUCH)
+ touch_pmd(ctx->vma, ctx->address, pmd, ctx->flags);

/*
* device mapped pages can only be returned if the
* caller will manage the page reference count.
*/
- if (!(flags & FOLL_GET))
+ if (!(ctx->flags & FOLL_GET))
return ERR_PTR(-EEXIST);

- pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
+ pfn += (ctx->address & ~PMD_MASK) >> PAGE_SHIFT;
pgmap = get_dev_pagemap(pfn, NULL);
if (!pgmap)
return ERR_PTR(-EFAULT);
@@ -999,17 +998,16 @@ static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
update_mmu_cache_pud(vma, addr, pud);
}

-struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, int flags)
+struct page *follow_devmap_pud(struct follow_page_context *ctx, pud_t *pud)
{
unsigned long pfn = pud_pfn(*pud);
- struct mm_struct *mm = vma->vm_mm;
+ struct mm_struct *mm = ctx->vma->vm_mm;
struct dev_pagemap *pgmap;
struct page *page;

assert_spin_locked(pud_lockptr(mm, pud));

- if (flags & FOLL_WRITE && !pud_write(*pud))
+ if (ctx->flags & FOLL_WRITE && !pud_write(*pud))
return NULL;

if (pud_present(*pud) && pud_devmap(*pud))
@@ -1017,17 +1015,17 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
else
return NULL;

- if (flags & FOLL_TOUCH)
- touch_pud(vma, addr, pud, flags);
+ if (ctx->flags & FOLL_TOUCH)
+ touch_pud(ctx->vma, ctx->address, pud, ctx->flags);

/*
* device mapped pages can only be returned if the
* caller will manage the page reference count.
*/
- if (!(flags & FOLL_GET))
+ if (!(ctx->flags & FOLL_GET))
return ERR_PTR(-EEXIST);

- pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
+ pfn += (ctx->address & ~PUD_MASK) >> PAGE_SHIFT;
pgmap = get_dev_pagemap(pfn, NULL);
if (!pgmap)
return ERR_PTR(-EFAULT);
diff --git a/mm/nommu.c b/mm/nommu.c
index e4aac33216ae..59db9f5dbb4e 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1709,11 +1709,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
return ret;
}

-struct page *follow_page_mask(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags,
- unsigned int *page_mask)
+struct page *follow_page_mask(struct follow_page_context *ctx)
{
- *page_mask = 0;
+ ctx->page_mask = 0;
return NULL;
}

--
2.14.4


2018-09-20 20:03:36

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 5/7] tools/gup_benchmark: Add parameter for hugetlb

Cc: Kirill Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
tools/testing/selftests/vm/gup_benchmark.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index f2c99e2436f8..5d96e2b3d2f1 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -38,7 +38,7 @@ int main(int argc, char **argv)
char *file = NULL;
char *p;

- while ((opt = getopt(argc, argv, "m:r:n:f:tTLU")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:f:tTLUH")) != -1) {
switch (opt) {
case 'm':
size = atoi(optarg) * MB;
@@ -64,6 +64,9 @@ int main(int argc, char **argv)
case 'w':
write = 1;
break;
+ case 'H':
+ flags |= MAP_HUGETLB;
+ break;
case 'f':
file = optarg;
flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
--
2.14.4


2018-09-20 20:03:45

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 3/7] tools/gup_benchmark: Fix 'write' flag usage

If the '-w' parameter was provided, the benchmark would exit due to a
mssing 'break'.

Cc: Kirill Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
tools/testing/selftests/vm/gup_benchmark.c | 1 +
1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index c2f785ded9b9..b2082df8beb4 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -60,6 +60,7 @@ int main(int argc, char **argv)
break;
case 'w':
write = 1;
+ break;
default:
return -1;
}
--
2.14.4


2018-09-20 20:04:45

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 4/7] tools/gup_benchmark: Allow user specified file

The gup benchmark by default maps anonymous memory. This patch allows a
user to specify a file to map, providing a means to test various
file backings, like device and filesystem DAX.

Cc: Kirill Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
tools/testing/selftests/vm/gup_benchmark.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index b2082df8beb4..f2c99e2436f8 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -33,9 +33,12 @@ int main(int argc, char **argv)
unsigned long size = 128 * MB;
int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
int cmd = GUP_FAST_BENCHMARK;
+ int file_map = -1;
+ int flags = MAP_ANONYMOUS | MAP_PRIVATE;
+ char *file = NULL;
char *p;

- while ((opt = getopt(argc, argv, "m:r:n:tTLU")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:f:tTLU")) != -1) {
switch (opt) {
case 'm':
size = atoi(optarg) * MB;
@@ -61,11 +64,22 @@ int main(int argc, char **argv)
case 'w':
write = 1;
break;
+ case 'f':
+ file = optarg;
+ flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
+ flags |= MAP_SHARED;
+ break;
default:
return -1;
}
}

+ if (file) {
+ file_map = open(file, O_RDWR|O_CREAT);
+ if (file_map < 0)
+ perror("open"), exit(file_map);
+ }
+
gup.nr_pages_per_call = nr_pages;
gup.flags = write;

@@ -73,8 +87,7 @@ int main(int argc, char **argv)
if (fd == -1)
perror("open"), exit(1);

- p = mmap(NULL, size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, file_map, 0);
if (p == MAP_FAILED)
perror("mmap"), exit(1);
gup.addr = (unsigned long)p;
--
2.14.4


2018-09-20 20:04:58

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 1/7] mm/gup_benchmark: Time put_page

We'd like to measure time to unpin user pages, so this adds a second
benchmark timer on put_page, separate from get_page.

Adding the field will breaks this ioctl ABI, but should be okay since
this an in-tree kernel selftest.

Cc: Kirill Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
mm/gup_benchmark.c | 8 ++++++--
tools/testing/selftests/vm/gup_benchmark.c | 6 ++++--
2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 6a473709e9b6..76cd35e477af 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -8,7 +8,8 @@
#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)

struct gup_benchmark {
- __u64 delta_usec;
+ __u64 get_delta_usec;
+ __u64 put_delta_usec;
__u64 addr;
__u64 size;
__u32 nr_pages_per_call;
@@ -47,14 +48,17 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
}
end_time = ktime_get();

- gup->delta_usec = ktime_us_delta(end_time, start_time);
+ gup->get_delta_usec = ktime_us_delta(end_time, start_time);
gup->size = addr - gup->addr;

+ start_time = ktime_get();
for (i = 0; i < nr_pages; i++) {
if (!pages[i])
break;
put_page(pages[i]);
}
+ end_time = ktime_get();
+ gup->put_delta_usec = ktime_us_delta(end_time, start_time);

kvfree(pages);
return 0;
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 36df55132036..bdcb97acd0ac 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -17,7 +17,8 @@
#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)

struct gup_benchmark {
- __u64 delta_usec;
+ __u64 get_delta_usec;
+ __u64 put_delta_usec;
__u64 addr;
__u64 size;
__u32 nr_pages_per_call;
@@ -81,7 +82,8 @@ int main(int argc, char **argv)
if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
perror("ioctl"), exit(1);

- printf("Time: %lld us", gup.delta_usec);
+ printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
+ gup.put_delta_usec);
if (gup.size != size)
printf(", truncated (size: %lld)", gup.size);
printf("\n");
--
2.14.4


2018-09-20 20:05:50

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 7/7] mm/gup: Cache dev_pagemap while pinning pages

Pinning pages from ZONE_DEVICE memory needs to check the backing device's
live-ness, which is tracked in the device's dev_pagemap metadata. This
metadata is stored in a radix tree and looking it up adds measurable
software overhead.

This patch avoids repeating this relatively costly operation when
dev_pagemap is used by caching the last dev_pagemap while getting user
pages. The gup_benchmark reports this reduces the time to get user pages
to as low as 1/3 of the previous time.

Cc: Kirill Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
include/linux/mm.h | 8 +++++++-
mm/gup.c | 41 ++++++++++++++++++++++++-----------------
mm/huge_memory.c | 35 +++++++++++++++--------------------
3 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1fd241c9071..d688e18a19c4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -380,6 +380,7 @@ struct vm_fault {

struct follow_page_context {
struct vm_area_struct *vma;
+ struct dev_pagemap *pgmap;
unsigned long address;
unsigned int page_mask;
unsigned int flags;
@@ -2546,14 +2547,19 @@ struct page *follow_page_mask(struct follow_page_context *ctx);
static inline struct page *follow_page(struct vm_area_struct *vma,
unsigned long address, unsigned int foll_flags)
{
+ struct page *page;
struct follow_page_context ctx = {
.vma = vma,
+ .pgmap = NULL,
.address = address,
.page_mask = 0,
.flags = foll_flags,
};

- return follow_page_mask(&ctx);
+ page = follow_page_mask(&ctx);
+ if (ctx.pgmap)
+ put_dev_pagemap(ctx.pgmap);
+ return page;
}

#define FOLL_WRITE 0x01 /* check pte is writable */
diff --git a/mm/gup.c b/mm/gup.c
index 4c4da54f8dbe..c98ea05eaa59 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -72,7 +72,6 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
static struct page *follow_page_pte(struct follow_page_context *ctx, pmd_t *pmd)
{
struct mm_struct *mm = ctx->vma->vm_mm;
- struct dev_pagemap *pgmap = NULL;
struct page *page;
spinlock_t *ptl;
pte_t *ptep, pte;
@@ -114,8 +113,8 @@ static struct page *follow_page_pte(struct follow_page_context *ctx, pmd_t *pmd)
* Only return device mapping pages in the FOLL_GET case since
* they are only valid while holding the pgmap reference.
*/
- pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
- if (pgmap)
+ ctx->pgmap = get_dev_pagemap(pte_pfn(pte), ctx->pgmap);
+ if (ctx->pgmap)
page = pte_page(pte);
else
goto no_page;
@@ -154,9 +153,9 @@ static struct page *follow_page_pte(struct follow_page_context *ctx, pmd_t *pmd)
get_page(page);

/* drop the pgmap reference now that we hold the page */
- if (pgmap) {
- put_dev_pagemap(pgmap);
- pgmap = NULL;
+ if (ctx->pgmap) {
+ put_dev_pagemap(ctx->pgmap);
+ ctx->pgmap = NULL;
}
}
if (ctx->flags & FOLL_TOUCH) {
@@ -645,7 +644,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
{
- long i = 0;
+ long ret = 0, i = 0;
struct vm_area_struct *vma = NULL;
struct follow_page_context ctx = {};

@@ -681,8 +680,10 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
goto next_page;
}

- if (!vma || check_vma_flags(vma, gup_flags))
- return i ? : -EFAULT;
+ if (!vma || check_vma_flags(vma, gup_flags)) {
+ ret = -EFAULT;
+ goto out;
+ }
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
@@ -697,23 +698,25 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
* If we have a pending SIGKILL, don't keep faulting pages and
* potentially allocating memory.
*/
- if (unlikely(fatal_signal_pending(current)))
- return i ? i : -ERESTARTSYS;
+ if (unlikely(fatal_signal_pending(current))) {
+ ret = -ERESTARTSYS;
+ goto out;
+ }
cond_resched();

page = follow_page_mask(&ctx);
if (!page) {
- int ret;
ret = faultin_page(tsk, &ctx, nonblocking);
switch (ret) {
case 0:
goto retry;
+ case -EBUSY:
+ ret = 0;
+ /* FALLTHRU */
case -EFAULT:
case -ENOMEM:
case -EHWPOISON:
- return i ? i : ret;
- case -EBUSY:
- return i;
+ goto out;
case -ENOENT:
goto next_page;
}
@@ -725,7 +728,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
*/
goto next_page;
} else if (IS_ERR(page)) {
- return i ? i : PTR_ERR(page);
+ ret = PTR_ERR(page);
+ goto out;
}
if (pages) {
pages[i] = page;
@@ -745,7 +749,10 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
start += page_increm * PAGE_SIZE;
nr_pages -= page_increm;
} while (nr_pages);
- return i;
+out:
+ if (ctx.pgmap)
+ put_dev_pagemap(ctx.pgmap);
+ return i ? i : ret;
}

static bool vma_permits_fault(struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index abd36e6afe2c..6787011385ce 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -851,12 +851,23 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
update_mmu_cache_pmd(vma, addr, pmd);
}

+static struct page *pagemap_page(struct follow_page_context *ctx,
+ unsigned long pfn)
+{
+ struct page *page;
+
+ ctx->pgmap = get_dev_pagemap(pfn, ctx->pgmap);
+ if (!ctx->pgmap)
+ return ERR_PTR(-EFAULT);
+ page = pfn_to_page(pfn);
+ get_page(page);
+ return page;
+}
+
struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd)
{
unsigned long pfn = pmd_pfn(*pmd);
struct mm_struct *mm = ctx->vma->vm_mm;
- struct dev_pagemap *pgmap;
- struct page *page;

assert_spin_locked(pmd_lockptr(mm, pmd));

@@ -885,14 +896,7 @@ struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd)
return ERR_PTR(-EEXIST);

pfn += (ctx->address & ~PMD_MASK) >> PAGE_SHIFT;
- pgmap = get_dev_pagemap(pfn, NULL);
- if (!pgmap)
- return ERR_PTR(-EFAULT);
- page = pfn_to_page(pfn);
- get_page(page);
- put_dev_pagemap(pgmap);
-
- return page;
+ return pagemap_page(ctx, pfn);
}

int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1002,8 +1006,6 @@ struct page *follow_devmap_pud(struct follow_page_context *ctx, pud_t *pud)
{
unsigned long pfn = pud_pfn(*pud);
struct mm_struct *mm = ctx->vma->vm_mm;
- struct dev_pagemap *pgmap;
- struct page *page;

assert_spin_locked(pud_lockptr(mm, pud));

@@ -1026,14 +1028,7 @@ struct page *follow_devmap_pud(struct follow_page_context *ctx, pud_t *pud)
return ERR_PTR(-EEXIST);

pfn += (ctx->address & ~PUD_MASK) >> PAGE_SHIFT;
- pgmap = get_dev_pagemap(pfn, NULL);
- if (!pgmap)
- return ERR_PTR(-EFAULT);
- page = pfn_to_page(pfn);
- get_page(page);
- put_dev_pagemap(pgmap);
-
- return page;
+ return pagemap_page(ctx, pfn);
}

int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
--
2.14.4


2018-09-20 20:06:15

by Keith Busch

[permalink] [raw]
Subject: [PATCHv2 2/7] mm/gup_benchmark: Add additional pinning methods

This patch provides new gup benchmark ioctl commands to run different
user page pinning methods, get_user_pages_longterm and get_user_pages,
in addition to the existing get_user_pages_fast.

Cc: Kirill Shutemov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Dan Williams <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
mm/gup_benchmark.c | 28 ++++++++++++++++++++++++++--
tools/testing/selftests/vm/gup_benchmark.c | 13 +++++++++++--
2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 76cd35e477af..e6d9ce001ffa 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -6,6 +6,8 @@
#include <linux/debugfs.h>

#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
+#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
+#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)

struct gup_benchmark {
__u64 get_delta_usec;
@@ -41,7 +43,23 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
nr = (next - addr) / PAGE_SIZE;
}

- nr = get_user_pages_fast(addr, nr, gup->flags & 1, pages + i);
+ switch (cmd) {
+ case GUP_FAST_BENCHMARK:
+ nr = get_user_pages_fast(addr, nr, gup->flags & 1,
+ pages + i);
+ break;
+ case GUP_LONGTERM_BENCHMARK:
+ nr = get_user_pages_longterm(addr, nr, gup->flags & 1,
+ pages + i, NULL);
+ break;
+ case GUP_BENCHMARK:
+ nr = get_user_pages(addr, nr, gup->flags & 1, pages + i,
+ NULL);
+ break;
+ default:
+ return -1;
+ }
+
if (nr <= 0)
break;
i += nr;
@@ -70,8 +88,14 @@ static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
struct gup_benchmark gup;
int ret;

- if (cmd != GUP_FAST_BENCHMARK)
+ switch (cmd) {
+ case GUP_FAST_BENCHMARK:
+ case GUP_LONGTERM_BENCHMARK:
+ case GUP_BENCHMARK:
+ break;
+ default:
return -EINVAL;
+ }

if (copy_from_user(&gup, (void __user *)arg, sizeof(gup)))
return -EFAULT;
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index bdcb97acd0ac..c2f785ded9b9 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,6 +15,8 @@
#define PAGE_SIZE sysconf(_SC_PAGESIZE)

#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
+#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
+#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)

struct gup_benchmark {
__u64 get_delta_usec;
@@ -30,9 +32,10 @@ int main(int argc, char **argv)
struct gup_benchmark gup;
unsigned long size = 128 * MB;
int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+ int cmd = GUP_FAST_BENCHMARK;
char *p;

- while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:tTLU")) != -1) {
switch (opt) {
case 'm':
size = atoi(optarg) * MB;
@@ -49,6 +52,12 @@ int main(int argc, char **argv)
case 'T':
thp = 0;
break;
+ case 'L':
+ cmd = GUP_LONGTERM_BENCHMARK;
+ break;
+ case 'U':
+ cmd = GUP_BENCHMARK;
+ break;
case 'w':
write = 1;
default:
@@ -79,7 +88,7 @@ int main(int argc, char **argv)

for (i = 0; i < repeats; i++) {
gup.size = size;
- if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
+ if (ioctl(fd, cmd, &gup))
perror("ioctl"), exit(1);

printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
--
2.14.4


2018-09-21 19:46:23

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCHv2 6/7] mm/gup: Combine parameters into struct

On Thu, Sep 20, 2018 at 1:03 PM Keith Busch <[email protected]> wrote:
>
> This will make it easier to add new parameters that we may wish to
> thread through these function calls.
>
> Cc: Kirill Shutemov <[email protected]>
> Cc: Dave Hansen <[email protected]>
> Cc: Dan Williams <[email protected]>
> Signed-off-by: Keith Busch <[email protected]>
> ---
> include/linux/huge_mm.h | 12 +--
> include/linux/hugetlb.h | 2 +-
> include/linux/mm.h | 21 ++++-
> mm/gup.c | 238 +++++++++++++++++++++++-------------------------
> mm/huge_memory.c | 32 +++----
> mm/nommu.c | 6 +-
> 6 files changed, 151 insertions(+), 160 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 99c19b06d9a4..7d22e2c7f154 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -212,10 +212,8 @@ static inline int hpage_nr_pages(struct page *page)
> return 1;
> }
>
> -struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
> - pmd_t *pmd, int flags);
> -struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
> - pud_t *pud, int flags);
> +struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd);
> +struct page *follow_devmap_pud(struct follow_page_context *ctx, pud_t *pud);
>
> extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
>
> @@ -343,14 +341,12 @@ static inline void mm_put_huge_zero_page(struct mm_struct *mm)
> return;
> }
>
> -static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
> - unsigned long addr, pmd_t *pmd, int flags)
> +static inline struct page *follow_devmap_pmd(struct gup_context *ctx, pmd_t *pmd)
> {
> return NULL;
> }
>
> -static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
> - unsigned long addr, pud_t *pud, int flags)
> +static inline struct page *follow_devmap_pud(struct gup_context *ctx, pud_t *pud)
> {
> return NULL;
> }
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 6b68e345f0ca..64b675863793 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -180,7 +180,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
> static inline void hugetlb_show_meminfo(void)
> {
> }
> -#define follow_huge_pd(vma, addr, hpd, flags, pdshift) NULL
> +#define follow_huge_pd(ctx, hpd, pdshift) NULL
> #define follow_huge_pmd(mm, addr, pmd, flags) NULL
> #define follow_huge_pud(mm, addr, pud, flags) NULL
> #define follow_huge_pgd(mm, addr, pgd, flags) NULL
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index a61ebe8ad4ca..f1fd241c9071 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -378,6 +378,13 @@ struct vm_fault {
> */
> };
>
> +struct follow_page_context {
> + struct vm_area_struct *vma;
> + unsigned long address;
> + unsigned int page_mask;
> + unsigned int flags;
> +};

I was thinking that the context would only contain the in/out
parameters of page_mask and pgmap. I don't think we plan to have more
parameters beyond this one addition and from a code readability
standpoint the less "ctx->" the better.

2018-09-21 19:47:24

by Dan Williams

[permalink] [raw]
Subject: Re: [PATCHv2 6/7] mm/gup: Combine parameters into struct

On Fri, Sep 21, 2018 at 12:45 PM Dan Williams <[email protected]> wrote:
>
> On Thu, Sep 20, 2018 at 1:03 PM Keith Busch <[email protected]> wrote:
> >
> > This will make it easier to add new parameters that we may wish to
> > thread through these function calls.
> >
> > Cc: Kirill Shutemov <[email protected]>
> > Cc: Dave Hansen <[email protected]>
> > Cc: Dan Williams <[email protected]>
> > Signed-off-by: Keith Busch <[email protected]>
> > ---
> > include/linux/huge_mm.h | 12 +--
> > include/linux/hugetlb.h | 2 +-
> > include/linux/mm.h | 21 ++++-
> > mm/gup.c | 238 +++++++++++++++++++++++-------------------------
> > mm/huge_memory.c | 32 +++----
> > mm/nommu.c | 6 +-
> > 6 files changed, 151 insertions(+), 160 deletions(-)
> >
> > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > index 99c19b06d9a4..7d22e2c7f154 100644
> > --- a/include/linux/huge_mm.h
> > +++ b/include/linux/huge_mm.h
> > @@ -212,10 +212,8 @@ static inline int hpage_nr_pages(struct page *page)
> > return 1;
> > }
> >
> > -struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
> > - pmd_t *pmd, int flags);
> > -struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
> > - pud_t *pud, int flags);
> > +struct page *follow_devmap_pmd(struct follow_page_context *ctx, pmd_t *pmd);
> > +struct page *follow_devmap_pud(struct follow_page_context *ctx, pud_t *pud);
> >
> > extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
> >
> > @@ -343,14 +341,12 @@ static inline void mm_put_huge_zero_page(struct mm_struct *mm)
> > return;
> > }
> >
> > -static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
> > - unsigned long addr, pmd_t *pmd, int flags)
> > +static inline struct page *follow_devmap_pmd(struct gup_context *ctx, pmd_t *pmd)
> > {
> > return NULL;
> > }
> >
> > -static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
> > - unsigned long addr, pud_t *pud, int flags)
> > +static inline struct page *follow_devmap_pud(struct gup_context *ctx, pud_t *pud)
> > {
> > return NULL;
> > }
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index 6b68e345f0ca..64b675863793 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -180,7 +180,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
> > static inline void hugetlb_show_meminfo(void)
> > {
> > }
> > -#define follow_huge_pd(vma, addr, hpd, flags, pdshift) NULL
> > +#define follow_huge_pd(ctx, hpd, pdshift) NULL
> > #define follow_huge_pmd(mm, addr, pmd, flags) NULL
> > #define follow_huge_pud(mm, addr, pud, flags) NULL
> > #define follow_huge_pgd(mm, addr, pgd, flags) NULL
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index a61ebe8ad4ca..f1fd241c9071 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -378,6 +378,13 @@ struct vm_fault {
> > */
> > };
> >
> > +struct follow_page_context {
> > + struct vm_area_struct *vma;
> > + unsigned long address;
> > + unsigned int page_mask;
> > + unsigned int flags;
> > +};
>
> I was thinking that the context would only contain the in/out
> parameters of page_mask and pgmap. I don't think we plan to have more
> parameters beyond this one addition and from a code readability
> standpoint the less "ctx->" the better.

Also, the next version of this series should be copied to linux-mm.

2018-09-23 03:24:53

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCHv2 6/7] mm/gup: Combine parameters into struct

Hi Keith,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.19-rc4 next-20180921]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/Keith-Busch/mm-gup_benchmark-Time-put_page/20180921-223159
config: x86_64-kexec (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All errors (new ones prefixed by >>):

mm/gup.c: In function 'follow_pmd_mask':
>> mm/gup.c:233:32: error: macro "follow_huge_pd" passed 5 arguments, but takes just 3
ctx->flags, PMD_SHIFT);
^
>> mm/gup.c:231:10: error: 'follow_huge_pd' undeclared (first use in this function); did you mean 'follow_page_pte'?
page = follow_huge_pd(ctx->vma, ctx->address,
^~~~~~~~~~~~~~
follow_page_pte
mm/gup.c:231:10: note: each undeclared identifier is reported only once for each function it appears in
mm/gup.c: In function 'follow_pud_mask':
mm/gup.c:333:32: error: macro "follow_huge_pd" passed 5 arguments, but takes just 3
ctx->flags, PUD_SHIFT);
^
mm/gup.c:331:10: error: 'follow_huge_pd' undeclared (first use in this function); did you mean 'follow_page_pte'?
page = follow_huge_pd(ctx->vma, ctx->address,
^~~~~~~~~~~~~~
follow_page_pte
mm/gup.c: In function 'follow_p4d_mask':
mm/gup.c:366:32: error: macro "follow_huge_pd" passed 5 arguments, but takes just 3
ctx->flags, P4D_SHIFT);
^
mm/gup.c:364:10: error: 'follow_huge_pd' undeclared (first use in this function); did you mean 'follow_page_pte'?
page = follow_huge_pd(ctx->vma, ctx->address,
^~~~~~~~~~~~~~
follow_page_pte
mm/gup.c: In function 'follow_page_mask':
mm/gup.c:414:34: error: macro "follow_huge_pd" passed 5 arguments, but takes just 3
ctx->flags, PGDIR_SHIFT);
^
mm/gup.c:412:10: error: 'follow_huge_pd' undeclared (first use in this function); did you mean 'follow_page_pte'?
page = follow_huge_pd(ctx->vma, ctx->address,
^~~~~~~~~~~~~~
follow_page_pte

vim +/follow_huge_pd +233 mm/gup.c

208
209 static struct page *follow_pmd_mask(struct follow_page_context *ctx, pud_t *pudp)
210 {
211 pmd_t *pmd, pmdval;
212 spinlock_t *ptl;
213 struct page *page;
214 struct mm_struct *mm = ctx->vma->vm_mm;
215
216 pmd = pmd_offset(pudp, ctx->address);
217 /*
218 * The READ_ONCE() will stabilize the pmdval in a register or
219 * on the stack so that it will stop changing under the code.
220 */
221 pmdval = READ_ONCE(*pmd);
222 if (pmd_none(pmdval))
223 return no_page_table(ctx);
224 if (pmd_huge(pmdval) && ctx->vma->vm_flags & VM_HUGETLB) {
225 page = follow_huge_pmd(mm, ctx->address, pmd, ctx->flags);
226 if (page)
227 return page;
228 return no_page_table(ctx);
229 }
230 if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
> 231 page = follow_huge_pd(ctx->vma, ctx->address,
232 __hugepd(pmd_val(pmdval)),
> 233 ctx->flags, PMD_SHIFT);
234 if (page)
235 return page;
236 return no_page_table(ctx);
237 }
238 retry:
239 if (!pmd_present(pmdval)) {
240 if (likely(!(ctx->flags & FOLL_MIGRATION)))
241 return no_page_table(ctx);
242 VM_BUG_ON(thp_migration_supported() &&
243 !is_pmd_migration_entry(pmdval));
244 if (is_pmd_migration_entry(pmdval))
245 pmd_migration_entry_wait(mm, pmd);
246 pmdval = READ_ONCE(*pmd);
247 /*
248 * MADV_DONTNEED may convert the pmd to null because
249 * mmap_sem is held in read mode
250 */
251 if (pmd_none(pmdval))
252 return no_page_table(ctx);
253 goto retry;
254 }
255 if (pmd_devmap(pmdval)) {
256 ptl = pmd_lock(mm, pmd);
257 page = follow_devmap_pmd(ctx, pmd);
258 spin_unlock(ptl);
259 if (page)
260 return page;
261 }
262 if (likely(!pmd_trans_huge(pmdval)))
263 return follow_page_pte(ctx, pmd);
264
265 if ((ctx->flags & FOLL_NUMA) && pmd_protnone(pmdval))
266 return no_page_table(ctx);
267
268 retry_locked:
269 ptl = pmd_lock(mm, pmd);
270 if (unlikely(pmd_none(*pmd))) {
271 spin_unlock(ptl);
272 return no_page_table(ctx);
273 }
274 if (unlikely(!pmd_present(*pmd))) {
275 spin_unlock(ptl);
276 if (likely(!(ctx->flags & FOLL_MIGRATION)))
277 return no_page_table(ctx);
278 pmd_migration_entry_wait(mm, pmd);
279 goto retry_locked;
280 }
281 if (unlikely(!pmd_trans_huge(*pmd))) {
282 spin_unlock(ptl);
283 return follow_page_pte(ctx, pmd);
284 }
285 if (ctx->flags & FOLL_SPLIT) {
286 int ret;
287 page = pmd_page(*pmd);
288 if (is_huge_zero_page(page)) {
289 spin_unlock(ptl);
290 ret = 0;
291 split_huge_pmd(ctx->vma, pmd, ctx->address);
292 if (pmd_trans_unstable(pmd))
293 ret = -EBUSY;
294 } else {
295 get_page(page);
296 spin_unlock(ptl);
297 lock_page(page);
298 ret = split_huge_page(page);
299 unlock_page(page);
300 put_page(page);
301 if (pmd_none(*pmd))
302 return no_page_table(ctx);
303 }
304
305 return ret ? ERR_PTR(ret) :
306 follow_page_pte(ctx, pmd);
307 }
308 page = follow_trans_huge_pmd(ctx->vma, ctx->address, pmd, ctx->flags);
309 spin_unlock(ptl);
310 ctx->page_mask = HPAGE_PMD_NR - 1;
311 return page;
312 }
313

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (6.04 kB)
.config.gz (25.44 kB)
Download all attachments