API __get_user_pages_fast() renamed to get_user_pages_fast_only()
to align with pin_user_pages_fast_only().
As part of this we will get rid of write parameter. Instead caller
will pass FOLL_WRITE to get_user_pages_fast_only(). This will not
change any existing functionality of the API.
All the callers are changed to pass FOLL_WRITE.
There are few places where 1 is passed to 2nd parameter of
__get_user_pages_fast() and return value is checked for 1
like [1]. Those are replaced with new inline
get_user_page_fast_only().
[1] if (__get_user_pages_fast(hva, 1, 1, &page) == 1)
Updated the documentation of the API.
Signed-off-by: Souptick Joarder <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Matthew Wilcox <[email protected]>
---
v2:
Updated the subject line and change log.
Address Matthew's comment to fix a bug and added
new inline get_user_page_fast_only().
arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
arch/powerpc/kvm/book3s_64_mmu_radix.c | 2 +-
arch/powerpc/perf/callchain_64.c | 4 +---
include/linux/mm.h | 10 ++++++++--
kernel/events/core.c | 4 ++--
mm/gup.c | 29 ++++++++++++++++-------------
virt/kvm/kvm_main.c | 8 +++-----
7 files changed, 32 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 18aed97..ddfc4c9 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -581,7 +581,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
* We always ask for write permission since the common case
* is that the page is writable.
*/
- if (__get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ if (get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
write_ok = true;
} else {
/* Call KVM generic code to do the slow-path check */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 3248f78..5d4c087 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -795,7 +795,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
* is that the page is writable.
*/
hva = gfn_to_hva_memslot(memslot, gfn);
- if (!kvm_ro && __get_user_pages_fast(hva, 1, 1, &page) == 1) {
+ if (!kvm_ro && get_user_page_fast_only(hva, FOLL_WRITE, &page)) {
upgrade_write = true;
} else {
unsigned long pfn;
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index 1bff896d..814d1c2 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -29,11 +29,9 @@ int read_user_stack_slow(void __user *ptr, void *buf, int nb)
unsigned long addr = (unsigned long) ptr;
unsigned long offset;
struct page *page;
- int nrpages;
void *kaddr;
- nrpages = __get_user_pages_fast(addr, 1, 1, &page);
- if (nrpages == 1) {
+ if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) {
kaddr = page_address(page);
/* align address to page boundary */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 93d93bd..8d4597f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1817,10 +1817,16 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
/*
* doesn't attempt to fault and will return short.
*/
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages);
+int get_user_pages_fast_only(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages);
int pin_user_pages_fast_only(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
+
+static inline bool get_user_page_fast_only(unsigned long addr,
+ unsigned int gup_flags, struct page **pagep)
+{
+ return get_user_pages_fast_only(addr, 1, gup_flags, pagep) == 1;
+}
/*
* per-process(per-mm_struct) statistics.
*/
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c94eb27..856d98c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6934,12 +6934,12 @@ static u64 perf_virt_to_phys(u64 virt)
* Walking the pages tables for user address.
* Interrupts are disabled, so it prevents any tear down
* of the page tables.
- * Try IRQ-safe __get_user_pages_fast first.
+ * Try IRQ-safe get_user_page_fast_only first.
* If failed, leave phys_addr as 0.
*/
if (current->mm != NULL) {
pagefault_disable();
- if (__get_user_pages_fast(virt, 1, 0, &p) == 1)
+ if (get_user_page_fast_only(virt, 0, &p))
phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
pagefault_enable();
}
diff --git a/mm/gup.c b/mm/gup.c
index 80f51a36..bb59f5c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2278,7 +2278,7 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
* to be special.
*
* For a futex to be placed on a THP tail page, get_futex_key requires a
- * __get_user_pages_fast implementation that can pin pages. Thus it's still
+ * get_user_pages_fast_only implementation that can pin pages. Thus it's still
* useful to have gup_huge_pmd even if we can't operate on ptes.
*/
static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
@@ -2683,7 +2683,7 @@ static inline void gup_pgd_range(unsigned long addr, unsigned long end,
#ifndef gup_fast_permitted
/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * Check if it's allowed to use get_user_pages_fast_only() for the range, or
* we need to fall back to the slow version:
*/
static bool gup_fast_permitted(unsigned long start, unsigned long end)
@@ -2776,8 +2776,14 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
return ret;
}
-
-/*
+/**
+ * get_user_pages_fast_only() - pin user pages in memory
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @gup_flags: flags modifying pin behaviour
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long.
+ *
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP.
* Note a difference with get_user_pages_fast: this always returns the
@@ -2786,8 +2792,8 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
* If the architecture does not support this function, simply return with no
* pages pinned.
*/
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
+int get_user_pages_fast_only(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages)
{
int nr_pinned;
/*
@@ -2797,10 +2803,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
* FOLL_FAST_ONLY is required in order to match the API description of
* this routine: no fall back to regular ("slow") GUP.
*/
- unsigned int gup_flags = FOLL_GET | FOLL_FAST_ONLY;
-
- if (write)
- gup_flags |= FOLL_WRITE;
+ gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
pages);
@@ -2815,7 +2818,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
return nr_pinned;
}
-EXPORT_SYMBOL_GPL(__get_user_pages_fast);
+EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
/**
* get_user_pages_fast() - pin user pages in memory
@@ -2886,8 +2889,8 @@ int pin_user_pages_fast(unsigned long start, int nr_pages,
EXPORT_SYMBOL_GPL(pin_user_pages_fast);
/*
- * This is the FOLL_PIN equivalent of __get_user_pages_fast(). Behavior is the
- * same, except that this one sets FOLL_PIN instead of FOLL_GET.
+ * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior
+ * is the same, except that this one sets FOLL_PIN instead of FOLL_GET.
*
* The API rules are the same, too: no negative values may be returned.
*/
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fc38d63..b62ea62 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1740,7 +1740,6 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
bool *writable, kvm_pfn_t *pfn)
{
struct page *page[1];
- int npages;
/*
* Fast pin a writable pfn only if it is a write fault request
@@ -1750,8 +1749,7 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
if (!(write_fault || writable))
return false;
- npages = __get_user_pages_fast(addr, 1, 1, page);
- if (npages == 1) {
+ if (get_user_page_fast_only(addr, FOLL_WRITE, page)) {
*pfn = page_to_pfn(page[0]);
if (writable)
@@ -1791,7 +1789,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
if (unlikely(!write_fault) && writable) {
struct page *wpage;
- if (__get_user_pages_fast(addr, 1, 1, &wpage) == 1) {
+ if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) {
*writable = true;
put_page(page);
page = wpage;
@@ -1998,7 +1996,7 @@ int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
if (entry < nr_pages)
return 0;
- return __get_user_pages_fast(addr, nr_pages, 1, pages);
+ return get_user_pages_fast_only(addr, nr_pages, FOLL_WRITE, pages);
}
EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
--
1.9.1
On 2020-05-23 21:27, Souptick Joarder wrote:
> API __get_user_pages_fast() renamed to get_user_pages_fast_only()
> to align with pin_user_pages_fast_only().
>
> As part of this we will get rid of write parameter. Instead caller
> will pass FOLL_WRITE to get_user_pages_fast_only(). This will not
> change any existing functionality of the API.
>
> All the callers are changed to pass FOLL_WRITE.
This looks good. A few nits below, but with those fixed, feel free to
add:
Reviewed-by: John Hubbard <[email protected]>
>
> There are few places where 1 is passed to 2nd parameter of
> __get_user_pages_fast() and return value is checked for 1
> like [1]. Those are replaced with new inline
> get_user_page_fast_only().
>
> [1] if (__get_user_pages_fast(hva, 1, 1, &page) == 1)
>
We try to avoid talking *too* much about the previous version of
the code. Just enough. So, instead of the above two paragraphs,
I'd compress it down to:
Also: introduce get_user_page_fast_only(), and use it in a few
places that hard-code nr_pages to 1.
...
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 93d93bd..8d4597f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1817,10 +1817,16 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
> /*
> * doesn't attempt to fault and will return short.
> */
> -int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> - struct page **pages);
> +int get_user_pages_fast_only(unsigned long start, int nr_pages,
> + unsigned int gup_flags, struct page **pages);
Silly nit:
Can you please leave the original indentation in place? I don't normally
comment about this, but I like the original indentation better, and it matches
the pin_user_pages_fast() below, too.
...
> @@ -2786,8 +2792,8 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
> * If the architecture does not support this function, simply return with no
> * pages pinned.
> */
> -int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> - struct page **pages)
> +int get_user_pages_fast_only(unsigned long start, int nr_pages,
> + unsigned int gup_flags, struct page **pages)
Same thing here: you've changed the original indentation, which was (arguably, but
to my mind anyway) more readable, and for no reason. It still would have fit within
80 cols.
I'm sure it's a perfect 50/50 mix of people who prefer either indentation style, and
so for brand new code, I'll remain silent, as long as it is consistent with either
itself and/or the surrounding code. But changing it back and forth is a bit
aggravating, and best avoided. :)
thanks,
--
John Hubbard
NVIDIA
On Mon, May 25, 2020 at 6:36 AM John Hubbard <[email protected]> wrote:
>
> On 2020-05-23 21:27, Souptick Joarder wrote:
> > API __get_user_pages_fast() renamed to get_user_pages_fast_only()
> > to align with pin_user_pages_fast_only().
> >
> > As part of this we will get rid of write parameter. Instead caller
> > will pass FOLL_WRITE to get_user_pages_fast_only(). This will not
> > change any existing functionality of the API.
> >
> > All the callers are changed to pass FOLL_WRITE.
>
> This looks good. A few nits below, but with those fixed, feel free to
> add:
>
> Reviewed-by: John Hubbard <[email protected]>
>
> >
> > There are few places where 1 is passed to 2nd parameter of
> > __get_user_pages_fast() and return value is checked for 1
> > like [1]. Those are replaced with new inline
> > get_user_page_fast_only().
> >
> > [1] if (__get_user_pages_fast(hva, 1, 1, &page) == 1)
> >
>
> We try to avoid talking *too* much about the previous version of
> the code. Just enough. So, instead of the above two paragraphs,
> I'd compress it down to:
>
> Also: introduce get_user_page_fast_only(), and use it in a few
> places that hard-code nr_pages to 1.
>
> ...
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index 93d93bd..8d4597f 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -1817,10 +1817,16 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
> > /*
> > * doesn't attempt to fault and will return short.
> > */
> > -int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> > - struct page **pages);
> > +int get_user_pages_fast_only(unsigned long start, int nr_pages,
> > + unsigned int gup_flags, struct page **pages);
>
> Silly nit:
>
> Can you please leave the original indentation in place? I don't normally
> comment about this, but I like the original indentation better, and it matches
> the pin_user_pages_fast() below, too.
>
> ...
> > @@ -2786,8 +2792,8 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
> > * If the architecture does not support this function, simply return with no
> > * pages pinned.
> > */
> > -int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> > - struct page **pages)
> > +int get_user_pages_fast_only(unsigned long start, int nr_pages,
> > + unsigned int gup_flags, struct page **pages)
>
>
> Same thing here: you've changed the original indentation, which was (arguably, but
> to my mind anyway) more readable, and for no reason. It still would have fit within
> 80 cols.
>
> I'm sure it's a perfect 50/50 mix of people who prefer either indentation style, and
> so for brand new code, I'll remain silent, as long as it is consistent with either
> itself and/or the surrounding code. But changing it back and forth is a bit
> aggravating, and best avoided. :)
Ok, along with these changes I will remove the *RFC* tag and repost it.