This patch series incorporates a few changes in the get_user_page usage
of sgi-gru.
The main change is the first patch, which is a trivial one line change to
convert put_page to put_user_page to enable tracking of get_user_pages.
The second patch removes an uneccessary ifdef of CONFIG_HUGETLB.
The third patch adds __get_user_pages_fast in atomic_pte_lookup to retrive
a physical user page in an atomic context instead of manually walking up
the page tables like the current code does. This patch should be subject to
more review from the gup people.
drivers/misc/sgi-gru/* builds after this patch series. But I do not have the
hardware to verify these changes.
The first patch implements gup tracking in the current code. This is to be tested
as to check whether gup tracking works properly. Currently, in the upstream kernels
put_user_page simply calls put_page. But that is to change in the future.
Any suggestions as to how to test this code?
The implementation of gup tracking is in:
https://github.com/johnhubbard/linux/tree/gup_dma_core
We could test it by applying the first patch to the above tree and test it.
More details are in the individual changelogs.
Bharath Vedartham (3):
sgi-gru: Convert put_page() to get_user_page*()
sgi-gru: Remove CONFIG_HUGETLB_PAGE ifdef
sgi-gru: Use __get_user_pages_fast in atomic_pte_lookup
drivers/misc/sgi-gru/grufault.c | 50 +++++++----------------------------------
1 file changed, 8 insertions(+), 42 deletions(-)
--
2.7.4
For pages that were retained via get_user_pages*(), release those pages
via the new put_user_page*() routines, instead of via put_page().
This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
("mm: introduce put_user_page*(), placeholder versions").
Cc: Ira Weiny <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Jérôme Glisse <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Dimitri Sivanich <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Bharath Vedartham <[email protected]>
---
drivers/misc/sgi-gru/grufault.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 4b713a8..61b3447 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -188,7 +188,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
return -EFAULT;
*paddr = page_to_phys(page);
- put_page(page);
+ put_user_page(page);
return 0;
}
--
2.7.4
is_vm_hugetlb_page has checks for whether CONFIG_HUGETLB_PAGE is defined
or not. If CONFIG_HUGETLB_PAGE is not defined is_vm_hugetlb_page will
always return false. There is no need to have an uneccessary
CONFIG_HUGETLB_PAGE check in the code.
Cc: Ira Weiny <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Jérôme Glisse <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Dimitri Sivanich <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Bharath Vedartham <[email protected]>
---
drivers/misc/sgi-gru/grufault.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 61b3447..75108d2 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -180,11 +180,8 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
{
struct page *page;
-#ifdef CONFIG_HUGETLB_PAGE
*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
-#else
- *pageshift = PAGE_SHIFT;
-#endif
+
if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
return -EFAULT;
*paddr = page_to_phys(page);
@@ -238,11 +235,9 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
return 1;
*paddr = pte_pfn(pte) << PAGE_SHIFT;
-#ifdef CONFIG_HUGETLB_PAGE
+
*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
-#else
- *pageshift = PAGE_SHIFT;
-#endif
+
return 0;
err:
--
2.7.4
*pte_lookup functions get the physical address for a given virtual
address by getting a physical page using gup and use page_to_phys to get
the physical address.
Currently, atomic_pte_lookup manually walks the page tables. If this
function fails to get a physical page, it will fall back too
non_atomic_pte_lookup to get a physical page which uses the slow gup
path to get the physical page.
Instead of manually walking the page tables use __get_user_pages_fast
which does the same thing and it does not fall back to the slow gup
path.
This is largely inspired from kvm code. kvm uses __get_user_pages_fast
in hva_to_pfn_fast function which can run in an atomic context.
Cc: Ira Weiny <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Jérôme Glisse <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Dimitri Sivanich <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Bharath Vedartham <[email protected]>
---
drivers/misc/sgi-gru/grufault.c | 39 +++++----------------------------------
1 file changed, 5 insertions(+), 34 deletions(-)
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 75108d2..121c9a4 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -202,46 +202,17 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
int write, unsigned long *paddr, int *pageshift)
{
- pgd_t *pgdp;
- p4d_t *p4dp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t pte;
-
- pgdp = pgd_offset(vma->vm_mm, vaddr);
- if (unlikely(pgd_none(*pgdp)))
- goto err;
-
- p4dp = p4d_offset(pgdp, vaddr);
- if (unlikely(p4d_none(*p4dp)))
- goto err;
-
- pudp = pud_offset(p4dp, vaddr);
- if (unlikely(pud_none(*pudp)))
- goto err;
+ struct page *page;
- pmdp = pmd_offset(pudp, vaddr);
- if (unlikely(pmd_none(*pmdp)))
- goto err;
-#ifdef CONFIG_X86_64
- if (unlikely(pmd_large(*pmdp)))
- pte = *(pte_t *) pmdp;
- else
-#endif
- pte = *pte_offset_kernel(pmdp, vaddr);
+ *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
- if (unlikely(!pte_present(pte) ||
- (write && (!pte_write(pte) || !pte_dirty(pte)))))
+ if (!__get_user_pages_fast(vaddr, 1, write, &page))
return 1;
- *paddr = pte_pfn(pte) << PAGE_SHIFT;
-
- *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
+ *paddr = page_to_phys(page);
+ put_user_page(page);
return 0;
-
-err:
- return 1;
}
static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
--
2.7.4
On 7/21/19 8:58 AM, Bharath Vedartham wrote:
> For pages that were retained via get_user_pages*(), release those pages
> via the new put_user_page*() routines, instead of via put_page().
>
> This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
> ("mm: introduce put_user_page*(), placeholder versions").
>
> Cc: Ira Weiny <[email protected]>
> Cc: John Hubbard <[email protected]>
> Cc: Jérôme Glisse <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>
> Cc: Dimitri Sivanich <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Bharath Vedartham <[email protected]>
> ---
> drivers/misc/sgi-gru/grufault.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
Reviewed-by: John Hubbard <[email protected]>
thanks,
--
John Hubbard
NVIDIA
> diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
> index 4b713a8..61b3447 100644
> --- a/drivers/misc/sgi-gru/grufault.c
> +++ b/drivers/misc/sgi-gru/grufault.c
> @@ -188,7 +188,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
> if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
> return -EFAULT;
> *paddr = page_to_phys(page);
> - put_page(page);
> + put_user_page(page);
> return 0;
> }
>
>
On 7/21/19 8:58 AM, Bharath Vedartham wrote:
> *pte_lookup functions get the physical address for a given virtual
> address by getting a physical page using gup and use page_to_phys to get
> the physical address.
>
> Currently, atomic_pte_lookup manually walks the page tables. If this
> function fails to get a physical page, it will fall back too
> non_atomic_pte_lookup to get a physical page which uses the slow gup
> path to get the physical page.
>
> Instead of manually walking the page tables use __get_user_pages_fast
> which does the same thing and it does not fall back to the slow gup
> path.
>
> This is largely inspired from kvm code. kvm uses __get_user_pages_fast
> in hva_to_pfn_fast function which can run in an atomic context.
>
> Cc: Ira Weiny <[email protected]>
> Cc: John Hubbard <[email protected]>
> Cc: Jérôme Glisse <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>
> Cc: Dimitri Sivanich <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Bharath Vedartham <[email protected]>
> ---
> drivers/misc/sgi-gru/grufault.c | 39 +++++----------------------------------
> 1 file changed, 5 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
> index 75108d2..121c9a4 100644
> --- a/drivers/misc/sgi-gru/grufault.c
> +++ b/drivers/misc/sgi-gru/grufault.c
> @@ -202,46 +202,17 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
> static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
> int write, unsigned long *paddr, int *pageshift)
> {
> - pgd_t *pgdp;
> - p4d_t *p4dp;
> - pud_t *pudp;
> - pmd_t *pmdp;
> - pte_t pte;
> -
> - pgdp = pgd_offset(vma->vm_mm, vaddr);
> - if (unlikely(pgd_none(*pgdp)))
> - goto err;
> -
> - p4dp = p4d_offset(pgdp, vaddr);
> - if (unlikely(p4d_none(*p4dp)))
> - goto err;
> -
> - pudp = pud_offset(p4dp, vaddr);
> - if (unlikely(pud_none(*pudp)))
> - goto err;
> + struct page *page;
>
> - pmdp = pmd_offset(pudp, vaddr);
> - if (unlikely(pmd_none(*pmdp)))
> - goto err;
> -#ifdef CONFIG_X86_64
> - if (unlikely(pmd_large(*pmdp)))
> - pte = *(pte_t *) pmdp;
> - else
> -#endif
> - pte = *pte_offset_kernel(pmdp, vaddr);
> + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
>
> - if (unlikely(!pte_present(pte) ||
> - (write && (!pte_write(pte) || !pte_dirty(pte)))))
> + if (!__get_user_pages_fast(vaddr, 1, write, &page))
> return 1;
Let's please use numeric, not boolean comparison, for the return value of
gup.
Also, optional: as long as you're there, atomic_pte_lookup() ought to
either return a bool (true == success) or an errno, rather than a
numeric zero or one.
Other than that, this looks like a good cleanup, I wonder how many
open-coded gup implementations are floating around like this.
thanks,
--
John Hubbard
NVIDIA
>
> - *paddr = pte_pfn(pte) << PAGE_SHIFT;
> -
> - *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> + *paddr = page_to_phys(page);
> + put_user_page(page);
>
> return 0;
> -
> -err:
> - return 1;
> }
>
> static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
>
On 7/21/19 8:58 AM, Bharath Vedartham wrote:
> is_vm_hugetlb_page has checks for whether CONFIG_HUGETLB_PAGE is defined
> or not. If CONFIG_HUGETLB_PAGE is not defined is_vm_hugetlb_page will
> always return false. There is no need to have an uneccessary
> CONFIG_HUGETLB_PAGE check in the code.
>
> Cc: Ira Weiny <[email protected]>
> Cc: John Hubbard <[email protected]>
> Cc: Jérôme Glisse <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>
> Cc: Dimitri Sivanich <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Bharath Vedartham <[email protected]>
> ---
> drivers/misc/sgi-gru/grufault.c | 11 +++--------
> 1 file changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
> index 61b3447..75108d2 100644
> --- a/drivers/misc/sgi-gru/grufault.c
> +++ b/drivers/misc/sgi-gru/grufault.c
> @@ -180,11 +180,8 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
> {
> struct page *page;
>
> -#ifdef CONFIG_HUGETLB_PAGE
> *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> -#else
> - *pageshift = PAGE_SHIFT;
> -#endif
> +
> if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
> return -EFAULT;
> *paddr = page_to_phys(page);
> @@ -238,11 +235,9 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
> return 1;
>
> *paddr = pte_pfn(pte) << PAGE_SHIFT;
> -#ifdef CONFIG_HUGETLB_PAGE
> +
> *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> -#else
> - *pageshift = PAGE_SHIFT;
> -#endif
> +
> return 0;
>
> err:
>
Looks like an accurate cleanup to me.
Reviewed-by: John Hubbard <[email protected]>
thanks,
--
John Hubbard
NVIDIA
I suspect I'm being massively pedantic here, but the comments for atomic_pte_lookup() note:
* Only supports Intel large pages (2MB only) on x86_64.
* ZZZ - hugepage support is incomplete
That makes me wonder how many systems using this hardware are actually configured with CONFIG_HUGETLB_PAGE.
I ask as in the most common case, this is likely introducing a few extra instructions and possibly an additional branch to a routine that is called per-fault.
So the nit-picky questions are:
1) Does the code really need to be cleaned up in this way?
2) If it does, does it make more sense (given the way pmd_large() is handled now in atomic_pte_lookup()) for this to be coded as:
if (unlikely(is_vm_hugetlb_page(vma)))
*pageshift = HPAGE_SHIFT;
else
*pageshift = PAGE_SHIFT;
In all likelihood, these questions are no-ops, and the optimizer may even make my questions completely moot, but I thought I might as well ask anyway.
> On Jul 21, 2019, at 9:58 AM, Bharath Vedartham <[email protected]> wrote:
>
> is_vm_hugetlb_page has checks for whether CONFIG_HUGETLB_PAGE is defined
> or not. If CONFIG_HUGETLB_PAGE is not defined is_vm_hugetlb_page will
> always return false. There is no need to have an uneccessary
> CONFIG_HUGETLB_PAGE check in the code.
>
> Cc: Ira Weiny <[email protected]>
> Cc: John Hubbard <[email protected]>
> Cc: Jérôme Glisse <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>
> Cc: Dimitri Sivanich <[email protected]>
> Cc: Arnd Bergmann <[email protected]>
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Bharath Vedartham <[email protected]>
> ---
> drivers/misc/sgi-gru/grufault.c | 11 +++--------
> 1 file changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
> index 61b3447..75108d2 100644
> --- a/drivers/misc/sgi-gru/grufault.c
> +++ b/drivers/misc/sgi-gru/grufault.c
> @@ -180,11 +180,8 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
> {
> struct page *page;
>
> -#ifdef CONFIG_HUGETLB_PAGE
> *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> -#else
> - *pageshift = PAGE_SHIFT;
> -#endif
> +
> if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
> return -EFAULT;
> *paddr = page_to_phys(page);
> @@ -238,11 +235,9 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
> return 1;
>
> *paddr = pte_pfn(pte) << PAGE_SHIFT;
> -#ifdef CONFIG_HUGETLB_PAGE
> +
> *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> -#else
> - *pageshift = PAGE_SHIFT;
> -#endif
> +
> return 0;
>
> err:
> --
> 2.7.4
>
On Sun, Jul 21, 2019 at 09:28:02PM +0530, Bharath Vedartham wrote:
> This patch series incorporates a few changes in the get_user_page usage
> of sgi-gru.
>
> The main change is the first patch, which is a trivial one line change to
> convert put_page to put_user_page to enable tracking of get_user_pages.
>
> The second patch removes an uneccessary ifdef of CONFIG_HUGETLB.
>
> The third patch adds __get_user_pages_fast in atomic_pte_lookup to retrive
> a physical user page in an atomic context instead of manually walking up
> the page tables like the current code does. This patch should be subject to
> more review from the gup people.
>
> drivers/misc/sgi-gru/* builds after this patch series. But I do not have the
> hardware to verify these changes.
>
> The first patch implements gup tracking in the current code. This is to be tested
> as to check whether gup tracking works properly. Currently, in the upstream kernels
> put_user_page simply calls put_page. But that is to change in the future.
> Any suggestions as to how to test this code?
>
> The implementation of gup tracking is in:
> https://github.com/johnhubbard/linux/tree/gup_dma_core
>
> We could test it by applying the first patch to the above tree and test it.
>
> More details are in the individual changelogs.
>
> Bharath Vedartham (3):
I don't have an opinion on the second patch regarding any performance concerns
since I'm not familiar with this hardware.
But from a GUP POV For the series.
Reviewed-by: Ira Weiny <[email protected]>
> sgi-gru: Convert put_page() to get_user_page*()
> sgi-gru: Remove CONFIG_HUGETLB_PAGE ifdef
> sgi-gru: Use __get_user_pages_fast in atomic_pte_lookup
>
> drivers/misc/sgi-gru/grufault.c | 50 +++++++----------------------------------
> 1 file changed, 8 insertions(+), 42 deletions(-)
>
> --
> 2.7.4
>
On Sun, Jul 21, 2019 at 07:25:31PM -0700, John Hubbard wrote:
> On 7/21/19 8:58 AM, Bharath Vedartham wrote:
> > For pages that were retained via get_user_pages*(), release those pages
> > via the new put_user_page*() routines, instead of via put_page().
> >
> > This is part a tree-wide conversion, as described in commit fc1d8e7cca2d
> > ("mm: introduce put_user_page*(), placeholder versions").
> >
> > Cc: Ira Weiny <[email protected]>
> > Cc: John Hubbard <[email protected]>
> > Cc: J?r?me Glisse <[email protected]>
> > Cc: Greg Kroah-Hartman <[email protected]>
> > Cc: Dimitri Sivanich <[email protected]>
> > Cc: Arnd Bergmann <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Signed-off-by: Bharath Vedartham <[email protected]>
> > ---
> > drivers/misc/sgi-gru/grufault.c | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
>
> Reviewed-by: John Hubbard <[email protected]>
Thanks!
> thanks,
> --
> John Hubbard
> NVIDIA
>
> > diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
> > index 4b713a8..61b3447 100644
> > --- a/drivers/misc/sgi-gru/grufault.c
> > +++ b/drivers/misc/sgi-gru/grufault.c
> > @@ -188,7 +188,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
> > if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
> > return -EFAULT;
> > *paddr = page_to_phys(page);
> > - put_page(page);
> > + put_user_page(page);
> > return 0;
> > }
> >
> >
On Sun, Jul 21, 2019 at 09:20:38PM -0600, William Kucharski wrote:
> I suspect I'm being massively pedantic here, but the comments for atomic_pte_lookup() note:
>
> * Only supports Intel large pages (2MB only) on x86_64.
> * ZZZ - hugepage support is incomplete
>
> That makes me wonder how many systems using this hardware are actually configured with CONFIG_HUGETLB_PAGE.
>
> I ask as in the most common case, this is likely introducing a few extra instructions and possibly an additional branch to a routine that is called per-fault.
>
> So the nit-picky questions are:
>
> 1) Does the code really need to be cleaned up in this way?
>
> 2) If it does, does it make more sense (given the way pmd_large() is handled now in atomic_pte_lookup()) for this to be coded as:
>
> if (unlikely(is_vm_hugetlb_page(vma)))
> *pageshift = HPAGE_SHIFT;
> else
> *pageshift = PAGE_SHIFT;
>
> In all likelihood, these questions are no-ops, and the optimizer may even make my questions completely moot, but I thought I might as well ask anyway.
>
That sounds reasonable. I am not really sure as to how much of
an improvement it would be, the condition will be evaluated eitherways
AFAIK? Eitherways, the ternary operator does not look good. I ll make a
version 2 of this.
> > On Jul 21, 2019, at 9:58 AM, Bharath Vedartham <[email protected]> wrote:
> >
> > is_vm_hugetlb_page has checks for whether CONFIG_HUGETLB_PAGE is defined
> > or not. If CONFIG_HUGETLB_PAGE is not defined is_vm_hugetlb_page will
> > always return false. There is no need to have an uneccessary
> > CONFIG_HUGETLB_PAGE check in the code.
> >
> > Cc: Ira Weiny <[email protected]>
> > Cc: John Hubbard <[email protected]>
> > Cc: J?r?me Glisse <[email protected]>
> > Cc: Greg Kroah-Hartman <[email protected]>
> > Cc: Dimitri Sivanich <[email protected]>
> > Cc: Arnd Bergmann <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Signed-off-by: Bharath Vedartham <[email protected]>
> > ---
> > drivers/misc/sgi-gru/grufault.c | 11 +++--------
> > 1 file changed, 3 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
> > index 61b3447..75108d2 100644
> > --- a/drivers/misc/sgi-gru/grufault.c
> > +++ b/drivers/misc/sgi-gru/grufault.c
> > @@ -180,11 +180,8 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
> > {
> > struct page *page;
> >
> > -#ifdef CONFIG_HUGETLB_PAGE
> > *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> > -#else
> > - *pageshift = PAGE_SHIFT;
> > -#endif
> > +
> > if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
> > return -EFAULT;
> > *paddr = page_to_phys(page);
> > @@ -238,11 +235,9 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
> > return 1;
> >
> > *paddr = pte_pfn(pte) << PAGE_SHIFT;
> > -#ifdef CONFIG_HUGETLB_PAGE
> > +
> > *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> > -#else
> > - *pageshift = PAGE_SHIFT;
> > -#endif
> > +
> > return 0;
> >
> > err:
> > --
> > 2.7.4
> >
>
On Sun, Jul 21, 2019 at 07:32:36PM -0700, John Hubbard wrote:
> On 7/21/19 8:58 AM, Bharath Vedartham wrote:
> > *pte_lookup functions get the physical address for a given virtual
> > address by getting a physical page using gup and use page_to_phys to get
> > the physical address.
> >
> > Currently, atomic_pte_lookup manually walks the page tables. If this
> > function fails to get a physical page, it will fall back too
> > non_atomic_pte_lookup to get a physical page which uses the slow gup
> > path to get the physical page.
> >
> > Instead of manually walking the page tables use __get_user_pages_fast
> > which does the same thing and it does not fall back to the slow gup
> > path.
> >
> > This is largely inspired from kvm code. kvm uses __get_user_pages_fast
> > in hva_to_pfn_fast function which can run in an atomic context.
> >
> > Cc: Ira Weiny <[email protected]>
> > Cc: John Hubbard <[email protected]>
> > Cc: J?r?me Glisse <[email protected]>
> > Cc: Greg Kroah-Hartman <[email protected]>
> > Cc: Dimitri Sivanich <[email protected]>
> > Cc: Arnd Bergmann <[email protected]>
> > Cc: [email protected]
> > Cc: [email protected]
> > Signed-off-by: Bharath Vedartham <[email protected]>
> > ---
> > drivers/misc/sgi-gru/grufault.c | 39 +++++----------------------------------
> > 1 file changed, 5 insertions(+), 34 deletions(-)
> >
> > diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
> > index 75108d2..121c9a4 100644
> > --- a/drivers/misc/sgi-gru/grufault.c
> > +++ b/drivers/misc/sgi-gru/grufault.c
> > @@ -202,46 +202,17 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
> > static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
> > int write, unsigned long *paddr, int *pageshift)
> > {
> > - pgd_t *pgdp;
> > - p4d_t *p4dp;
> > - pud_t *pudp;
> > - pmd_t *pmdp;
> > - pte_t pte;
> > -
> > - pgdp = pgd_offset(vma->vm_mm, vaddr);
> > - if (unlikely(pgd_none(*pgdp)))
> > - goto err;
> > -
> > - p4dp = p4d_offset(pgdp, vaddr);
> > - if (unlikely(p4d_none(*p4dp)))
> > - goto err;
> > -
> > - pudp = pud_offset(p4dp, vaddr);
> > - if (unlikely(pud_none(*pudp)))
> > - goto err;
> > + struct page *page;
> >
> > - pmdp = pmd_offset(pudp, vaddr);
> > - if (unlikely(pmd_none(*pmdp)))
> > - goto err;
> > -#ifdef CONFIG_X86_64
> > - if (unlikely(pmd_large(*pmdp)))
> > - pte = *(pte_t *) pmdp;
> > - else
> > -#endif
> > - pte = *pte_offset_kernel(pmdp, vaddr);
> > + *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> >
> > - if (unlikely(!pte_present(pte) ||
> > - (write && (!pte_write(pte) || !pte_dirty(pte)))))
> > + if (!__get_user_pages_fast(vaddr, 1, write, &page))
> > return 1;
>
> Let's please use numeric, not boolean comparison, for the return value of
> gup.
Alright then! I ll resubmit it!
> Also, optional: as long as you're there, atomic_pte_lookup() ought to
> either return a bool (true == success) or an errno, rather than a
> numeric zero or one.
That makes sense. But the code which uses atomic_pte_lookup uses the
return value of 1 for success and failure value of 0 in gru_vtop. That's
why I did not mess with the return values in this code. It would require
some change in the driver functionality which I am not ready to do :(
> Other than that, this looks like a good cleanup, I wonder how many
> open-coded gup implementations are floating around like this.
I ll be on the lookout!
> thanks,
> --
> John Hubbard
> NVIDIA
>
> >
> > - *paddr = pte_pfn(pte) << PAGE_SHIFT;
> > -
> > - *pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
> > + *paddr = page_to_phys(page);
> > + put_user_page(page);
> >
> > return 0;
> > -
> > -err:
> > - return 1;
> > }
> >
> > static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
> >
> On Jul 22, 2019, at 11:50 AM, Bharath Vedartham <[email protected]> wrote:
>
>>
>>
>> In all likelihood, these questions are no-ops, and the optimizer may even make my questions completely moot, but I thought I might as well ask anyway.
>>
> That sounds reasonable. I am not really sure as to how much of
> an improvement it would be, the condition will be evaluated eitherways
> AFAIK? Eitherways, the ternary operator does not look good. I ll make a
> version 2 of this.
In THEORY the "unlikely" hints to the compiler that that leg of the "if" can be made the branch and jump leg, though in reality optimization is much more complex than that.
Still, the unlikely() call is also nicely self-documenting as to what the expected outcome is.
Reviewed-by: William Kucharski <[email protected]>
On 7/22/19 10:53 AM, Bharath Vedartham wrote:
> On Sun, Jul 21, 2019 at 07:32:36PM -0700, John Hubbard wrote:
>> On 7/21/19 8:58 AM, Bharath Vedartham wrote:
...
>> Also, optional: as long as you're there, atomic_pte_lookup() ought to
>> either return a bool (true == success) or an errno, rather than a
>> numeric zero or one.
> That makes sense. But the code which uses atomic_pte_lookup uses the
> return value of 1 for success and failure value of 0 in gru_vtop. That's
> why I did not mess with the return values in this code. It would require
> some change in the driver functionality which I am not ready to do :(
It's a static function with only one caller. You could just merge in
something like this, on top of what you have:
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index 121c9a4ccb94..2f768fc06432 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -189,10 +189,11 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
return 0;
}
-/*
- * atomic_pte_lookup
+/**
+ * atomic_pte_lookup() - Convert a user virtual address to a physical address
+ * @Return: true for success, false for failure. Failure means that the page
+ * could not be pinned via gup fast.
*
- * Convert a user virtual address to a physical address
* Only supports Intel large pages (2MB only) on x86_64.
* ZZZ - hugepage support is incomplete
*
@@ -207,12 +208,12 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
*pageshift = is_vm_hugetlb_page(vma) ? HPAGE_SHIFT : PAGE_SHIFT;
if (!__get_user_pages_fast(vaddr, 1, write, &page))
- return 1;
+ return false;
*paddr = page_to_phys(page);
put_user_page(page);
- return 0;
+ return true;
}
static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
@@ -221,7 +222,8 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
struct mm_struct *mm = gts->ts_mm;
struct vm_area_struct *vma;
unsigned long paddr;
- int ret, ps;
+ int ps;
+ bool success;
vma = find_vma(mm, vaddr);
if (!vma)
@@ -232,8 +234,8 @@ static int gru_vtop(struct gru_thread_state *gts, unsigned long vaddr,
* context.
*/
rmb(); /* Must/check ms_range_active before loading PTEs */
- ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps);
- if (ret) {
+ success = atomic_pte_lookup(vma, vaddr, write, &paddr, &ps);
+ if (!success) {
if (atomic)
goto upm;
if (non_atomic_pte_lookup(vma, vaddr, write, &paddr, &ps))
thanks,
--
John Hubbard
NVIDIA