Add a generic interface to follow the pfn of a mapping in a pfnmap vma range. This is used by
one of the subsequent x86 PAT related patches to keep track of memory types
for vma regions across vma copy and free.
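
For illustration, a later PAT tracking patch could walk a pfnmap vma with it
roughly as below. This is only a hypothetical sketch: track_pfn_prot() is a
placeholder name and not something added by this series, and pte_pgprot() is
the x86 helper for pulling the protection bits out of a pte.

	static void walk_pfnmap_vma(struct vm_area_struct *vma)
	{
		unsigned long addr;
		pte_t pte;

		for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
			/* Skip holes and non-present entries in the mapping. */
			if (follow_pfnmap_pte(vma, addr, &pte))
				continue;

			/* Record the pfn and its protection/memory type. */
			track_pfn_prot(pte_pfn(pte), pte_pgprot(pte));
		}
	}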
Signed-off-by: Venkatesh Pallipadi <[email protected]>
Signed-off-by: Suresh Siddha <[email protected]>
---
include/linux/mm.h | 3 +++
mm/memory.c | 43 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)
Index: tip/include/linux/mm.h
===================================================================
--- tip.orig/include/linux/mm.h 2008-11-06 15:41:43.000000000 -0800
+++ tip/include/linux/mm.h 2008-11-10 09:44:45.000000000 -0800
@@ -1223,6 +1223,9 @@ struct page *follow_page(struct vm_area_
#define FOLL_GET 0x04 /* do get_page on page */
#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
+unsigned long follow_pfnmap_pte(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ret_ptep);
+
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
Index: tip/mm/memory.c
===================================================================
--- tip.orig/mm/memory.c 2008-11-06 15:41:43.000000000 -0800
+++ tip/mm/memory.c 2008-11-10 09:44:45.000000000 -0800
@@ -1111,6 +1111,49 @@ no_page_table:
return page;
}
+unsigned long follow_pfnmap_pte(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ret_ptep)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *ptep, pte;
+ spinlock_t *ptl;
+ struct page *page;
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (!is_pfn_mapping(vma))
+ goto err;
+
+ page = NULL;
+ pgd = pgd_offset(mm, address);
+ if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+ goto err;
+
+ pud = pud_offset(pgd, address);
+ if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+ goto err;
+
+ pmd = pmd_offset(pud, address);
+ if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ goto err;
+
+ ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+
+ pte = *ptep;
+ if (!pte_present(pte))
+ goto err_unlock;
+
+ *ret_ptep = pte;
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+
+err_unlock:
+ pte_unmap_unlock(ptep, ptl);
+err:
+ return -1;
+}
+
/* Can we do the FOLL_ANON optimization? */
static inline int use_zero_page(struct vm_area_struct *vma)
{
--
On Wed, Nov 12, 2008 at 01:26:50PM -0800, Venkatesh Pallipadi wrote:
>
> Add a generic interface to follow the pfn of a mapping in a pfnmap vma range. This is used by
> one of the subsequent x86 PAT related patches to keep track of memory types
> for vma regions across vma copy and free.
>
> Signed-off-by: Venkatesh Pallipadi <[email protected]>
> Signed-off-by: Suresh Siddha <[email protected]>
>
> ---
> include/linux/mm.h | 3 +++
> mm/memory.c | 43 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 46 insertions(+)
>
> Index: tip/include/linux/mm.h
> ===================================================================
> --- tip.orig/include/linux/mm.h 2008-11-06 15:41:43.000000000 -0800
> +++ tip/include/linux/mm.h 2008-11-10 09:44:45.000000000 -0800
> @@ -1223,6 +1223,9 @@ struct page *follow_page(struct vm_area_
> #define FOLL_GET 0x04 /* do get_page on page */
> #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
>
> +unsigned long follow_pfnmap_pte(struct vm_area_struct *vma,
> + unsigned long address, pte_t *ret_ptep);
> +
> typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
> void *data);
> extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
> Index: tip/mm/memory.c
> ===================================================================
> --- tip.orig/mm/memory.c 2008-11-06 15:41:43.000000000 -0800
> +++ tip/mm/memory.c 2008-11-10 09:44:45.000000000 -0800
> @@ -1111,6 +1111,49 @@ no_page_table:
> return page;
> }
>
> +unsigned long follow_pfnmap_pte(struct vm_area_struct *vma,
> + unsigned long address, pte_t *ret_ptep)
> +{
I think we'd typically return an int for functions like this. It
probably wouldn't hurt to return more descriptive errors either.
-EINVAL, perhaps, seems to fit best with what the existing code
there is doing.
> + pgd_t *pgd;
> + pud_t *pud;
> + pmd_t *pmd;
> + pte_t *ptep, pte;
> + spinlock_t *ptl;
> + struct page *page;
> + struct mm_struct *mm = vma->vm_mm;
> +
> + if (!is_pfn_mapping(vma))
> + goto err;
> +
> + page = NULL;
> + pgd = pgd_offset(mm, address);
> + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
> + goto err;
> +
> + pud = pud_offset(pgd, address);
> + if (pud_none(*pud) || unlikely(pud_bad(*pud)))
> + goto err;
> +
> + pmd = pmd_offset(pud, address);
> + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
> + goto err;
> +
> + ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
> +
> + pte = *ptep;
> + if (!pte_present(pte))
> + goto err_unlock;
> +
> + *ret_ptep = pte;
> + pte_unmap_unlock(ptep, ptl);
> + return 0;
> +
> +err_unlock:
> + pte_unmap_unlock(ptep, ptl);
> +err:
> + return -1;
> +}
> +
> /* Can we do the FOLL_ANON optimization? */
> static inline int use_zero_page(struct vm_area_struct *vma)
> {
>
> --
>-----Original Message-----
>From: Nick Piggin [mailto:[email protected]]
>Sent: Wednesday, November 12, 2008 3:27 PM
>To: Pallipadi, Venkatesh
>Cc: Ingo Molnar; Thomas Gleixner; H.Peter Anvin; Hugh Dickins;
>Roland Dreier; Jesse Barnes; Jeremy Fitzhardinge; Arjan van de
>Ven; [email protected]; Siddha, Suresh B
>Subject: Re: [patch 3/8] x86 PAT: Add follow_pfnmap_pte routine
>to help tracking pfnmap pages
>
>On Wed, Nov 12, 2008 at 01:26:50PM -0800, Venkatesh Pallipadi wrote:
>>
>> +unsigned long follow_pfnmap_pte(struct vm_area_struct *vma,
>> + unsigned long address, pte_t *ret_ptep)
>> +{
>
>I think we'd typically return an int for functions like this. It
>probably wouldn't hurt to return more descriptive errors either.
>-EINVAL, perhaps, seems to fit best with what the existing code
>there is doing.
>
Yes. Will change the return type to int with more appropriate
return values and update the patch.
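Roughly along these lines (untested sketch, just to show the direction; the
prototype in mm.h would switch to int as well):

	int follow_pfnmap_pte(struct vm_area_struct *vma, unsigned long address,
				pte_t *ret_ptep)
	{
		pgd_t *pgd;
		pud_t *pud;
		pmd_t *pmd;
		pte_t *ptep, pte;
		spinlock_t *ptl;
		struct mm_struct *mm = vma->vm_mm;

		/* Only pfnmap vmas are of interest here. */
		if (!is_pfn_mapping(vma))
			return -EINVAL;

		pgd = pgd_offset(mm, address);
		if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
			return -EINVAL;

		pud = pud_offset(pgd, address);
		if (pud_none(*pud) || unlikely(pud_bad(*pud)))
			return -EINVAL;

		pmd = pmd_offset(pud, address);
		if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
			return -EINVAL;

		ptep = pte_offset_map_lock(mm, pmd, address, &ptl);

		pte = *ptep;
		if (!pte_present(pte)) {
			pte_unmap_unlock(ptep, ptl);
			return -EINVAL;
		}

		*ret_ptep = pte;
		pte_unmap_unlock(ptep, ptl);
		return 0;
	}
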
Thanks,
Venki