Starting with commit c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of
page protection by insert_pfn_pmd()"), vmf_insert_pfn_pmd() internally
calls pmdp_set_access_flags(). That helper enforces a pmd-aligned
@address argument via a VM_BUG_ON() assertion.
Update the implementation to take a 'struct vm_fault' argument directly
and apply the address alignment fixup internally to fix crash signatures
like:
kernel BUG at arch/x86/mm/pgtable.c:515!
invalid opcode: 0000 [#1] SMP NOPTI
CPU: 51 PID: 43713 Comm: java Tainted: G OE 4.19.35 #1
[..]
RIP: 0010:pmdp_set_access_flags+0x48/0x50
[..]
Call Trace:
vmf_insert_pfn_pmd+0x198/0x350
dax_iomap_fault+0xe82/0x1190
ext4_dax_huge_fault+0x103/0x1f0
? __switch_to_asm+0x40/0x70
__handle_mm_fault+0x3f6/0x1370
? __switch_to_asm+0x34/0x70
? __switch_to_asm+0x40/0x70
handle_mm_fault+0xda/0x200
__do_page_fault+0x249/0x4f0
do_page_fault+0x32/0x110
? page_fault+0x8/0x30
page_fault+0x1e/0x30
Cc: <[email protected]>
Fixes: c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd()")
Reported-by: Piotr Balcer <[email protected]>
Tested-by: Yan Ma <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Chandan Rajendra <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Souptick Joarder <[email protected]>
Signed-off-by: Dan Williams <[email protected]>
---
drivers/dax/device.c | 6 ++----
fs/dax.c | 6 ++----
include/linux/huge_mm.h | 6 ++----
mm/huge_memory.c | 16 ++++++++++------
4 files changed, 16 insertions(+), 18 deletions(-)
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index e428468ab661..996d68ff992a 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -184,8 +184,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
- return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn,
- vmf->flags & FAULT_FLAG_WRITE);
+ return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
}
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
@@ -235,8 +234,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
- return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn,
- vmf->flags & FAULT_FLAG_WRITE);
+ return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
}
#else
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
diff --git a/fs/dax.c b/fs/dax.c
index e5e54da1715f..83009875308c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1575,8 +1575,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
}
trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
- result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
- write);
+ result = vmf_insert_pfn_pmd(vmf, pfn, write);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
@@ -1686,8 +1685,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_FS_DAX_PMD
else if (order == PMD_ORDER)
- ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
- pfn, true);
+ ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
#endif
else
ret = VM_FAULT_FALLBACK;
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 381e872bfde0..7cd5c150c21d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -47,10 +47,8 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot,
int prot_numa);
-vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, pfn_t pfn, bool write);
-vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
+vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_FLAG,
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 165ea46bf149..4310c6e9e5a3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -793,11 +793,13 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
pte_free(mm, pgtable);
}
-vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, pfn_t pfn, bool write)
+vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
{
+ unsigned long addr = vmf->address & PMD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
pgprot_t pgprot = vma->vm_page_prot;
pgtable_t pgtable = NULL;
+
/*
* If we had pmd_special, we could avoid all these restrictions,
* but we need to be consistent with PTEs and architectures that
@@ -820,7 +822,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
track_pfn_insert(vma, &pgprot, pfn);
- insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable);
+ insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
@@ -869,10 +871,12 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
spin_unlock(ptl);
}
-vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
- pud_t *pud, pfn_t pfn, bool write)
+vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
{
+ unsigned long addr = vmf->address & PUD_MASK;
+ struct vm_area_struct *vma = vmf->vma;
pgprot_t pgprot = vma->vm_page_prot;
+
/*
* If we had pud_special, we could avoid all these restrictions,
* but we need to be consistent with PTEs and architectures that
@@ -889,7 +893,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
track_pfn_insert(vma, &pgprot, pfn);
- insert_pfn_pud(vma, addr, pud, pfn, pgprot, write);
+ insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write);
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
On Thu, May 09, 2019 at 09:31:41AM -0700, Dan Williams wrote:
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index e428468ab661..996d68ff992a 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -184,8 +184,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
> + return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
I think we can ditch the third parameter too. Going through the callers ...
> @@ -235,8 +234,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
> + return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
> @@ -1575,8 +1575,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> + result = vmf_insert_pfn_pmd(vmf, pfn, write);
This 'write' parameter came earlier from:
bool write = vmf->flags & FAULT_FLAG_WRITE;
and it is not modified subsequently.
> @@ -1686,8 +1685,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
> + ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
If FAULT_FLAG_WRITE is not set in a mkwrite handler, I don't know
what's gone wrong with the world.
Even without these changes,
Reviewed-by: Matthew Wilcox <[email protected]>
On Thu 09-05-19 09:31:41, Dan Williams wrote:
> Starting with commit c6f3c5ee40c1 "mm/huge_memory.c: fix modifying of
> page protection by insert_pfn_pmd()" vmf_insert_pfn_pmd() internally
> calls pmdp_set_access_flags(). That helper enforces a pmd aligned
> @address argument via VM_BUG_ON() assertion.
>
> Update the implementation to take a 'struct vm_fault' argument directly
> and apply the address alignment fixup internally to fix crash signatures
> like:
>
> kernel BUG at arch/x86/mm/pgtable.c:515!
> invalid opcode: 0000 [#1] SMP NOPTI
> CPU: 51 PID: 43713 Comm: java Tainted: G OE 4.19.35 #1
> [..]
> RIP: 0010:pmdp_set_access_flags+0x48/0x50
> [..]
> Call Trace:
> vmf_insert_pfn_pmd+0x198/0x350
> dax_iomap_fault+0xe82/0x1190
> ext4_dax_huge_fault+0x103/0x1f0
> ? __switch_to_asm+0x40/0x70
> __handle_mm_fault+0x3f6/0x1370
> ? __switch_to_asm+0x34/0x70
> ? __switch_to_asm+0x40/0x70
> handle_mm_fault+0xda/0x200
> __do_page_fault+0x249/0x4f0
> do_page_fault+0x32/0x110
> ? page_fault+0x8/0x30
> page_fault+0x1e/0x30
>
> Cc: <[email protected]>
> Fixes: c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd()")
> Reported-by: Piotr Balcer <[email protected]>
> Tested-by: Yan Ma <[email protected]>
> Cc: Aneesh Kumar K.V <[email protected]>
> Cc: Chandan Rajendra <[email protected]>
> Cc: Jan Kara <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Matthew Wilcox <[email protected]>
> Cc: Souptick Joarder <[email protected]>
> Signed-off-by: Dan Williams <[email protected]>
Looks good to me. You can add:
Reviewed-by: Jan Kara <[email protected]>
Honza
> ---
>
> drivers/dax/device.c | 6 ++----
> fs/dax.c | 6 ++----
> include/linux/huge_mm.h | 6 ++----
> mm/huge_memory.c | 16 ++++++++++------
> 4 files changed, 16 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index e428468ab661..996d68ff992a 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -184,8 +184,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
>
> *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn,
> - vmf->flags & FAULT_FLAG_WRITE);
> + return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
> }
>
> #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
> @@ -235,8 +234,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
>
> *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn,
> - vmf->flags & FAULT_FLAG_WRITE);
> + return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
> }
> #else
> static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
> diff --git a/fs/dax.c b/fs/dax.c
> index e5e54da1715f..83009875308c 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1575,8 +1575,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> }
>
> trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
> - result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
> - write);
> + result = vmf_insert_pfn_pmd(vmf, pfn, write);
> break;
> case IOMAP_UNWRITTEN:
> case IOMAP_HOLE:
> @@ -1686,8 +1685,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
> ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
> #ifdef CONFIG_FS_DAX_PMD
> else if (order == PMD_ORDER)
> - ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
> - pfn, true);
> + ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
> #endif
> else
> ret = VM_FAULT_FALLBACK;
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 381e872bfde0..7cd5c150c21d 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -47,10 +47,8 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
> unsigned long addr, pgprot_t newprot,
> int prot_numa);
> -vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
> - pmd_t *pmd, pfn_t pfn, bool write);
> -vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
> - pud_t *pud, pfn_t pfn, bool write);
> +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
> +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
> enum transparent_hugepage_flag {
> TRANSPARENT_HUGEPAGE_FLAG,
> TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 165ea46bf149..4310c6e9e5a3 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -793,11 +793,13 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
> pte_free(mm, pgtable);
> }
>
> -vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
> - pmd_t *pmd, pfn_t pfn, bool write)
> +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
> {
> + unsigned long addr = vmf->address & PMD_MASK;
> + struct vm_area_struct *vma = vmf->vma;
> pgprot_t pgprot = vma->vm_page_prot;
> pgtable_t pgtable = NULL;
> +
> /*
> * If we had pmd_special, we could avoid all these restrictions,
> * but we need to be consistent with PTEs and architectures that
> @@ -820,7 +822,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>
> track_pfn_insert(vma, &pgprot, pfn);
>
> - insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable);
> + insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
> return VM_FAULT_NOPAGE;
> }
> EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
> @@ -869,10 +871,12 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
> spin_unlock(ptl);
> }
>
> -vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
> - pud_t *pud, pfn_t pfn, bool write)
> +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
> {
> + unsigned long addr = vmf->address & PUD_MASK;
> + struct vm_area_struct *vma = vmf->vma;
> pgprot_t pgprot = vma->vm_page_prot;
> +
> /*
> * If we had pud_special, we could avoid all these restrictions,
> * but we need to be consistent with PTEs and architectures that
> @@ -889,7 +893,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
>
> track_pfn_insert(vma, &pgprot, pfn);
>
> - insert_pfn_pud(vma, addr, pud, pfn, pgprot, write);
> + insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write);
> return VM_FAULT_NOPAGE;
> }
> EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
>
--
Jan Kara <[email protected]>
SUSE Labs, CR
>
> Starting with commit c6f3c5ee40c1 "mm/huge_memory.c: fix modifying of
> page protection by insert_pfn_pmd()" vmf_insert_pfn_pmd() internally
> calls pmdp_set_access_flags(). That helper enforces a pmd aligned
> @address argument via VM_BUG_ON() assertion.
>
> Update the implementation to take a 'struct vm_fault' argument directly
> and apply the address alignment fixup internally to fix crash signatures
> like:
>
> kernel BUG at arch/x86/mm/pgtable.c:515!
> invalid opcode: 0000 [#1] SMP NOPTI
> CPU: 51 PID: 43713 Comm: java Tainted: G OE 4.19.35 #1
> [..]
> RIP: 0010:pmdp_set_access_flags+0x48/0x50
> [..]
> Call Trace:
> vmf_insert_pfn_pmd+0x198/0x350
> dax_iomap_fault+0xe82/0x1190
> ext4_dax_huge_fault+0x103/0x1f0
> ? __switch_to_asm+0x40/0x70
> __handle_mm_fault+0x3f6/0x1370
> ? __switch_to_asm+0x34/0x70
> ? __switch_to_asm+0x40/0x70
> handle_mm_fault+0xda/0x200
> __do_page_fault+0x249/0x4f0
> do_page_fault+0x32/0x110
> ? page_fault+0x8/0x30
> page_fault+0x1e/0x30
>
> Cc: <[email protected]>
> Fixes: c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd()")
> Reported-by: Piotr Balcer <[email protected]>
> Tested-by: Yan Ma <[email protected]>
> Cc: Aneesh Kumar K.V <[email protected]>
> Cc: Chandan Rajendra <[email protected]>
> Cc: Jan Kara <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Matthew Wilcox <[email protected]>
> Cc: Souptick Joarder <[email protected]>
> Signed-off-by: Dan Williams <[email protected]>
> ---
>
> drivers/dax/device.c | 6 ++----
> fs/dax.c | 6 ++----
> include/linux/huge_mm.h | 6 ++----
> mm/huge_memory.c | 16 ++++++++++------
> 4 files changed, 16 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index e428468ab661..996d68ff992a 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -184,8 +184,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax
> *dev_dax,
>
> *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn,
> - vmf->flags & FAULT_FLAG_WRITE);
> + return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
> }
>
> #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
> @@ -235,8 +234,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax
> *dev_dax,
>
> *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn,
> - vmf->flags & FAULT_FLAG_WRITE);
> + return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
> }
> #else
> static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
> diff --git a/fs/dax.c b/fs/dax.c
> index e5e54da1715f..83009875308c 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1575,8 +1575,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault
> *vmf, pfn_t *pfnp,
> }
>
> trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
> - result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
> - write);
> + result = vmf_insert_pfn_pmd(vmf, pfn, write);
> break;
> case IOMAP_UNWRITTEN:
> case IOMAP_HOLE:
> @@ -1686,8 +1685,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn,
> unsigned int order)
> ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
> #ifdef CONFIG_FS_DAX_PMD
> else if (order == PMD_ORDER)
> - ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
> - pfn, true);
> + ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
> #endif
> else
> ret = VM_FAULT_FALLBACK;
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 381e872bfde0..7cd5c150c21d 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -47,10 +47,8 @@ extern bool move_huge_pmd(struct vm_area_struct *vma,
> unsigned long old_addr,
> extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
> unsigned long addr, pgprot_t newprot,
> int prot_numa);
> -vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long
> addr,
> - pmd_t *pmd, pfn_t pfn, bool write);
> -vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long
> addr,
> - pud_t *pud, pfn_t pfn, bool write);
> +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
> +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
> enum transparent_hugepage_flag {
> TRANSPARENT_HUGEPAGE_FLAG,
> TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 165ea46bf149..4310c6e9e5a3 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -793,11 +793,13 @@ static void insert_pfn_pmd(struct vm_area_struct *vma,
> unsigned long addr,
> pte_free(mm, pgtable);
> }
>
> -vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long
> addr,
> - pmd_t *pmd, pfn_t pfn, bool write)
> +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
> {
> + unsigned long addr = vmf->address & PMD_MASK;
> + struct vm_area_struct *vma = vmf->vma;
> pgprot_t pgprot = vma->vm_page_prot;
> pgtable_t pgtable = NULL;
> +
> /*
> * If we had pmd_special, we could avoid all these restrictions,
> * but we need to be consistent with PTEs and architectures that
> @@ -820,7 +822,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma,
> unsigned long addr,
>
> track_pfn_insert(vma, &pgprot, pfn);
>
> - insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable);
> + insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
> return VM_FAULT_NOPAGE;
> }
> EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
> @@ -869,10 +871,12 @@ static void insert_pfn_pud(struct vm_area_struct *vma,
> unsigned long addr,
> spin_unlock(ptl);
> }
>
> -vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long
> addr,
> - pud_t *pud, pfn_t pfn, bool write)
> +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
> {
> + unsigned long addr = vmf->address & PUD_MASK;
> + struct vm_area_struct *vma = vmf->vma;
> pgprot_t pgprot = vma->vm_page_prot;
> +
> /*
> * If we had pud_special, we could avoid all these restrictions,
> * but we need to be consistent with PTEs and architectures that
> @@ -889,7 +893,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma,
> unsigned long addr,
>
> track_pfn_insert(vma, &pgprot, pfn);
>
> - insert_pfn_pud(vma, addr, pud, pfn, pgprot, write);
> + insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write);
> return VM_FAULT_NOPAGE;
> }
> EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
>
Thanks for the patch.
This patch solves the issue faced while testing virtio pmem.
Tested-by: Pankaj Gupta <[email protected]>
>
Dan Williams <[email protected]> writes:
> Starting with commit c6f3c5ee40c1 "mm/huge_memory.c: fix modifying of
> page protection by insert_pfn_pmd()" vmf_insert_pfn_pmd() internally
> calls pmdp_set_access_flags(). That helper enforces a pmd aligned
> @address argument via VM_BUG_ON() assertion.
>
> Update the implementation to take a 'struct vm_fault' argument directly
> and apply the address alignment fixup internally to fix crash signatures
> like:
>
> kernel BUG at arch/x86/mm/pgtable.c:515!
> invalid opcode: 0000 [#1] SMP NOPTI
> CPU: 51 PID: 43713 Comm: java Tainted: G OE 4.19.35 #1
> [..]
> RIP: 0010:pmdp_set_access_flags+0x48/0x50
> [..]
> Call Trace:
> vmf_insert_pfn_pmd+0x198/0x350
> dax_iomap_fault+0xe82/0x1190
> ext4_dax_huge_fault+0x103/0x1f0
> ? __switch_to_asm+0x40/0x70
> __handle_mm_fault+0x3f6/0x1370
> ? __switch_to_asm+0x34/0x70
> ? __switch_to_asm+0x40/0x70
> handle_mm_fault+0xda/0x200
> __do_page_fault+0x249/0x4f0
> do_page_fault+0x32/0x110
> ? page_fault+0x8/0x30
> page_fault+0x1e/0x30
>
Reviewed-by: Aneesh Kumar K.V <[email protected]>
> Cc: <[email protected]>
> Fixes: c6f3c5ee40c1 ("mm/huge_memory.c: fix modifying of page protection by insert_pfn_pmd()")
> Reported-by: Piotr Balcer <[email protected]>
> Tested-by: Yan Ma <[email protected]>
> Cc: Aneesh Kumar K.V <[email protected]>
> Cc: Chandan Rajendra <[email protected]>
> Cc: Jan Kara <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Matthew Wilcox <[email protected]>
> Cc: Souptick Joarder <[email protected]>
> Signed-off-by: Dan Williams <[email protected]>
> ---
>
> drivers/dax/device.c | 6 ++----
> fs/dax.c | 6 ++----
> include/linux/huge_mm.h | 6 ++----
> mm/huge_memory.c | 16 ++++++++++------
> 4 files changed, 16 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index e428468ab661..996d68ff992a 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -184,8 +184,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
>
> *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, *pfn,
> - vmf->flags & FAULT_FLAG_WRITE);
> + return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
> }
>
> #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
> @@ -235,8 +234,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
>
> *pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_pfn_pud(vmf->vma, vmf->address, vmf->pud, *pfn,
> - vmf->flags & FAULT_FLAG_WRITE);
> + return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
> }
> #else
> static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
> diff --git a/fs/dax.c b/fs/dax.c
> index e5e54da1715f..83009875308c 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1575,8 +1575,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
> }
>
> trace_dax_pmd_insert_mapping(inode, vmf, PMD_SIZE, pfn, entry);
> - result = vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
> - write);
> + result = vmf_insert_pfn_pmd(vmf, pfn, write);
> break;
> case IOMAP_UNWRITTEN:
> case IOMAP_HOLE:
> @@ -1686,8 +1685,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
> ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
> #ifdef CONFIG_FS_DAX_PMD
> else if (order == PMD_ORDER)
> - ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
> - pfn, true);
> + ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
> #endif
> else
> ret = VM_FAULT_FALLBACK;
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 381e872bfde0..7cd5c150c21d 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -47,10 +47,8 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
> extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
> unsigned long addr, pgprot_t newprot,
> int prot_numa);
> -vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
> - pmd_t *pmd, pfn_t pfn, bool write);
> -vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
> - pud_t *pud, pfn_t pfn, bool write);
> +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write);
> +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write);
> enum transparent_hugepage_flag {
> TRANSPARENT_HUGEPAGE_FLAG,
> TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 165ea46bf149..4310c6e9e5a3 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -793,11 +793,13 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
> pte_free(mm, pgtable);
> }
>
> -vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
> - pmd_t *pmd, pfn_t pfn, bool write)
> +vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
> {
> + unsigned long addr = vmf->address & PMD_MASK;
> + struct vm_area_struct *vma = vmf->vma;
> pgprot_t pgprot = vma->vm_page_prot;
> pgtable_t pgtable = NULL;
> +
> /*
> * If we had pmd_special, we could avoid all these restrictions,
> * but we need to be consistent with PTEs and architectures that
> @@ -820,7 +822,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
>
> track_pfn_insert(vma, &pgprot, pfn);
>
> - insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write, pgtable);
> + insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable);
> return VM_FAULT_NOPAGE;
> }
> EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd);
> @@ -869,10 +871,12 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
> spin_unlock(ptl);
> }
>
> -vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
> - pud_t *pud, pfn_t pfn, bool write)
> +vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
> {
> + unsigned long addr = vmf->address & PUD_MASK;
> + struct vm_area_struct *vma = vmf->vma;
> pgprot_t pgprot = vma->vm_page_prot;
> +
> /*
> * If we had pud_special, we could avoid all these restrictions,
> * but we need to be consistent with PTEs and architectures that
> @@ -889,7 +893,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
>
> track_pfn_insert(vma, &pgprot, pfn);
>
> - insert_pfn_pud(vma, addr, pud, pfn, pgprot, write);
> + insert_pfn_pud(vma, addr, vmf->pud, pfn, pgprot, write);
> return VM_FAULT_NOPAGE;
> }
> EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);