The s390 MMIO syscalls, when using the classic PCI instructions, do not
cause a page fault when follow_pte() fails because the page is not
present. Besides being a general deficiency, this breaks vfio-pci's
mmap() handling once VFIO_PCI_MMAP gets enabled, as that lazily maps on
first access. Fix this by following a failed follow_pte() with
fixup_user_fault() and then retrying follow_pte().
Reviewed-by: Jason Gunthorpe <[email protected]>
Signed-off-by: Niklas Schnelle <[email protected]>
---
arch/s390/pci/pci_mmio.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index a90499c087f0..217defbcb4f1 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -170,8 +170,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
 		goto out_unlock_mmap;
 
 	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
-	if (ret)
-		goto out_unlock_mmap;
+	if (ret) {
+		fixup_user_fault(vma->vm_mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
+		ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+		if (ret)
+			goto out_unlock_mmap;
+	}
 
 	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
 			(mmio_addr & ~PAGE_MASK));
@@ -305,12 +309,16 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 		goto out_unlock_mmap;
 	ret = -EACCES;
-	if (!(vma->vm_flags & VM_WRITE))
+	if (!(vma->vm_flags & VM_READ))
 		goto out_unlock_mmap;
 
 	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
-	if (ret)
-		goto out_unlock_mmap;
+	if (ret) {
+		fixup_user_fault(vma->vm_mm, mmio_addr, 0, NULL);
+		ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
+		if (ret)
+			goto out_unlock_mmap;
+	}
 
 	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
 			(mmio_addr & ~PAGE_MASK));
--
2.40.1
On Thu, 2024-05-23 at 13:10 +0200, Niklas Schnelle wrote:
> The s390 MMIO syscalls, when using the classic PCI instructions, do not
> cause a page fault when follow_pte() fails because the page is not
> present. Besides being a general deficiency, this breaks vfio-pci's
> mmap() handling once VFIO_PCI_MMAP gets enabled, as that lazily maps on
> first access. Fix this by following a failed follow_pte() with
> fixup_user_fault() and then retrying follow_pte().
>
> Reviewed-by: Jason Gunthorpe <[email protected]>
> Signed-off-by: Niklas Schnelle <[email protected]>
> ---
> arch/s390/pci/pci_mmio.c | 18 +++++++++++++-----
> 1 file changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
> index a90499c087f0..217defbcb4f1 100644
> --- a/arch/s390/pci/pci_mmio.c
> +++ b/arch/s390/pci/pci_mmio.c
> @@ -170,8 +170,12 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
>  		goto out_unlock_mmap;
>  
>  	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
> -	if (ret)
> -		goto out_unlock_mmap;
> +	if (ret) {
> +		fixup_user_fault(vma->vm_mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
> +		ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
This needs a slight adjustment on v6.10-rc1 due to a change in
follow_pte()'s signature. The call above should now take "vma" directly.
We could then also use current->mm in fixup_user_fault(), which seems to
be the more common pattern. I've already pushed this to my kernel.org
repository and will send a v3 rebased on v6.10-rc1 with just this change,
as I'm not tracking any other outstanding feedback.
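For reference, a rough, untested sketch of what the adjusted write-path
hunk would look like on v6.10-rc1 (the posted v3 may differ in details):

	ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
	if (ret) {
		/* fault the page in, then retry the PTE lookup */
		fixup_user_fault(current->mm, mmio_addr, FAULT_FLAG_WRITE, NULL);
		ret = follow_pte(vma, mmio_addr, &ptep, &ptl);
		if (ret)
			goto out_unlock_mmap;
	}

The read path below would do the same with fault flags 0 instead of
FAULT_FLAG_WRITE.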
> +		if (ret)
> +			goto out_unlock_mmap;
> +	}
>  
>  	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
>  			(mmio_addr & ~PAGE_MASK));
> @@ -305,12 +309,16 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
>  	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
>  		goto out_unlock_mmap;
>  	ret = -EACCES;
> -	if (!(vma->vm_flags & VM_WRITE))
> +	if (!(vma->vm_flags & VM_READ))
>  		goto out_unlock_mmap;
>  
>  	ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
> -	if (ret)
> -		goto out_unlock_mmap;
> +	if (ret) {
> +		fixup_user_fault(vma->vm_mm, mmio_addr, 0, NULL);
> +		ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl);
Here as well, i.e. follow_pte() takes the VMA directly and
fixup_user_fault() can use current->mm (with fault flags 0 for the read
path).
> +		if (ret)
> +			goto out_unlock_mmap;
> +	}
>  
>  	io_addr = (void __iomem *)((pte_pfn(*ptep) << PAGE_SHIFT) |
>  			(mmio_addr & ~PAGE_MASK));
>
On 5/23/24 7:10 AM, Niklas Schnelle wrote:
> The s390 MMIO syscalls, when using the classic PCI instructions, do not
> cause a page fault when follow_pte() fails because the page is not
> present. Besides being a general deficiency, this breaks vfio-pci's
> mmap() handling once VFIO_PCI_MMAP gets enabled, as that lazily maps on
> first access. Fix this by following a failed follow_pte() with
> fixup_user_fault() and then retrying follow_pte().
>
> Reviewed-by: Jason Gunthorpe <[email protected]>
> Signed-off-by: Niklas Schnelle <[email protected]>
Reviewed-by: Matthew Rosato <[email protected]>
FYI, as of -rc1 this patch has a merge conflict with 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()").