Hello,
during Jasons work with postcopy migration support for s390 a problem regarding
gmap faults was discovered.
The gmap code will call fixup_userfault which will end up always in
handle_mm_fault. Till now we never cared about retries, but as the userfaultfd
code kind of relies on it, this needed some fix. This patchset includes the
retry logic fory gmap fault scenarios, as well as passing back VM_FAULT_RETRY
from fixup_userfault.
Thanks,
Dominik
Dominik Dingel (2):
mm: fixup_userfault returns VM_FAULT_RETRY if asked
s390/mm: allow gmap code to retry on faulting in guest memory
arch/s390/mm/pgtable.c | 28 ++++++++++++++++++++++++----
mm/gup.c | 2 ++
2 files changed, 26 insertions(+), 4 deletions(-)
--
2.3.9
When calling fixup_userfault with FAULT_FLAG_ALLOW_RETRY, fixup_userfault
didn't care about VM_FAULT_RETRY and returned 0. If the VM_FAULT_RETRY flag is
set we will return the complete result of handle_mm_fault.
Signed-off-by: Dominik Dingel <[email protected]>
---
mm/gup.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mm/gup.c b/mm/gup.c
index deafa2c..2af3b31 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -609,6 +609,8 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
return -EFAULT;
BUG();
}
+ if (ret & VM_FAULT_RETRY)
+ return ret;
if (tsk) {
if (ret & VM_FAULT_MAJOR)
tsk->maj_flt++;
--
2.3.9
The userfaultfd does need FAULT_FLAG_ALLOW_RETRY to not return
VM_FAULT_SIGBUS. So we improve the gmap code to handle one
VM_FAULT_RETRY.
Signed-off-by: Dominik Dingel <[email protected]>
---
arch/s390/mm/pgtable.c | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 54ef3bc..8a0025d 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -577,15 +577,22 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr,
unsigned int fault_flags)
{
unsigned long vmaddr;
- int rc;
+ int rc, fault;
+ fault_flags |= FAULT_FLAG_ALLOW_RETRY;
+retry:
down_read(&gmap->mm->mmap_sem);
vmaddr = __gmap_translate(gmap, gaddr);
if (IS_ERR_VALUE(vmaddr)) {
rc = vmaddr;
goto out_up;
}
- if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
+ fault = fixup_user_fault(current, gmap->mm, vmaddr, fault_flags);
+ if (fault & VM_FAULT_RETRY) {
+ fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ fault_flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ } else if (fault) {
rc = -EFAULT;
goto out_up;
}
@@ -717,10 +724,13 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
spinlock_t *ptl;
pte_t *ptep, entry;
pgste_t pgste;
+ int fault, fault_flags;
int rc = 0;
+ fault_flags = FAULT_FLAG_WRITE | FAULT_FLAG_ALLOW_RETRY;
if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
return -EINVAL;
+retry:
down_read(&gmap->mm->mmap_sem);
while (len) {
/* Convert gmap address and connect the page tables */
@@ -730,7 +740,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
break;
}
/* Get the page mapped */
- if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
+ fault = fixup_user_fault(current, gmap->mm, addr, fault_flags);
+ if (fault & VM_FAULT_RETRY) {
+ fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ fault_flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ } else if (fault) {
rc = -EFAULT;
break;
}
@@ -794,7 +809,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
spinlock_t *ptl;
pgste_t old, new;
pte_t *ptep;
+ int fault, fault_flags;
+ fault_flags = FAULT_FLAG_WRITE | FAULT_FLAG_ALLOW_RETRY;
down_read(&mm->mmap_sem);
retry:
ptep = get_locked_pte(mm, addr, &ptl);
@@ -805,10 +822,13 @@ retry:
if (!(pte_val(*ptep) & _PAGE_INVALID) &&
(pte_val(*ptep) & _PAGE_PROTECT)) {
pte_unmap_unlock(ptep, ptl);
- if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
+ fault = fixup_user_fault(current, mm, addr, fault_flags);
+ if (fault && !(fault & VM_FAULT_RETRY)) {
up_read(&mm->mmap_sem);
return -EFAULT;
}
+ fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ fault_flags |= FAULT_FLAG_TRIED;
goto retry;
}
--
2.3.9
On Thu, Nov 19, 2015 at 12:49:57AM +0100, Dominik Dingel wrote:
> When calling fixup_userfault with FAULT_FLAG_ALLOW_RETRY, fixup_userfault
> didn't care about VM_FAULT_RETRY and returned 0. If the VM_FAULT_RETRY flag is
> set we will return the complete result of handle_mm_fault.
>
> Signed-off-by: Dominik Dingel <[email protected]>
> ---
> mm/gup.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/mm/gup.c b/mm/gup.c
> index deafa2c..2af3b31 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -609,6 +609,8 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
> return -EFAULT;
> BUG();
> }
> + if (ret & VM_FAULT_RETRY)
> + return ret;
Nope. fixup_user_fault() return errno, not VM_FAULT_* mask.
I guess it should be
return -EBUSY;
> if (tsk) {
> if (ret & VM_FAULT_MAJOR)
> tsk->maj_flt++;
> --
> 2.3.9
>
--
Kirill A. Shutemov
On Thu, 19 Nov 2015 00:49:58 +0100
Dominik Dingel <[email protected]> wrote:
> The userfaultfd does need FAULT_FLAG_ALLOW_RETRY to not return
> VM_FAULT_SIGBUS. So we improve the gmap code to handle one
> VM_FAULT_RETRY.
>
> Signed-off-by: Dominik Dingel <[email protected]>
> ---
> arch/s390/mm/pgtable.c | 28 ++++++++++++++++++++++++----
> 1 file changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> index 54ef3bc..8a0025d 100644
> --- a/arch/s390/mm/pgtable.c
> +++ b/arch/s390/mm/pgtable.c
> @@ -577,15 +577,22 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr,
> unsigned int fault_flags)
> {
> unsigned long vmaddr;
> - int rc;
> + int rc, fault;
>
> + fault_flags |= FAULT_FLAG_ALLOW_RETRY;
> +retry:
> down_read(&gmap->mm->mmap_sem);
> vmaddr = __gmap_translate(gmap, gaddr);
> if (IS_ERR_VALUE(vmaddr)) {
> rc = vmaddr;
> goto out_up;
> }
> - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
> + fault = fixup_user_fault(current, gmap->mm, vmaddr, fault_flags);
> + if (fault & VM_FAULT_RETRY) {
> + fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> + fault_flags |= FAULT_FLAG_TRIED;
> + goto retry;
> + } else if (fault) {
> rc = -EFAULT;
> goto out_up;
> }
Me thinks that you want to add the retry code into fixup_user_fault itself.
You basically have the same code around the three calls to fixup_user_fault.
Yes, it will be a common code patch but I guess that it will be acceptable
given userfaultfd as a reason.
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.
On 11/19/2015 09:18 AM, Martin Schwidefsky wrote:
> On Thu, 19 Nov 2015 00:49:58 +0100
> Dominik Dingel <[email protected]> wrote:
>
>> The userfaultfd does need FAULT_FLAG_ALLOW_RETRY to not return
>> VM_FAULT_SIGBUS. So we improve the gmap code to handle one
>> VM_FAULT_RETRY.
>>
>> Signed-off-by: Dominik Dingel <[email protected]>
>> ---
>> arch/s390/mm/pgtable.c | 28 ++++++++++++++++++++++++----
>> 1 file changed, 24 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
>> index 54ef3bc..8a0025d 100644
>> --- a/arch/s390/mm/pgtable.c
>> +++ b/arch/s390/mm/pgtable.c
>> @@ -577,15 +577,22 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr,
>> unsigned int fault_flags)
>> {
>> unsigned long vmaddr;
>> - int rc;
>> + int rc, fault;
>>
>> + fault_flags |= FAULT_FLAG_ALLOW_RETRY;
>> +retry:
>> down_read(&gmap->mm->mmap_sem);
>> vmaddr = __gmap_translate(gmap, gaddr);
>> if (IS_ERR_VALUE(vmaddr)) {
>> rc = vmaddr;
>> goto out_up;
>> }
>> - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
>> + fault = fixup_user_fault(current, gmap->mm, vmaddr, fault_flags);
>> + if (fault & VM_FAULT_RETRY) {
>> + fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
>> + fault_flags |= FAULT_FLAG_TRIED;
>> + goto retry;
>> + } else if (fault) {
>> rc = -EFAULT;
>> goto out_up;
>> }
>
> Me thinks that you want to add the retry code into fixup_user_fault itself.
> You basically have the same code around the three calls to fixup_user_fault.
> Yes, it will be a common code patch but I guess that it will be acceptable
> given userfaultfd as a reason.
That makes a lot of sense. In an earlier discussion (a followup of Jasons
mm: Loosen MADV_NOHUGEPAGE to enable Qemu postcopy on s390) patch.
Andrea suggested the following:
It's probably better to add a fixup_user_fault_unlocked that will work
like get_user_pages_unlocked. I.e. leaves the details of the mmap_sem
locking internally to the function, and will handle VM_FAULT_RETRY
automatically by re-taking the mmap_sem and repeating the
fixup_user_fault after updating the FAULT_FLAG_ALLOW_RETRY to
FAULT_FLAG_TRIED.
On Thu, 19 Nov 2015 09:25:24 +0100
Christian Borntraeger <[email protected]> wrote:
> On 11/19/2015 09:18 AM, Martin Schwidefsky wrote:
> > On Thu, 19 Nov 2015 00:49:58 +0100
> > Dominik Dingel <[email protected]> wrote:
> >
> >> The userfaultfd does need FAULT_FLAG_ALLOW_RETRY to not return
> >> VM_FAULT_SIGBUS. So we improve the gmap code to handle one
> >> VM_FAULT_RETRY.
> >>
> >> Signed-off-by: Dominik Dingel <[email protected]>
> >> ---
> >> arch/s390/mm/pgtable.c | 28 ++++++++++++++++++++++++----
> >> 1 file changed, 24 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> >> index 54ef3bc..8a0025d 100644
> >> --- a/arch/s390/mm/pgtable.c
> >> +++ b/arch/s390/mm/pgtable.c
> >> @@ -577,15 +577,22 @@ int gmap_fault(struct gmap *gmap, unsigned long gaddr,
> >> unsigned int fault_flags)
> >> {
> >> unsigned long vmaddr;
> >> - int rc;
> >> + int rc, fault;
> >>
> >> + fault_flags |= FAULT_FLAG_ALLOW_RETRY;
> >> +retry:
> >> down_read(&gmap->mm->mmap_sem);
> >> vmaddr = __gmap_translate(gmap, gaddr);
> >> if (IS_ERR_VALUE(vmaddr)) {
> >> rc = vmaddr;
> >> goto out_up;
> >> }
> >> - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
> >> + fault = fixup_user_fault(current, gmap->mm, vmaddr, fault_flags);
> >> + if (fault & VM_FAULT_RETRY) {
> >> + fault_flags &= ~FAULT_FLAG_ALLOW_RETRY;
> >> + fault_flags |= FAULT_FLAG_TRIED;
> >> + goto retry;
> >> + } else if (fault) {
> >> rc = -EFAULT;
> >> goto out_up;
> >> }
> >
> > Me thinks that you want to add the retry code into fixup_user_fault itself.
> > You basically have the same code around the three calls to fixup_user_fault.
> > Yes, it will be a common code patch but I guess that it will be acceptable
> > given userfaultfd as a reason.
>
> That makes a lot of sense. In an earlier discussion (a followup of Jasons
> mm: Loosen MADV_NOHUGEPAGE to enable Qemu postcopy on s390) patch.
>
> Andrea suggested the following:
>
> It's probably better to add a fixup_user_fault_unlocked that will work
> like get_user_pages_unlocked. I.e. leaves the details of the mmap_sem
> locking internally to the function, and will handle VM_FAULT_RETRY
> automatically by re-taking the mmap_sem and repeating the
> fixup_user_fault after updating the FAULT_FLAG_ALLOW_RETRY to
> FAULT_FLAG_TRIED.
I know, I saw his mail. But within the gmap code we need to take the mmap_sem before calling fixup_user_fault as well as holding it for later on like __gmap_link.
We could introduce a new wrapper arround fixup_user_fault, like:
fixup_user_fault_retry, which would take care of the retry logic, but does not encapsulate the complete mmap_sem logic.
@Kirill would that be acceptable for you as well?