This patch series is on top of Huang Ying "Replace is_hwpoison_address with
__get_user_pages" series: http://www.mail-archive.com/[email protected]/msg48776.html
Gleb Natapov (2):
Allow GUP to fail instead of waiting on a page.
KVM: Enable async page fault processing.
include/linux/mm.h | 2 ++
mm/filemap.c | 6 ++++--
mm/memory.c | 5 ++++-
virt/kvm/kvm_main.c | 23 +++++++++++++++++++++--
4 files changed, 31 insertions(+), 5 deletions(-)
If asynchronous hva_to_pfn() is requested call GUP with FOLL_NOWAIT to
avoid sleeping on IO. Check for hwpoison is done at the same time,
otherwise check_user_page_hwpoison() will call GUP again and will put
vcpu to sleep.
Signed-off-by: Gleb Natapov <[email protected]>
---
virt/kvm/kvm_main.c | 23 +++++++++++++++++++++--
1 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 74d032a..80f42ab 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1029,6 +1029,17 @@ static pfn_t get_fault_pfn(void)
return fault_pfn;
}
+int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int write, struct page **page)
+{
+ int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET;
+
+ if (write)
+ flags |= FOLL_WRITE;
+
+ return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
+}
+
static inline int check_user_page_hwpoison(unsigned long addr)
{
int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1062,7 +1073,14 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
if (writable)
*writable = write_fault;
- npages = get_user_pages_fast(addr, 1, write_fault, page);
+ if (async) {
+ down_read(&current->mm->mmap_sem);
+ npages = get_user_page_nowait(current, current->mm,
+ addr, write_fault, page);
+ up_read(&current->mm->mmap_sem);
+ } else
+ npages = get_user_pages_fast(addr, 1, write_fault,
+ page);
/* map read fault as writable if possible */
if (unlikely(!write_fault) && npages == 1) {
@@ -1085,7 +1103,8 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
return get_fault_pfn();
down_read(&current->mm->mmap_sem);
- if (check_user_page_hwpoison(addr)) {
+ if (npages == -EHWPOISON ||
+ (!async && check_user_page_hwpoison(addr))) {
up_read(&current->mm->mmap_sem);
get_page(hwpoison_page);
return page_to_pfn(hwpoison_page);
--
1.7.1
A GUP user may want to acquire a reference to a page only if it is already
in memory, and not when IO is needed to bring it in. For example, KVM may
tell a vcpu to schedule another guest process if the current one is trying
to access a swapped-out page. Meanwhile, the page will be swapped in and the
guest process that depends on it will be able to run again.
This patch adds FAULT_FLAG_RETRY_NOWAIT (suggested by Linus) and
FOLL_NOWAIT follow_page flags. FAULT_FLAG_RETRY_NOWAIT, when used in
conjunction with FAULT_FLAG_ALLOW_RETRY, indicates to handle_mm_fault that
it shouldn't drop mmap_sem and wait on a page, but return VM_FAULT_RETRY
instead.
Signed-off-by: Gleb Natapov <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Rik van Riel <[email protected]>
CC: Hugh Dickins <[email protected]>
CC: Andrew Morton <[email protected]>
---
include/linux/mm.h | 2 ++
mm/filemap.c | 6 ++++--
mm/memory.c | 5 ++++-
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9c3a7c8..fa5e562 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -151,6 +151,7 @@ extern pgprot_t protection_map[16];
#define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */
#define FAULT_FLAG_MKWRITE 0x04 /* Fault was mkwrite of existing pte */
#define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */
+#define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */
/*
* This interface is used by x86 PAT code to identify a pfn mapping that is
@@ -1532,6 +1533,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
#define FOLL_GET 0x04 /* do get_page on page */
#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
+#define FOLL_NOWAIT 0x20 /* return if disk transfer is needed */
#define FOLL_MLOCK 0x40 /* mark page as mlocked */
#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */
diff --git a/mm/filemap.c b/mm/filemap.c
index 83a45d3..312b6eb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -621,8 +621,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
__lock_page(page);
return 1;
} else {
- up_read(&mm->mmap_sem);
- wait_on_page_locked(page);
+ if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ up_read(&mm->mmap_sem);
+ wait_on_page_locked(page);
+ }
return 0;
}
}
diff --git a/mm/memory.c b/mm/memory.c
index e7c9a99..a52a3e6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1569,6 +1569,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
fault_flags |= FAULT_FLAG_WRITE;
if (nonblocking)
fault_flags |= FAULT_FLAG_ALLOW_RETRY;
+ if (foll_flags & FOLL_NOWAIT)
+ fault_flags |= (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT);
ret = handle_mm_fault(mm, vma, start,
fault_flags);
@@ -1595,7 +1597,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
tsk->min_flt++;
if (ret & VM_FAULT_RETRY) {
- *nonblocking = 0;
+ if (nonblocking)
+ *nonblocking = 0;
return i;
}
--
1.7.1
On 02/01/2011 06:21 AM, Gleb Natapov wrote:
> GUP user may want to try to acquire a reference to a page if it is already
> in memory, but not if IO, to bring it in, is needed. For example KVM may
> tell vcpu to schedule another guest process if current one is trying to
> access swapped out page. Meanwhile, the page will be swapped in and the
> guest process, that depends on it, will be able to run again.
>
> This patch adds FAULT_FLAG_RETRY_NOWAIT (suggested by Linus) and
> FOLL_NOWAIT follow_page flags. FAULT_FLAG_RETRY_NOWAIT, when used in
> conjunction with VM_FAULT_ALLOW_RETRY, indicates to handle_mm_fault that
> it shouldn't drop mmap_sem and wait on a page, but return VM_FAULT_RETRY
> instead.
>
> Signed-off-by: Gleb Natapov<[email protected]>
> CC: Linus Torvalds<[email protected]>
> CC: Rik van Riel<[email protected]>
> CC: Hugh Dickins<[email protected]>
> CC: Andrew Morton<[email protected]>
Acked-by: Rik van Riel <[email protected]>
--
All rights reversed
On Tue, 1 Feb 2011 13:21:46 +0200
Gleb Natapov <[email protected]> wrote:
> GUP user may want to try to acquire a reference to a page if it is already
> in memory, but not if IO, to bring it in, is needed. For example KVM may
> tell vcpu to schedule another guest process if current one is trying to
> access swapped out page. Meanwhile, the page will be swapped in and the
> guest process, that depends on it, will be able to run again.
>
> This patch adds FAULT_FLAG_RETRY_NOWAIT (suggested by Linus) and
> FOLL_NOWAIT follow_page flags. FAULT_FLAG_RETRY_NOWAIT, when used in
> conjunction with VM_FAULT_ALLOW_RETRY, indicates to handle_mm_fault that
> it shouldn't drop mmap_sem and wait on a page, but return VM_FAULT_RETRY
> instead.
>
> ...
>
> +#define FOLL_NOWAIT 0x20 /* return if disk transfer is needed */
The comment is a little misleading. Or incomplete.
For both swap-backed and file-backed pages, the code will initiate the
disk transfer and will then return without waiting for it to complete.
This (important!) information isn't really presented in either the
changelog or the code itself.
This?
--- a/include/linux/mm.h~mm-allow-gup-to-fail-instead-of-waiting-on-a-page-fix
+++ a/include/linux/mm.h
@@ -1537,7 +1537,8 @@ struct page *follow_page(struct vm_area_
#define FOLL_GET 0x04 /* do get_page on page */
#define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
#define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
-#define FOLL_NOWAIT 0x20 /* return if disk transfer is needed */
+#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO
+ * and return without waiting upon it */
#define FOLL_MLOCK 0x40 /* mark page as mlocked */
#define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
_
On Tue, Feb 01, 2011 at 04:42:40PM -0800, Andrew Morton wrote:
> On Tue, 1 Feb 2011 13:21:46 +0200
> Gleb Natapov <[email protected]> wrote:
>
> > GUP user may want to try to acquire a reference to a page if it is already
> > in memory, but not if IO, to bring it in, is needed. For example KVM may
> > tell vcpu to schedule another guest process if current one is trying to
> > access swapped out page. Meanwhile, the page will be swapped in and the
> > guest process, that depends on it, will be able to run again.
> >
> > This patch adds FAULT_FLAG_RETRY_NOWAIT (suggested by Linus) and
> > FOLL_NOWAIT follow_page flags. FAULT_FLAG_RETRY_NOWAIT, when used in
> > conjunction with VM_FAULT_ALLOW_RETRY, indicates to handle_mm_fault that
> > it shouldn't drop mmap_sem and wait on a page, but return VM_FAULT_RETRY
> > instead.
> >
> > ...
> >
> > +#define FOLL_NOWAIT 0x20 /* return if disk transfer is needed */
>
> The comment is a little misleading. Or incomplete.
>
> For both swap-backed and file-backed pages, the code will initiate the
> disk transfer and will then return without waiting for it to complete.
> This (important!) information isn't really presented in either the
> changelog or the code itself.
>
> This?
>
Yes, this is better. Thank you. I see that the patch below is in your queue
already. Should I re-spin my patch with improved comment anyway?
> --- a/include/linux/mm.h~mm-allow-gup-to-fail-instead-of-waiting-on-a-page-fix
> +++ a/include/linux/mm.h
> @@ -1537,7 +1537,8 @@ struct page *follow_page(struct vm_area_
> #define FOLL_GET 0x04 /* do get_page on page */
> #define FOLL_DUMP 0x08 /* give error on hole if it would be zero */
> #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */
> -#define FOLL_NOWAIT 0x20 /* return if disk transfer is needed */
> +#define FOLL_NOWAIT 0x20 /* if a disk transfer is needed, start the IO
> + * and return without waiting upon it */
> #define FOLL_MLOCK 0x40 /* mark page as mlocked */
> #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */
>
> _
--
Gleb.
On Wed, 2 Feb 2011 15:31:57 +0200
Gleb Natapov <[email protected]> wrote:
> > This?
> >
> Yes, this is better. Thanks you. I see that the patch below is in your queue
> already. Should I re-spin my patch with improved comment anyway?
Nope, that's OK - I fold fixup patches into the base patch before
sending them onwards.
There's always a risk that someone will get a hold of an earlier
version of the patch, but a) sending out a v2 doesn't eliminate that
risk and b) it's not very important anyway (in this case) and c)
because I separate the base patch from the fixup patches, I'll easily
notice if someone merges an earlier patch, because I'm left holding
stray fixup patches.
On 02/01/2011, Gleb Natapov wrote:
> If asynchronous hva_to_pfn() is requested call GUP with FOLL_NOWAIT to
> avoid sleeping on IO. Check for hwpoison is done at the same time,
> otherwise check_user_page_hwpoison() will call GUP again and will put
> vcpu to sleep.
>
> Signed-off-by: Gleb Natapov <[email protected]>
> ---
Acked-by: Lai Jiangshan <[email protected]>
On Tue, Feb 01, 2011 at 01:21:47PM +0200, Gleb Natapov wrote:
> If asynchronous hva_to_pfn() is requested call GUP with FOLL_NOWAIT to
> avoid sleeping on IO. Check for hwpoison is done at the same time,
> otherwise check_user_page_hwpoison() will call GUP again and will put
> vcpu to sleep.
>
FOLL_NOWAIT is now in Linus tree, so this patch can be applied now. I
verified that it still applies and works.
> Signed-off-by: Gleb Natapov <[email protected]>
> ---
> virt/kvm/kvm_main.c | 23 +++++++++++++++++++++--
> 1 files changed, 21 insertions(+), 2 deletions(-)
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 74d032a..80f42ab 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1029,6 +1029,17 @@ static pfn_t get_fault_pfn(void)
> return fault_pfn;
> }
>
> +int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
> + unsigned long start, int write, struct page **page)
> +{
> + int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET;
> +
> + if (write)
> + flags |= FOLL_WRITE;
> +
> + return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
> +}
> +
> static inline int check_user_page_hwpoison(unsigned long addr)
> {
> int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
> @@ -1062,7 +1073,14 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
> if (writable)
> *writable = write_fault;
>
> - npages = get_user_pages_fast(addr, 1, write_fault, page);
> + if (async) {
> + down_read(&current->mm->mmap_sem);
> + npages = get_user_page_nowait(current, current->mm,
> + addr, write_fault, page);
> + up_read(&current->mm->mmap_sem);
> + } else
> + npages = get_user_pages_fast(addr, 1, write_fault,
> + page);
>
> /* map read fault as writable if possible */
> if (unlikely(!write_fault) && npages == 1) {
> @@ -1085,7 +1103,8 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
> return get_fault_pfn();
>
> down_read(&current->mm->mmap_sem);
> - if (check_user_page_hwpoison(addr)) {
> + if (npages == -EHWPOISON ||
> + (!async && check_user_page_hwpoison(addr))) {
> up_read(&current->mm->mmap_sem);
> get_page(hwpoison_page);
> return page_to_pfn(hwpoison_page);
> --
> 1.7.1
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to [email protected]. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
> Don't email: <a href=mailto:"[email protected]"> [email protected] </a>
--
Gleb.
On 03/24/2011 02:22 PM, Gleb Natapov wrote:
> On Tue, Feb 01, 2011 at 01:21:47PM +0200, Gleb Natapov wrote:
> > If asynchronous hva_to_pfn() is requested call GUP with FOLL_NOWAIT to
> > avoid sleeping on IO. Check for hwpoison is done at the same time,
> > otherwise check_user_page_hwpoison() will call GUP again and will put
> > vcpu to sleep.
> >
> FOLL_NOWAIT is now in Linus tree, so this patch can be applied now. I
> verified that it still applies and works.
Thanks, applied and queued for 2.6.39.
--
error compiling committee.c: too many arguments to function