Introduce gfn_to_page_atomic() and gfn_to_pfn_atomic(), those
functions is fast path and can used in atomic context, the later
patch will use those
Signed-off-by: Xiao Guangrong <[email protected]>
---
arch/x86/mm/gup.c | 2 +
include/linux/kvm_host.h | 2 +
virt/kvm/kvm_main.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 54 insertions(+), 0 deletions(-)
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 738e659..0c9034b 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -6,6 +6,7 @@
*/
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/module.h>
#include <linux/vmstat.h>
#include <linux/highmem.h>
@@ -274,6 +275,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
return nr;
}
+EXPORT_SYMBOL_GPL(__get_user_pages_fast);
/**
* get_user_pages_fast() - pin user pages in memory
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2d96555..98c3e00 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -289,6 +289,7 @@ void kvm_arch_flush_shadow(struct kvm *kvm);
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn);
+struct page *gfn_to_page_atomic(struct kvm *kvm, gfn_t gfn);
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
void kvm_release_page_clean(struct page *page);
@@ -296,6 +297,7 @@ void kvm_release_page_dirty(struct page *page);
void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);
+pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 84a0906..b806f29 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -942,6 +942,41 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
+static pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+{
+ struct page *page[1];
+ int npages;
+ pfn_t pfn;
+
+ npages = __get_user_pages_fast(addr, 1, 1, page);
+
+ if (unlikely(npages != 1)) {
+ if (is_hwpoison_address(addr)) {
+ get_page(hwpoison_page);
+ return page_to_pfn(hwpoison_page);
+ }
+ get_page(bad_page);
+ return page_to_pfn(bad_page);
+ } else
+ pfn = page_to_pfn(page[0]);
+
+ return pfn;
+}
+
+pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
+{
+ unsigned long addr;
+
+ addr = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(addr)) {
+ get_page(bad_page);
+ return page_to_pfn(bad_page);
+ }
+
+ return hva_to_pfn_atomic(kvm, addr);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_atomic);
+
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
{
struct page *page[1];
@@ -1000,6 +1035,21 @@ pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
return hva_to_pfn(kvm, addr);
}
+struct page *gfn_to_page_atomic(struct kvm *kvm, gfn_t gfn)
+{
+ pfn_t pfn;
+
+ pfn = gfn_to_pfn_atomic(kvm, gfn);
+ if (!kvm_is_mmio_pfn(pfn))
+ return pfn_to_page(pfn);
+
+ WARN_ON(kvm_is_mmio_pfn(pfn));
+
+ get_page(bad_page);
+ return bad_page;
+}
+EXPORT_SYMBOL_GPL(gfn_to_page_atomic);
+
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
pfn_t pfn;
--
1.6.1.2
On 06/15/2010 05:46 AM, Xiao Guangrong wrote:
> Introduce gfn_to_page_atomic() and gfn_to_pfn_atomic(), those
> functions is fast path and can used in atomic context, the later
> patch will use those
>
>
> @@ -942,6 +942,41 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
> }
> EXPORT_SYMBOL_GPL(gfn_to_hva);
>
> +static pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
> +{
> + struct page *page[1];
> + int npages;
> + pfn_t pfn;
> +
> + npages = __get_user_pages_fast(addr, 1, 1, page);
> +
> + if (unlikely(npages != 1)) {
> + if (is_hwpoison_address(addr)) {
> + get_page(hwpoison_page);
> + return page_to_pfn(hwpoison_page);
> + }
> + get_page(bad_page);
> + return page_to_pfn(bad_page);
> + } else
> + pfn = page_to_pfn(page[0]);
> +
> + return pfn;
> +}
>
Too much duplication. How about putting the tail end of the function in
a common helper (with an inatomic flag)?
btw, is_hwpoison_address() is racy. While it looks up the address, some
other task can unmap the page tables under us.
Andi/Huang?
One way of fixing it is get_user_pages_ptes_fast(), which also returns
the pte, also atomically. I want it for other reasons as well (respond
to a read fault by gupping the page for read, but allowing write access
if the pte indicates it is writeable).
--
error compiling committee.c: too many arguments to function
On Tue, Jun 15, 2010 at 02:22:06PM +0300, Avi Kivity wrote:
> Too much duplication. How about putting the tail end of the function in a
> common helper (with an inatomic flag)?
>
> btw, is_hwpoison_address() is racy. While it looks up the address, some
> other task can unmap the page tables under us.
Where is is_hwpoison_address() coming from? I can't find it anywhere.
Anyways hwpoison will not remove the page as long as there
is a reference to it, so as long as you get the reference
race free against another task you're ok. Of course there might
be always an error in between and the hardware may poison
the data, but we don't try to handle all kernel errors.
-Andi
--
[email protected] -- Speaking for myself only.
On 06/16/2010 10:59 AM, Andi Kleen wrote:
> On Tue, Jun 15, 2010 at 02:22:06PM +0300, Avi Kivity wrote:
>
>> Too much duplication. How about putting the tail end of the function in a
>> common helper (with an inatomic flag)?
>>
>> btw, is_hwpoison_address() is racy. While it looks up the address, some
>> other task can unmap the page tables under us.
>>
> Where is is_hwpoison_address() coming from? I can't find it anywhere.
>
>
kvm.git master mm/memory-failure.c (19564281fe).
> Anyways hwpoison will not remove the page as long as there
> is a reference to it, so as long as you get the reference
> race free against another task you're ok. Of course there might
> be always an error in between and the hardware may poison
> the data, but we don't try to handle all kernel errors.
>
The page is fine, the page tables are not. Another task can munmap()
the thing while is_hwpoison_address() is running.
--
error compiling committee.c: too many arguments to function
> The page is fine, the page tables are not. Another task can munmap() the
> thing while is_hwpoison_address() is running.
Ok that boils down to me not seeing that source.
If it accesses the page tables yes then it's racy. But whoever
looked up the page tables in the first place should have
returned an -EFAULT. There's no useful address attached
to poison.
-Andi
--
[email protected] -- Speaking for myself only.
On 06/16/2010 11:49 AM, Andi Kleen wrote:
>> The page is fine, the page tables are not. Another task can munmap() the
>> thing while is_hwpoison_address() is running.
>>
> Ok that boils down to me not seeing that source.
>
> If it accesses the page tables yes then it's racy. But whoever
> looked up the page tables in the first place should have
> returned an -EFAULT. There's no useful address attached
> to poison.
>
We need to distinguish between genuine -EFAULT and poisoned address.
That's why I suggested get_user_pages_ptes_fast. You can return page =
NULL (-EFAULT) and the pte in the same go. No race, and useful for
other cases.
--
error compiling committee.c: too many arguments to function
On Tue, Jun 15, 2010 at 7:22 PM, Avi Kivity <[email protected]> wrote:
> btw, is_hwpoison_address() is racy. While it looks up the address, some
> other task can unmap the page tables under us.
>
> Andi/Huang?
>
> One way of fixing it is get_user_pages_ptes_fast(), which also returns the
> pte, also atomically. I want it for other reasons as well (respond to a
> read fault by gupping the page for read, but allowing write access if the
> pte indicates it is writeable).
Yes. is_hwpoison_address() is racy. But I think it is not absolutely
necessary to call is_hwpoison_address() in hva_to_pfn_atomic(), is it?
For is_hwpoison_address() in hva_to_pfn(), we can protect it with mmap_sem.
Best Regards,
Huang Ying
On 06/16/2010 01:51 PM, huang ying wrote:
> On Tue, Jun 15, 2010 at 7:22 PM, Avi Kivity<[email protected]> wrote:
>
>> btw, is_hwpoison_address() is racy. While it looks up the address, some
>> other task can unmap the page tables under us.
>>
>> Andi/Huang?
>>
>> One way of fixing it is get_user_pages_ptes_fast(), which also returns the
>> pte, also atomically. I want it for other reasons as well (respond to a
>> read fault by gupping the page for read, but allowing write access if the
>> pte indicates it is writeable).
>>
> Yes. is_hwpoison_address() is racy. But I think it is not absolutely
> necessary to call is_hwpoison_address() in hva_to_pfn_atomic(), is it?
>
We can probably ignore it, yes.
> For is_hwpoison_address() in hva_to_pfn(), we can protect it with mmap_sem.
>
Not very appealing, but should work.
--
error compiling committee.c: too many arguments to function
Avi Kivity wrote:
> On 06/15/2010 05:46 AM, Xiao Guangrong wrote:
>> Introduce gfn_to_page_atomic() and gfn_to_pfn_atomic(), those
>> functions is fast path and can used in atomic context, the later
>> patch will use those
>>
>>
>> @@ -942,6 +942,41 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
>> }
>> EXPORT_SYMBOL_GPL(gfn_to_hva);
>>
>> +static pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
>> +{
>> + struct page *page[1];
>> + int npages;
>> + pfn_t pfn;
>> +
>> + npages = __get_user_pages_fast(addr, 1, 1, page);
>> +
>> + if (unlikely(npages != 1)) {
>> + if (is_hwpoison_address(addr)) {
>> + get_page(hwpoison_page);
>> + return page_to_pfn(hwpoison_page);
>> + }
>> + get_page(bad_page);
>> + return page_to_pfn(bad_page);
>> + } else
>> + pfn = page_to_pfn(page[0]);
>> +
>> + return pfn;
>> +}
>>
>
> Too much duplication. How about putting the tail end of the function in
> a common helper (with an inatomic flag)?
>
OK, will cleanup it in the next version.