From: Wanpeng Li <[email protected]>
qemu-system-x86-8600 [004] d..1 7205.687530: kvm_entry: vcpu 2
qemu-system-x86-8600 [004] .... 7205.687532: kvm_exit: reason EXCEPTION_NMI rip 0xffffffffa921297d info ffffeb2c0e44e018 80000b0e
qemu-system-x86-8600 [004] .... 7205.687532: kvm_page_fault: address ffffeb2c0e44e018 error_code 0
qemu-system-x86-8600 [004] .... 7205.687620: kvm_try_async_get_page: gva = 0xffffeb2c0e44e018, gfn = 0x427e4e
qemu-system-x86-8600 [004] .N.. 7205.687628: kvm_async_pf_not_present: token 0x8b002 gva 0xffffeb2c0e44e018
kworker/4:2-7814 [004] .... 7205.687655: kvm_async_pf_completed: gva 0xffffeb2c0e44e018 address 0x7fcc30c4e000
qemu-system-x86-8600 [004] .... 7205.687703: kvm_async_pf_ready: token 0x8b002 gva 0xffffeb2c0e44e018
qemu-system-x86-8600 [004] d..1 7205.687711: kvm_entry: vcpu 2
After running some memory intensive workload in guest, I catch the kworker
which completes the GUP too quickly, and queues an "Page Ready" #PF exception
after the "Page not Present" exception before the next vmentry as the above
trace which will result in #DF injected to guest.
This patch fixes it by clearing the queue for "Page not Present" if "Page Ready"
occurs before the next vmentry since the GUP has already got the required page
and shadow page table has already been fixed by "Page Ready" handler.
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
arch/x86/kvm/x86.c | 37 ++++++++++++++++++++++++++++---------
1 file changed, 28 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6069af8..16c14c9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8619,6 +8619,13 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
sizeof(val));
}
+static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
+{
+
+ return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
+ sizeof(u32));
+}
+
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
@@ -8646,6 +8653,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
struct x86_exception fault;
+ u32 val;
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
@@ -8653,15 +8661,26 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
trace_kvm_async_pf_ready(work->arch.token, work->gva);
- if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
- !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
- fault.vector = PF_VECTOR;
- fault.error_code_valid = true;
- fault.error_code = 0;
- fault.nested_page_fault = false;
- fault.address = work->arch.token;
- fault.async_page_fault = true;
- kvm_inject_page_fault(vcpu, &fault);
+ if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) {
+ if (!apf_get_user(vcpu, &val)) {
+ if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
+ vcpu->arch.exception.pending &&
+ vcpu->arch.exception.nr == PF_VECTOR &&
+ !apf_put_user(vcpu, 0)) {
+ vcpu->arch.exception.pending = false;
+ vcpu->arch.exception.nr = 0;
+ vcpu->arch.exception.has_error_code = false;
+ vcpu->arch.exception.error_code = 0;
+ } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
+ fault.vector = PF_VECTOR;
+ fault.error_code_valid = true;
+ fault.error_code = 0;
+ fault.nested_page_fault = false;
+ fault.address = work->arch.token;
+ fault.async_page_fault = true;
+ kvm_inject_page_fault(vcpu, &fault);
+ }
+ }
}
vcpu->arch.apf.halted = false;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
--
2.7.4
2017-09-14 03:54-0700, Wanpeng Li:
> From: Wanpeng Li <[email protected]>
>
> qemu-system-x86-8600 [004] d..1 7205.687530: kvm_entry: vcpu 2
> qemu-system-x86-8600 [004] .... 7205.687532: kvm_exit: reason EXCEPTION_NMI rip 0xffffffffa921297d info ffffeb2c0e44e018 80000b0e
> qemu-system-x86-8600 [004] .... 7205.687532: kvm_page_fault: address ffffeb2c0e44e018 error_code 0
> qemu-system-x86-8600 [004] .... 7205.687620: kvm_try_async_get_page: gva = 0xffffeb2c0e44e018, gfn = 0x427e4e
> qemu-system-x86-8600 [004] .N.. 7205.687628: kvm_async_pf_not_present: token 0x8b002 gva 0xffffeb2c0e44e018
> kworker/4:2-7814 [004] .... 7205.687655: kvm_async_pf_completed: gva 0xffffeb2c0e44e018 address 0x7fcc30c4e000
> qemu-system-x86-8600 [004] .... 7205.687703: kvm_async_pf_ready: token 0x8b002 gva 0xffffeb2c0e44e018
> qemu-system-x86-8600 [004] d..1 7205.687711: kvm_entry: vcpu 2
>
> After running some memory intensive workload in guest, I catch the kworker
> which completes the GUP too quickly, and queues an "Page Ready" #PF exception
> after the "Page not Present" exception before the next vmentry as the above
> trace which will result in #DF injected to guest.
The #DF feature can bite us in other cases as well, e.g. when emulating
an instruction that throws #GP/#UD.
Can't we replace all non-#PF exceptions with the PV #PF?
Doing so should be wrong only for trap exceptions and we currently just
override them anyway, so we wouldn't regress. :)
> This patch fixes it by clearing the queue for "Page not Present" if "Page Ready"
> occurs before the next vmentry since the GUP has already got the required page
> and shadow page table has already been fixed by "Page Ready" handler.
>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Signed-off-by: Wanpeng Li <[email protected]>
> ---
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> @@ -8653,15 +8661,26 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
> kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
> trace_kvm_async_pf_ready(work->arch.token, work->gva);
>
> - if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
> - !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
> - fault.vector = PF_VECTOR;
> - fault.error_code_valid = true;
> - fault.error_code = 0;
> - fault.nested_page_fault = false;
> - fault.address = work->arch.token;
> - fault.async_page_fault = true;
> - kvm_inject_page_fault(vcpu, &fault);
> + if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) {
> + if (!apf_get_user(vcpu, &val)) {
I removed one indentation level when applying by merging these two
condition.
> + if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
> + vcpu->arch.exception.pending &&
> + vcpu->arch.exception.nr == PF_VECTOR &&
> + !apf_put_user(vcpu, 0)) {
> + vcpu->arch.exception.pending = false;
We know that vcpu->arch.exception.injected is false here, but I cleared
it too for safety, thanks.
> + vcpu->arch.exception.nr = 0;
> + vcpu->arch.exception.has_error_code = false;
> + vcpu->arch.exception.error_code = 0;
> + } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
> + fault.vector = PF_VECTOR;
> + fault.error_code_valid = true;
> + fault.error_code = 0;
> + fault.nested_page_fault = false;
> + fault.address = work->arch.token;
> + fault.async_page_fault = true;
> + kvm_inject_page_fault(vcpu, &fault);
> + }
> + }
> }
> vcpu->arch.apf.halted = false;
> vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> --
> 2.7.4
>