From: Paolo Bonzini <pbonzini@redhat.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: wanpeng.li@hotmail.com, david@redhat.com, rkrcmar@redhat.com,
	jmattson@google.com
Subject: [PATCH 4/4] KVM: x86: allow overwriting L2 reinjected exception with L1 vmexit
Date: Wed, 23 Aug 2017 22:43:58 +0200
Message-Id: <1503521038-21073-5-git-send-email-pbonzini@redhat.com>
X-Mailer: git-send-email 1.8.3.1
In-Reply-To: <1503521038-21073-1-git-send-email-pbonzini@redhat.com>
References: <1503521038-21073-1-git-send-email-pbonzini@redhat.com>

A reinjected exception is already recorded in either the IDT-vectored
event information fields or the EXITINTINFO fields; if the handling of
an exception in L0 causes a vmexit to L1, we don't really need to keep
the reinjected exception in vcpu->arch.exception.  Teach
kvm_multiple_exception to recognize this scenario through a new
kvm_x86_ops callback.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
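[Not part of the diff: as a quick illustration of the check that
kvm_multiple_exception gains below, here is a standalone userspace
sketch of the decision.  The struct, the l1_intercepts() helper and the
double-fault merging are simplified stand-ins for the real
kvm_queued_exception plumbing, not KVM code.

#include <stdbool.h>
#include <stdio.h>

struct queued_exception {
	unsigned int nr;
	bool pending;
	bool reinject;	/* already latched in IDT-vectoring/EXITINTINFO */
};

/* stand-in for kvm_x86_ops->nested_check_exception() */
static bool l1_intercepts(unsigned int nr, unsigned int exception_bitmap)
{
	return exception_bitmap & (1u << nr);
}

static void queue_exception(struct queued_exception *cur, unsigned int nr,
			    bool guest_mode, unsigned int exception_bitmap)
{
	if (cur->pending && cur->reinject && guest_mode &&
	    l1_intercepts(nr, exception_bitmap)) {
		/*
		 * New behavior: the reinjected exception is already
		 * recorded in the vmexit interruption-information
		 * fields, so the new exception, which causes an
		 * L0->L1 vmexit, can simply overwrite it.
		 */
		printf("overwrite reinjected #%u with #%u (L1 vmexit)\n",
		       cur->nr, nr);
	} else if (cur->pending) {
		/* old path: two pending exceptions merge into #DF */
		printf("merge #%u and #%u into #DF\n", cur->nr, nr);
		nr = 8;	/* DF_VECTOR */
	}
	cur->nr = nr;
	cur->pending = true;
	cur->reinject = false;
}

int main(void)
{
	/* a #DB (vector 1) was reinjected after an L2->L0 exit... */
	struct queued_exception ex = { .nr = 1, .pending = true,
				       .reinject = true };

	/* ...and handling it in L0 raises a #PF that L1 intercepts */
	queue_exception(&ex, 14, true, 1u << 14);
	return 0;
}

With the reinject flag clear, or with #PF absent from L1's exception
bitmap, the same call would instead take the double-fault path; that is
exactly what the new early "goto queue" below avoids.]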
 arch/x86/include/asm/kvm_host.h |  2 +
 arch/x86/kvm/svm.c              | 79 +++++++++++++++++++-------------------
 arch/x86/kvm/vmx.c              | 84 +++++++++++++++++++++--------------------
 arch/x86/kvm/x86.c              |  9 +++++
 4 files changed, 95 insertions(+), 79 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6db0ed9cf59e..643308143bea 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -962,6 +962,8 @@ struct kvm_x86_ops {
 				unsigned char *hypercall_addr);
 	void (*set_irq)(struct kvm_vcpu *vcpu);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
+	int (*nested_check_exception)(struct kvm_vcpu *vcpu,
+				      struct kvm_queued_exception *ex);
 	void (*queue_exception)(struct kvm_vcpu *vcpu);
 	void (*cancel_injection)(struct kvm_vcpu *vcpu);
 	int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7e190b21a30b..32c8d8f62985 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -291,8 +291,6 @@ struct amd_svm_iommu_ir {
 static int nested_svm_exit_handled(struct vcpu_svm *svm);
 static int nested_svm_intercept(struct vcpu_svm *svm);
 static int nested_svm_vmexit(struct vcpu_svm *svm);
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
-				      bool has_error_code, u32 error_code);
 
 enum {
 	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
@@ -650,6 +648,42 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	svm_set_interrupt_shadow(vcpu, 0);
 }
 
+static void nested_svm_queue_exception(struct kvm_vcpu *vcpu,
+				       struct kvm_queued_exception *ex)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->nr;
+	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_info_1 = ex->error_code;
+
+	/*
+	 * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
+	 * The fix is to add the ancillary datum (CR2 or DR6) to structs
+	 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
+	 * written only when inject_pending_event runs (DR6 would be written here
+	 * too).  This should be conditional on a new capability---if the
+	 * capability is disabled, kvm_multiple_exception would write the
+	 * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
+	 */
+	if (ex->nested_apf)
+		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+	else
+		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+
+	svm->nested.exit_required = true;
+}
+
+static int nested_svm_check_exception(struct kvm_vcpu *vcpu,
+				      struct kvm_queued_exception *ex)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	unsigned int nr = ex->nr;
+
+	return ((svm->nested.intercept_exceptions & (1 << nr)) ||
+		(nr == PF_VECTOR && ex->nested_apf));
+}
+
 static void svm_queue_exception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -662,9 +696,11 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
 	 * If we are within a nested VM we'd better #VMEXIT and let the guest
 	 * handle the exception
 	 */
-	if (!reinject &&
-	    nested_svm_check_exception(svm, nr, has_error_code, error_code))
+	if (!reinject && is_guest_mode(vcpu) &&
+	    nested_svm_check_exception(vcpu, &vcpu->arch.exception)) {
+		nested_svm_queue_exception(vcpu, &vcpu->arch.exception);
 		return;
+	}
 
 	if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
 		unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
@@ -2428,40 +2464,6 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm)
 	return 0;
 }
 
-static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
-				      bool has_error_code, u32 error_code)
-{
-	int vmexit;
-
-	if (!is_guest_mode(&svm->vcpu))
-		return 0;
-
-	vmexit = nested_svm_intercept(svm);
-	if (vmexit != NESTED_EXIT_DONE)
-		return 0;
-
-	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
-	svm->vmcb->control.exit_code_hi = 0;
-	svm->vmcb->control.exit_info_1 = error_code;
-
-	/*
-	 * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
-	 * The fix is to add the ancillary datum (CR2 or DR6) to structs
-	 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
-	 * written only when inject_pending_event runs (DR6 would written here
-	 * too).  This should be conditional on a new capability---if the
-	 * capability is disabled, kvm_multiple_exception would write the
-	 * ancillary information to CR2 or DR6, for backwards ABI-compatibility.
-	 */
-	if (svm->vcpu.arch.exception.nested_apf)
-		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
-	else
-		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
-	svm->nested.exit_required = true;
-	return vmexit;
-}
-
 /* This function returns true if it is save to enable the irq window */
 static inline bool nested_svm_intr(struct vcpu_svm *svm)
 {
@@ -5448,6 +5450,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
 	.patch_hypercall = svm_patch_hypercall,
 	.set_irq = svm_set_irq,
 	.set_nmi = svm_inject_nmi,
+	.nested_check_exception = nested_svm_check_exception,
 	.queue_exception = svm_queue_exception,
 	.cancel_injection = svm_cancel_injection,
 	.interrupt_allowed = svm_interrupt_allowed,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f8ef38094acc..ddabed8425b3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2438,15 +2438,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	vmx_set_interrupt_shadow(vcpu, 0);
 }
 
-static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
-					       unsigned long exit_qual)
+static void nested_vmx_queue_exception(struct kvm_vcpu *vcpu,
+				       struct kvm_queued_exception *ex)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	unsigned int nr = vcpu->arch.exception.nr;
-	u32 intr_info = nr | INTR_INFO_VALID_MASK;
+	unsigned int nr = ex->nr;
+	u32 intr_info = nr | INTR_INFO_VALID_MASK;
+	unsigned long exit_qual = 0;
 
-	if (vcpu->arch.exception.has_error_code) {
-		vmcs12->vm_exit_intr_error_code = vcpu->arch.exception.error_code;
+	/*
+	 * FIXME: we must not write CR2 or DR6 when L1 intercepts an L2 #PF
+	 * or #DB exception.
+	 *
+	 * The fix is to add the ancillary datum (CR2 or DR6) to structs
+	 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
+	 * can be written only when inject_pending_event runs.  This should be
+	 * conditional on a new capability---if the capability is disabled,
+	 * kvm_multiple_exception would write the ancillary information to
+	 * CR2 or DR6, for backwards ABI-compatibility.
+	 */
+	switch (nr) {
+	case PF_VECTOR:
+		if (ex->nested_apf)
+			exit_qual = vcpu->arch.apf.nested_apf_token;
+		else
+			exit_qual = vcpu->arch.cr2;
+		break;
+	case DB_VECTOR:
+		exit_qual = vcpu->arch.dr6;
+		break;
+	default:
+		exit_qual = 0;
+	}
+
+	if (ex->has_error_code) {
+		vmcs12->vm_exit_intr_error_code = ex->error_code;
 		intr_info |= INTR_INFO_DELIVER_CODE_MASK;
 	}
 
@@ -2466,43 +2492,16 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu,
+				      struct kvm_queued_exception *ex)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	unsigned int nr = vcpu->arch.exception.nr;
+	unsigned int nr = ex->nr;
 
-	if (nr == PF_VECTOR) {
-		if (vcpu->arch.exception.nested_apf) {
-			nested_vmx_inject_exception_vmexit(vcpu,
-							   vcpu->arch.apf.nested_apf_token);
-			return 1;
-		}
-		/*
-		 * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception.
-		 * The fix is to add the ancillary datum (CR2 or DR6) to structs
-		 * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6
-		 * can be written only when inject_pending_event runs.  This should be
-		 * conditional on a new capability---if the capability is disabled,
-		 * kvm_multiple_exception would write the ancillary information to
-		 * CR2 or DR6, for backwards ABI-compatibility.
-		 */
-		if (nested_vmx_is_page_fault_vmexit(vmcs12,
-						    vcpu->arch.exception.error_code)) {
-			nested_vmx_inject_exception_vmexit(vcpu, vcpu->arch.cr2);
-			return 1;
-		}
-	} else {
-		unsigned long exit_qual = 0;
-		if (nr == DB_VECTOR)
-			exit_qual = vcpu->arch.dr6;
-
-		if (vmcs12->exception_bitmap & (1u << nr)) {
-			nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
-			return 1;
-		}
-	}
-
-	return 0;
+	return (nr == PF_VECTOR
+		? (ex->nested_apf ||
+		   nested_vmx_is_page_fault_vmexit(vmcs12, ex->error_code))
+		: vmcs12->exception_bitmap & (1u << nr));
 }
 
 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
@@ -2515,8 +2514,10 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
 	if (!reinject && is_guest_mode(vcpu) &&
-	    nested_vmx_check_exception(vcpu))
+	    nested_vmx_check_exception(vcpu, &vcpu->arch.exception)) {
+		nested_vmx_queue_exception(vcpu, &vcpu->arch.exception);
 		return;
+	}
 
 	if (has_error_code) {
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -11886,6 +11887,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 	.patch_hypercall = vmx_patch_hypercall,
 	.set_irq = vmx_inject_irq,
 	.set_nmi = vmx_inject_nmi,
+	.nested_check_exception = nested_vmx_check_exception,
 	.queue_exception = vmx_queue_exception,
 	.cancel_injection = vmx_cancel_injection,
 	.interrupt_allowed = vmx_interrupt_allowed,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 88b91114c5a8..55c709531eb9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -390,6 +390,15 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
 	if (!vcpu->arch.exception.pending)
 		goto queue;
 
+	/*
+	 * If the previous exception was recorded during an L2->L0
+	 * exit, we can overwrite it with one that causes an L0->L1
+	 * nested vmexit.
+	 */
+	if (vcpu->arch.exception.reinject && is_guest_mode(vcpu) &&
+	    kvm_x86_ops->nested_check_exception(vcpu, &ex))
+		goto queue;
+
 	/* to check exception */
 	prev_nr = vcpu->arch.exception.nr;
 	if (prev_nr == DF_VECTOR) {
-- 
1.8.3.1