Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756628AbcLSUzJ (ORCPT ); Mon, 19 Dec 2016 15:55:09 -0500 Received: from mail-qt0-f180.google.com ([209.85.216.180]:36672 "EHLO mail-qt0-f180.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753803AbcLSUzH (ORCPT ); Mon, 19 Dec 2016 15:55:07 -0500 MIME-Version: 1.0 In-Reply-To: <20161129204041.8839-4-khuey@kylehuey.com> References: <20161129204041.8839-1-khuey@kylehuey.com> <20161129204041.8839-4-khuey@kylehuey.com> From: David Matlack Date: Mon, 19 Dec 2016 12:54:35 -0800 Message-ID: Subject: Re: [PATCH v2 3/5] KVM: VMX: Move skip_emulated_instruction out of nested_vmx_check_vmcs12 To: Kyle Huey Cc: Paolo Bonzini , =?UTF-8?B?UmFkaW0gS3LEjW3DocWZ?= , Thomas Gleixner , Ingo Molnar , "H. Peter Anvin" , X86 ML , Joerg Roedel , kvm list , "linux-kernel@vger.kernel.org" Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 12887 Lines: 295 On Tue, Nov 29, 2016 at 12:40 PM, Kyle Huey wrote: > We can't return both the pass/fail boolean for the vmcs and the upcoming > continue/exit-to-userspace boolean for skip_emulated_instruction out of > nested_vmx_check_vmcs, so move skip_emulated_instruction out of it instead. > > Additionally, VMENTER/VMRESUME only trigger singlestep exceptions when > they advance the IP to the following instruction, not when they a) succeed, > b) fail MSR validation or c) throw an exception. Add a separate call to > skip_emulated_instruction that will later not be converted to the variant > that checks the singlestep flag. 
> > Signed-off-by: Kyle Huey > --- > arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++-------------------- > 1 file changed, 33 insertions(+), 20 deletions(-) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index f2f9cf5..f4f6304 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -7319,33 +7319,36 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) > * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was > * used before) all generate the same failure when it is missing. > */ > static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > if (vmx->nested.current_vmptr == -1ull) { > nested_vmx_failInvalid(vcpu); > - skip_emulated_instruction(vcpu); > return 0; > } > return 1; > } > > static int handle_vmread(struct kvm_vcpu *vcpu) > { > unsigned long field; > u64 field_value; > unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); > u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); > gva_t gva = 0; > > - if (!nested_vmx_check_permission(vcpu) || > - !nested_vmx_check_vmcs12(vcpu)) > + if (!nested_vmx_check_permission(vcpu)) > + return 1; > + > + if (!nested_vmx_check_vmcs12(vcpu)) { > + skip_emulated_instruction(vcpu); > return 1; > + } > > /* Decode instruction info and find the field to read */ > field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); > /* Read the field, zero-extended to a u64 field_value */ > if (vmcs12_read_any(vcpu, field, &field_value) < 0) { > nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); > skip_emulated_instruction(vcpu); > return 1; > @@ -7383,20 +7386,24 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) > * mode, and eventually we need to write that into a field of several > * possible lengths. The code below first zero-extends the value to 64 > * bit (field_value), and then copies only the appropriate number of > * bits into the vmcs12 field. 
> */ > u64 field_value = 0; > struct x86_exception e; > > - if (!nested_vmx_check_permission(vcpu) || > - !nested_vmx_check_vmcs12(vcpu)) > + if (!nested_vmx_check_permission(vcpu)) > return 1; > > + if (!nested_vmx_check_vmcs12(vcpu)) { > + skip_emulated_instruction(vcpu); > + return 1; > + } > + > if (vmx_instruction_info & (1u << 10)) > field_value = kvm_register_readl(vcpu, > (((vmx_instruction_info) >> 3) & 0xf)); > else { > if (get_vmx_mem_address(vcpu, exit_qualification, > vmx_instruction_info, false, &gva)) > return 1; > if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, > @@ -10041,21 +10048,22 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > { > struct vmcs12 *vmcs12; > struct vcpu_vmx *vmx = to_vmx(vcpu); > int cpu; > struct loaded_vmcs *vmcs02; > bool ia32e; > u32 msr_entry_idx; > > - if (!nested_vmx_check_permission(vcpu) || > - !nested_vmx_check_vmcs12(vcpu)) > + if (!nested_vmx_check_permission(vcpu)) > return 1; > > - skip_emulated_instruction(vcpu); > + if (!nested_vmx_check_vmcs12(vcpu)) > + goto out; > + > vmcs12 = get_vmcs12(vcpu); > > if (enable_shadow_vmcs) > copy_shadow_to_vmcs12(vmx); > > /* > * The nested entry process starts with enforcing various prerequisites > * on vmcs12 as required by the Intel SDM, and act appropriately when > @@ -10065,43 +10073,43 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > * To speed up the normal (success) code path, we should avoid checking > * for misconfigurations which will anyway be caught by the processor > * when using the merged vmcs02. > */ > if (vmcs12->launch_state == launch) { > nested_vmx_failValid(vcpu, > launch ? 
VMXERR_VMLAUNCH_NONCLEAR_VMCS > : VMXERR_VMRESUME_NONLAUNCHED_VMCS); > - return 1; > + goto out; > } > > if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && > vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { > nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); > - return 1; > + goto out; > } > > if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { > nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); > - return 1; > + goto out; > } > > if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) { > nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); > - return 1; > + goto out; > } > > if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) { > nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); > - return 1; > + goto out; > } > > if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) { > nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); > - return 1; > + goto out; > } > > if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, > vmx->nested.nested_vmx_true_procbased_ctls_low, > vmx->nested.nested_vmx_procbased_ctls_high) || > !vmx_control_verify(vmcs12->secondary_vm_exec_control, > vmx->nested.nested_vmx_secondary_ctls_low, > vmx->nested.nested_vmx_secondary_ctls_high) || > @@ -10111,36 +10119,36 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > !vmx_control_verify(vmcs12->vm_exit_controls, > vmx->nested.nested_vmx_true_exit_ctls_low, > vmx->nested.nested_vmx_exit_ctls_high) || > !vmx_control_verify(vmcs12->vm_entry_controls, > vmx->nested.nested_vmx_true_entry_ctls_low, > vmx->nested.nested_vmx_entry_ctls_high)) > { > nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); > - return 1; > + goto out; > } > > if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) || > ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { > nested_vmx_failValid(vcpu, > VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); > - return 1; > + goto out; > } > > if 
(!nested_cr0_valid(vcpu, vmcs12->guest_cr0) || > ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) { > nested_vmx_entry_failure(vcpu, vmcs12, > EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); > - return 1; > + goto out; It's a bug to call kvm_skip_emulated_instruction() after nested_vmx_entry_failure. I sent out "KVM: nVMX: fix instruction skipping during emulated vm-entry" with a fix. > } > if (vmcs12->vmcs_link_pointer != -1ull) { > nested_vmx_entry_failure(vcpu, vmcs12, > EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR); > - return 1; > + goto out; > } > > /* > * If the load IA32_EFER VM-entry control is 1, the following checks > * are performed on the field for the IA32_EFER MSR: > * - Bits reserved in the IA32_EFER MSR must be 0. > * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of > * the IA-32e mode guest VM-exit control. It must also be identical > @@ -10150,17 +10158,17 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { > ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; > if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || > ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || > ((vmcs12->guest_cr0 & X86_CR0_PG) && > ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { > nested_vmx_entry_failure(vcpu, vmcs12, > EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); > - return 1; > + goto out; > } > } > > /* > * If the load IA32_EFER VM-exit control is 1, bits reserved in the > * IA32_EFER MSR must be 0 in the field for that register. In addition, > * the values of the LMA and LME bits in the field must each be that of > * the host address-space size VM-exit control. 
> @@ -10168,29 +10176,30 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { > ia32e = (vmcs12->vm_exit_controls & > VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; > if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || > ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || > ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { > nested_vmx_entry_failure(vcpu, vmcs12, > EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); > - return 1; > + goto out; > } > } > > /* > * We're finally done with prerequisite checking, and can start with > * the nested entry. > */ > > vmcs02 = nested_get_current_vmcs02(vmx); > if (!vmcs02) > return -ENOMEM; > > + skip_emulated_instruction(vcpu); I wonder if we can drop this call. Advancing RIP past the VMLAUNCH/VMRESUME instruction does nothing here, because when L2 eventually exits we are going to resume L1 at vmcs12->host_rip rather than at the instruction following VMLAUNCH/VMRESUME. But skip_emulated_instruction() has a second side effect, which is to clear the interrupt shadow, and I'm not sure whether that is important here. > enter_guest_mode(vcpu); > > if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) > vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); > > cpu = get_cpu(); > vmx->loaded_vmcs = vmcs02; > vmx_vcpu_put(vcpu); > @@ -10222,16 +10231,20 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) > > /* > * Note no nested_vmx_succeed or nested_vmx_fail here. At this point > * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet > * returned as far as L1 is concerned. It will only return (and set > * the success flag) when L2 exits (see nested_vmx_vmexit()). > */ > return 1; > + > +out: > + skip_emulated_instruction(vcpu); > + return 1; > } > > /* > * On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date > * because L2 may have changed some cr0 bits directly (CRO_GUEST_HOST_MASK). > * This function returns the new value we should put in vmcs12.guest_cr0.
> * It's not enough to just return the vmcs02 GUEST_CR0. Rather, > * 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now > -- > 2.10.2 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html