Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751804AbdGaTcH (ORCPT ); Mon, 31 Jul 2017 15:32:07 -0400 Received: from mx1.redhat.com ([209.132.183.28]:52510 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751651AbdGaTcF (ORCPT ); Mon, 31 Jul 2017 15:32:05 -0400 DMARC-Filter: OpenDMARC Filter v1.3.2 mx1.redhat.com 5412E13712 Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; dmarc=none (p=none dis=none) header.from=redhat.com Authentication-Results: ext-mx03.extmail.prod.ext.phx2.redhat.com; spf=fail smtp.mailfrom=bsd@redhat.com From: Bandan Das To: David Hildenbrand Cc: kvm@vger.kernel.org, pbonzini@redhat.com, rkrcmar@redhat.com, jmattson@google.com, linux-kernel@vger.kernel.org Subject: Re: [PATCH v5 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor References: <20170728195245.1018-1-bsd@redhat.com> <20170728195245.1018-4-bsd@redhat.com> Date: Mon, 31 Jul 2017 15:32:02 -0400 In-Reply-To: (David Hildenbrand's message of "Mon, 31 Jul 2017 13:59:11 +0200") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/25.2 (gnu/linux) MIME-Version: 1.0 Content-Type: text/plain X-Greylist: Sender IP whitelisted, not delayed by milter-greylist-4.5.16 (mx1.redhat.com [10.5.110.27]); Mon, 31 Jul 2017 19:32:05 +0000 (UTC) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 6119 Lines: 195 Hi David, David Hildenbrand writes: >> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12) >> +{ >> + return nested_cpu_has_vmfunc(vmcs12) && >> + (vmcs12->vm_function_control & >> + VMX_VMFUNC_EPTP_SWITCHING); >> +} >> + >> static inline bool is_nmi(u32 intr_info) >> { >> return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) >> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) >> if (cpu_has_vmx_vmfunc()) { >> vmx->nested.nested_vmx_secondary_ctls_high |= >> SECONDARY_EXEC_ENABLE_VMFUNC; >> - vmx->nested.nested_vmx_vmfunc_controls = 0; >> + /* >> + * Advertise EPTP switching unconditionally >> + * since we emulate it >> + */ >> + vmx->nested.nested_vmx_vmfunc_controls = >> + VMX_VMFUNC_EPTP_SWITCHING; > > Should this only be advertised, if enable_ept is set (if the guest also > sees/can use SECONDARY_EXEC_ENABLE_EPT)? This represents the function control MSR, which on the hardware is a RO value. The checks for enable_ept and such are somewhere else. >> } >> >> /* >> @@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu) >> return 1; >> } >> >> +static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address) > > check_..._valid -> valid_ept_address() ? I think either of the names is fine and I would prefer not to respin unless you feel really strongly about it :) > >> +{ >> + struct vcpu_vmx *vmx = to_vmx(vcpu); >> + u64 mask = VMX_EPT_RWX_MASK; >> + int maxphyaddr = cpuid_maxphyaddr(vcpu); >> + struct kvm_mmu *mmu = vcpu->arch.walk_mmu; >> + >> + /* Check for execute_only validity */ >> + if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) { >> + if (!(vmx->nested.nested_vmx_ept_caps & >> + VMX_EPT_EXECUTE_ONLY_BIT)) >> + return false; >> + } >> + >> + /* Bits 5:3 must be 3 */ >> + if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW) >> + return false; >> + >> + /* Reserved bits should not be set */ >> + if (address >> maxphyaddr || ((address >> 7) & 0x1f)) >> + return false; >> + >> + /* AD, if set, should be supported */ >> + if ((address & VMX_EPT_AD_ENABLE_BIT)) { >> + if (!enable_ept_ad_bits) >> + return false; >> + mmu->ept_ad = true; >> + } else >> + mmu->ept_ad = false; > > I wouldn't expect a "check" function to modify the mmu. Can you move > modifying the mmu outside of this function (leaving the > enable_ept_ad_bits check in place)? (and maybe even set mmu->ept_ad > _after_ the kvm_mmu_unload(vcpu)?, just when setting vmcs12->ept_pointer?) > Well, the correct thing to do is have a wrapper around it in mmu.c without directly calling here and also call this function before nested_mmu is initialized. I am working on a separate patch for this btw. It seems to me setting mmu->ept_ad after kvm_mmu_unload is unnecessary since it's already being set only if everything else succeeds. kvm_mmu_unload() isn't affected by the setting of this flag if I understand correctly. >> + >> + return true; >> +} >> + >> +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, >> + struct vmcs12 *vmcs12) >> +{ >> + u32 index = vcpu->arch.regs[VCPU_REGS_RCX]; >> + u64 *l1_eptp_list, address; >> + struct page *page; >> + >> + if (!nested_cpu_has_eptp_switching(vmcs12) || >> + !nested_cpu_has_ept(vmcs12)) >> + return 1; >> + >> + if (index >= VMFUNC_EPTP_ENTRIES) >> + return 1; >> + >> + page = nested_get_page(vcpu, vmcs12->eptp_list_address); >> + if (!page) >> + return 1; >> + >> + l1_eptp_list = kmap(page); >> + address = l1_eptp_list[index]; >> + >> + /* >> + * If the (L2) guest does a vmfunc to the currently >> + * active ept pointer, we don't have to do anything else >> + */ >> + if (vmcs12->ept_pointer != address) { >> + if (!check_ept_address_valid(vcpu, address)) { >> + kunmap(page); >> + nested_release_page_clean(page); >> + return 1; >> + } >> + kvm_mmu_unload(vcpu); >> + vmcs12->ept_pointer = address; >> + /* >> + * TODO: Check what's the correct approach in case >> + * mmu reload fails. Currently, we just let the next >> + * reload potentially fail >> + */ >> + kvm_mmu_reload(vcpu); > > So, what actually happens if this generates a tripple fault? I guess we > will kill the (nested) hypervisor? Yes. Not sure what's the right thing to do is though... Bandan >> + } >> + >> + kunmap(page); >> + nested_release_page_clean(page); >> + return 0; >> +} >> + >> static int handle_vmfunc(struct kvm_vcpu *vcpu) >> { >> struct vcpu_vmx *vmx = to_vmx(vcpu); >> @@ -7786,7 +7879,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) >> vmcs12 = get_vmcs12(vcpu); >> if ((vmcs12->vm_function_control & (1 << function)) == 0) >> goto fail; >> - WARN_ONCE(1, "VMCS12 VM function control should have been zero"); >> + >> + switch (function) { >> + case 0: >> + if (nested_vmx_eptp_switching(vcpu, vmcs12)) >> + goto fail; >> + break; >> + default: >> + goto fail; >> + } >> + return kvm_skip_emulated_instruction(vcpu); >> >> fail: >> nested_vmx_vmexit(vcpu, vmx->exit_reason, >> @@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> vmx->nested.nested_vmx_entry_ctls_high)) >> return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >> >> - if (nested_cpu_has_vmfunc(vmcs12) && >> - (vmcs12->vm_function_control & >> - ~vmx->nested.nested_vmx_vmfunc_controls)) >> - return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >> + if (nested_cpu_has_vmfunc(vmcs12)) { >> + if (vmcs12->vm_function_control & >> + ~vmx->nested.nested_vmx_vmfunc_controls) >> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >> + >> + if (nested_cpu_has_eptp_switching(vmcs12)) { >> + if (!nested_cpu_has_ept(vmcs12) || >> + (vmcs12->eptp_list_address >> >> + cpuid_maxphyaddr(vcpu)) || >> + !IS_ALIGNED(vmcs12->eptp_list_address, 4096)) >> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >> + } >> + } >> + >> >> if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) >> return VMXERR_ENTRY_INVALID_CONTROL_FIELD; >>