Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751982AbbKOQAL (ORCPT ); Sun, 15 Nov 2015 11:00:11 -0500 Received: from mx1.redhat.com ([209.132.183.28]:40559 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751338AbbKOQAI (ORCPT ); Sun, 15 Nov 2015 11:00:08 -0500 Date: Sun, 15 Nov 2015 18:00:02 +0200 From: "Michael S. Tsirkin" To: linux-kernel@vger.kernel.org Cc: Paolo Bonzini , Wanpeng Li , =?us-ascii?B?PT9VVEYtOD9xP1JhZGltPTIwS3I9QzQ9OERtPUMzPUExPUM1PTk5Pz0=?= , Andy Lutomirski , Xiao Guangrong , Kai Huang , =?us-ascii?B?PT9VVEYtOD9xP01paGFpPTIwRG9uPUM4PTlCdT89?= , kvm@vger.kernel.org Subject: [PATCH] kvm/vmx: EPTP switching test Message-ID: <1447598396-6678-1-git-send-email-mst@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline X-Mutt-Fcc: =sent Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7884 Lines: 243 This patch adds a new parameter: eptp_switching_test, which enables testing EPTP switching on VMX if supported by hardware. All EPTP list entries are initialized to the same value so this adds no useful functionality by itself, but can be used to test VMFUNC performance, and serve as a basis for future features based on EPTP switching. Support for nested virt is not enabled. This was tested using the following code within the guest: #define VMX_VMFUNC ".byte 0x0f,0x01,0xd4" static void vmfunc(unsigned int nr, unsigned int ept) { asm volatile(VMX_VMFUNC : : "a"(nr), "c"(ept) : "memory"); } VMFUNC instruction cost was measured at ~122 cycles. (Note: recent versions of the GNU toolchain support the vmfunc instruction, removing the need to write the opcode bytes manually). Signed-off-by: Michael S. Tsirkin --- I think I'd like to put this upstream so future eptp switching work can be implemented on top. Comments? 
arch/x86/include/asm/vmx.h | 7 ++++ arch/x86/kvm/vmx.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 448b7ca..ceb68d9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -69,10 +69,13 @@ #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VM_FUNCTIONS 0x00002000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 +/* Definitions for VM-function controls */ +#define VM_FUNCTION_EPTP_SWITCHING 0x00000001 #define PIN_BASED_EXT_INTR_MASK 0x00000001 #define PIN_BASED_NMI_EXITING 0x00000008 @@ -153,6 +156,8 @@ enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015, POSTED_INTR_DESC_ADDR = 0x00002016, POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + VM_FUNCTION_CTRL = 0x00002018, + VM_FUNCTION_CTRL_HIGH = 0x00002019, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, EOI_EXIT_BITMAP0 = 0x0000201c, @@ -163,6 +168,8 @@ enum vmcs_field { EOI_EXIT_BITMAP2_HIGH = 0x00002021, EOI_EXIT_BITMAP3 = 0x00002022, EOI_EXIT_BITMAP3_HIGH = 0x00002023, + EPTP_LIST_ADDRESS = 0x00002024, + EPTP_LIST_ADDRESS_HIGH = 0x00002025, VMREAD_BITMAP = 0x00002026, VMWRITE_BITMAP = 0x00002028, XSS_EXIT_BITMAP = 0x0000202C, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6a8bc64..3d1f613 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "trace.h" #include "pmu.h" @@ -105,6 +106,9 @@ static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; module_param_named(pml, enable_pml, bool, S_IRUGO); +static bool __read_mostly enable_eptp_switching = 0; +module_param_named(eptp_switching_test, enable_eptp_switching, bool, S_IRUGO); + #define KVM_GUEST_CR0_MASK 
(X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ @@ -547,6 +551,10 @@ struct vcpu_vmx { /* Support for PML */ #define PML_ENTITY_NUM 512 struct page *pml_pg; + + /* Support for EPTP switching */ +#define EPTP_LIST_NUM 512 + struct page *eptp_list_pg; }; enum segment_cache_field { @@ -1113,6 +1121,22 @@ static inline bool cpu_has_vmx_pml(void) return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML; } +static inline bool cpu_has_vmx_vm_functions(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_VM_FUNCTIONS; +} + +/* check if the cpu supports writing EPTP switching */ +static inline bool cpu_has_vmx_eptp_switching(void) +{ + u64 vmx_msr; + + rdmsrl(MSR_IA32_VMX_VMFUNC, vmx_msr); + /* This MSR has same format as VM-function controls */ + return vmx_msr & VM_FUNCTION_EPTP_SWITCHING; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -3011,6 +3035,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_PAUSE_LOOP_EXITING | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_ENABLE_INVPCID | + SECONDARY_EXEC_ENABLE_VM_FUNCTIONS | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_SHADOW_VMCS | @@ -3600,6 +3625,13 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) guest_cr3 = cr3; if (enable_ept) { eptp = construct_eptp(cr3); + if (to_vmx(vcpu)->eptp_list_pg) { + u64 *eptp_list = phys_to_virt(page_to_phys(to_vmx(vcpu)->eptp_list_pg)); + int i; + + for (i = 0; i < EPTP_LIST_NUM; ++i) + eptp_list[i] = eptp; + } vmcs_write64(EPT_POINTER, eptp); if (is_paging(vcpu) || is_guest_mode(vcpu)) guest_cr3 = kvm_read_cr3(vcpu); @@ -6089,6 +6121,13 @@ static __init int hardware_setup(void) if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml()) enable_pml = 0; + /* + * Only enable EPT switching when hardware supports EPT switching, and EPT + * and 
VM functions are enabled -- EPT switching depends on these to work. + */ + if (!enable_ept || !cpu_has_vmx_vm_functions() || !cpu_has_vmx_eptp_switching()) + enable_eptp_switching = 0; + if (!enable_pml) { kvm_x86_ops->slot_enable_log_dirty = NULL; kvm_x86_ops->slot_disable_log_dirty = NULL; @@ -7590,6 +7629,26 @@ static int vmx_enable_pml(struct vcpu_vmx *vmx) return 0; } +static int vmx_enable_ept_switching(struct vcpu_vmx *vmx) +{ + struct page *eptp_list_pg; + u64 vm_function_control; + + eptp_list_pg = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!eptp_list_pg) + return -ENOMEM; + + vmx->eptp_list_pg = eptp_list_pg; + + vmcs_write64(EPTP_LIST_ADDRESS, page_to_phys(vmx->eptp_list_pg)); + + vm_function_control = vmcs_read64(VM_FUNCTION_CTRL); + vm_function_control |= VM_FUNCTION_EPTP_SWITCHING; + vmcs_write64(VM_FUNCTION_CTRL, vm_function_control); + + return 0; +} + static void vmx_disable_pml(struct vcpu_vmx *vmx) { u32 exec_control; @@ -7603,6 +7662,21 @@ static void vmx_disable_pml(struct vcpu_vmx *vmx) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } +static void vmx_disable_ept_switching(struct vcpu_vmx *vmx) +{ + u64 vm_function_control; + + ASSERT(vmx->eptp_list_pg); + __free_page(vmx->eptp_list_pg); + vmx->eptp_list_pg = NULL; + + vmcs_write64(EPTP_LIST_ADDRESS, 0); + + vm_function_control = vmcs_read64(VM_FUNCTION_CTRL); + vm_function_control &= ~VM_FUNCTION_EPTP_SWITCHING; + vmcs_write64(VM_FUNCTION_CTRL, vm_function_control); +} + static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -8476,6 +8550,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (enable_eptp_switching) + vmx_disable_ept_switching(vmx); if (enable_pml) vmx_disable_pml(vmx); free_vpid(vmx); @@ -8564,8 +8640,16 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) goto free_vmcs; } + if (enable_eptp_switching) { + err = vmx_enable_ept_switching(vmx); + if (err) + 
goto disable_pml; + } + return &vmx->vcpu; +disable_pml: + vmx_disable_pml(vmx); free_vmcs: free_loaded_vmcs(vmx->loaded_vmcs); free_msrs: -- MST -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/