v7:
3/3:
Fix check for AD
Use kvm_vcpu_read_guest_page()
v6:
https://lkml.org/lkml/2017/8/1/1015
3/3:
Fix check for memory type in address
Change check function name as requested in the review
Move setting of mmu->ept_ad to after calling mmu_unload
and also reset base_role.ad_disabled appropriately
Replace IS_ALIGN with page_address_valid()
v5:
https://lkml.org/lkml/2017/7/28/621
1/3 and 2/3 are unchanged, but there are some changes in 3/3. I left
the mmu_load failure path untouched because I am not sure what the
right thing to do here is.
3/3:
Move the eptp switching logic to a different function
Add check for EPTP_ADDRESS in check_vmentry_prereq
Add check for validity of ept pointer
Check if AD bit is set and set ept_ad
Add TODO item about mmu_unload failure
v4:
https://lkml.org/lkml/2017/7/10/705
2/3: Use WARN_ONCE to avoid a logging DoS
v3:
https://lkml.org/lkml/2017/7/10/684
3/3: Add missing nested_release_page_clean() and check the
eptp as mentioned in SDM 24.6.14
v2:
https://lkml.org/lkml/2017/7/6/813
1/3: Patch to enable vmfunc on the host but cause a #UD if
L1 tries to use it directly. (new)
2/3: Expose vmfunc to the nested hypervisor, but no vm functions
are exposed and L0 emulates a vmfunc vmexit to L1.
3/3: Force a vmfunc vmexit when L2 tries to use vmfunc and emulate
eptp switching. Unconditionally expose EPTP switching to the
L1 hypervisor since L0 fakes eptp switching via a mmu reload.
These patches expose eptp switching/vmfunc to the nested hypervisor.
vmfunc is enabled in the secondary controls for the host and is
exposed to the nested hypervisor. However, if the nested hypervisor
decides to use eptp switching, L0 emulates it.
v1:
https://lkml.org/lkml/2017/6/29/958
Bandan Das (3):
KVM: vmx: Enable VMFUNCs
KVM: nVMX: Enable VMFUNC for the L1 hypervisor
KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
arch/x86/include/asm/vmx.h | 9 +++
arch/x86/kvm/vmx.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 192 insertions(+), 2 deletions(-)
--
2.9.4
[PATCH v7 2/3] KVM: nVMX: Enable VMFUNC for the L1 hypervisor
Expose VMFUNC in MSRs and VMCS fields. No actual VMFUNCs are enabled.
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/kvm/vmx.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 51 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b8969da..042ea88 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -243,6 +243,7 @@ struct __packed vmcs12 {
u64 virtual_apic_page_addr;
u64 apic_access_addr;
u64 posted_intr_desc_addr;
+ u64 vm_function_control;
u64 ept_pointer;
u64 eoi_exit_bitmap0;
u64 eoi_exit_bitmap1;
@@ -484,6 +485,7 @@ struct nested_vmx {
u64 nested_vmx_cr4_fixed0;
u64 nested_vmx_cr4_fixed1;
u64 nested_vmx_vmcs_enum;
+ u64 nested_vmx_vmfunc_controls;
};
#define POSTED_INTR_ON 0
@@ -766,6 +768,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
+ FIELD64(VM_FUNCTION_CONTROL, vm_function_control),
FIELD64(EPT_POINTER, ept_pointer),
FIELD64(EOI_EXIT_BITMAP0, eoi_exit_bitmap0),
FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
@@ -1398,6 +1401,11 @@ static inline bool nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
return vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR;
}
+static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
+}
+
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2807,6 +2815,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
} else
vmx->nested.nested_vmx_ept_caps = 0;
+ if (cpu_has_vmx_vmfunc()) {
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+ vmx->nested.nested_vmx_vmfunc_controls = 0;
+ }
+
/*
* Old versions of KVM use the single-context version without
* checking for support, so declare that it is supported even
@@ -3176,6 +3190,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
*pdata = vmx->nested.nested_vmx_ept_caps |
((u64)vmx->nested.nested_vmx_vpid_caps << 32);
break;
+ case MSR_IA32_VMX_VMFUNC:
+ *pdata = vmx->nested.nested_vmx_vmfunc_controls;
+ break;
default:
return 1;
}
@@ -7805,7 +7822,29 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
static int handle_vmfunc(struct kvm_vcpu *vcpu)
{
- kvm_queue_exception(vcpu, UD_VECTOR);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vmcs12 *vmcs12;
+ u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
+
+ /*
+ * VMFUNC is only supported for nested guests, but we always enable the
+ * secondary control for simplicity; for non-nested mode, fake that we
+ * didn't have it, by injecting #UD.
+ */
+ if (!is_guest_mode(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ vmcs12 = get_vmcs12(vcpu);
+ if ((vmcs12->vm_function_control & (1 << function)) == 0)
+ goto fail;
+ WARN_ONCE(1, "VMCS12 VM function control should have been zero");
+
+fail:
+ nested_vmx_vmexit(vcpu, vmx->exit_reason,
+ vmcs_read32(VM_EXIT_INTR_INFO),
+ vmcs_readl(EXIT_QUALIFICATION));
return 1;
}
@@ -10133,7 +10172,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_APIC_REGISTER_VIRT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_ENABLE_VMFUNC);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
@@ -10141,6 +10181,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control |= vmcs12_exec_ctrl;
}
+ /* All VMFUNCs are currently emulated through L0 vmexits. */
+ if (exec_control & SECONDARY_EXEC_ENABLE_VMFUNC)
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) {
vmcs_write64(EOI_EXIT_BITMAP0,
vmcs12->eoi_exit_bitmap0);
@@ -10393,6 +10437,11 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.nested_vmx_entry_ctls_high))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_cpu_has_vmfunc(vmcs12) &&
+ (vmcs12->vm_function_control &
+ ~vmx->nested.nested_vmx_vmfunc_controls))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
--
2.9.4
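For context (an illustrative sketch, not part of the series), a guest invokes
a VM function by loading the function number into EAX and, for function 0
(EPTP switching), the EPTP-list index into ECX, then executing VMFUNC. The
raw opcode bytes are used below because older assemblers may not know the
mnemonic, and the helper name is made up:

static inline void l2_switch_eptp(unsigned int eptp_index)
{
	/* VMFUNC, opcode 0f 01 d4: EAX = 0 selects EPTP switching,
	 * ECX = index into the EPTP list provided by the hypervisor.
	 */
	asm volatile(".byte 0x0f, 0x01, 0xd4"
		     : : "a" (0), "c" (eptp_index)
		     : "memory");
}

With this patch, executing the above in L2 traps to L0, which emulates a
VMFUNC vmexit to L1; the EPTP switch itself is only emulated in 3/3.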
[PATCH v7 3/3] KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
When L2 uses vmfunc, L0 utilizes the associated vmexit to
emulate a switching of the ept pointer by reloading the
guest MMU.
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/include/asm/vmx.h | 6 +++
arch/x86/kvm/vmx.c | 124 ++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 124 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index da5375e..5f63a2e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -115,6 +115,10 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+/* VMFUNC functions */
+#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
+#define VMFUNC_EPTP_ENTRIES 512
+
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
return vmx_basic & GENMASK_ULL(30, 0);
@@ -200,6 +204,8 @@ enum vmcs_field {
EOI_EXIT_BITMAP2_HIGH = 0x00002021,
EOI_EXIT_BITMAP3 = 0x00002022,
EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ EPTP_LIST_ADDRESS = 0x00002024,
+ EPTP_LIST_ADDRESS_HIGH = 0x00002025,
VMREAD_BITMAP = 0x00002026,
VMWRITE_BITMAP = 0x00002028,
XSS_EXIT_BITMAP = 0x0000202C,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 042ea88..61f7fe5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -249,6 +249,7 @@ struct __packed vmcs12 {
u64 eoi_exit_bitmap1;
u64 eoi_exit_bitmap2;
u64 eoi_exit_bitmap3;
+ u64 eptp_list_address;
u64 xss_exit_bitmap;
u64 guest_physical_address;
u64 vmcs_link_pointer;
@@ -774,6 +775,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
+ FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
@@ -1406,6 +1408,13 @@ static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
}
+static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
+{
+ return nested_cpu_has_vmfunc(vmcs12) &&
+ (vmcs12->vm_function_control &
+ VMX_VMFUNC_EPTP_SWITCHING);
+}
+
static inline bool is_nmi(u32 intr_info)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -2818,7 +2827,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (cpu_has_vmx_vmfunc()) {
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_VMFUNC;
- vmx->nested.nested_vmx_vmfunc_controls = 0;
+ /*
+ * Advertise EPTP switching unconditionally
+ * since we emulate it
+ */
+ vmx->nested.nested_vmx_vmfunc_controls =
+ VMX_VMFUNC_EPTP_SWITCHING;
}
/*
@@ -7820,6 +7834,88 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
return 1;
}
+static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ u64 mask = address & 0x7;
+ int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+ /* Check for memory type validity */
+ switch (mask) {
+ case 0:
+ if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
+ return false;
+ break;
+ case 6:
+ if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ /* Bits 5:3 must be 3 */
+ if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
+ return false;
+
+ /* Reserved bits should not be set */
+ if (address >> maxphyaddr || ((address >> 7) & 0x1f))
+ return false;
+
+ /* AD, if set, should be supported */
+ if ((address & VMX_EPT_AD_ENABLE_BIT)) {
+ if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
+ return false;
+ }
+
+ return true;
+}
+
+static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+ u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
+ u64 address;
+ bool accessed_dirty;
+ struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
+
+ if (!nested_cpu_has_eptp_switching(vmcs12) ||
+ !nested_cpu_has_ept(vmcs12))
+ return 1;
+
+ if (index >= VMFUNC_EPTP_ENTRIES)
+ return 1;
+
+
+ if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
+ &address, index * 8, 8))
+ return 1;
+
+ accessed_dirty = !!(address & VMX_EPT_AD_ENABLE_BIT);
+
+ /*
+ * If the (L2) guest does a vmfunc to the currently
+ * active ept pointer, we don't have to do anything else
+ */
+ if (vmcs12->ept_pointer != address) {
+ if (!valid_ept_address(vcpu, address))
+ return 1;
+
+ kvm_mmu_unload(vcpu);
+ mmu->ept_ad = accessed_dirty;
+ mmu->base_role.ad_disabled = !accessed_dirty;
+ vmcs12->ept_pointer = address;
+ /*
+ * TODO: Check what's the correct approach in case
+ * mmu reload fails. Currently, we just let the next
+ * reload potentially fail
+ */
+ kvm_mmu_reload(vcpu);
+ }
+
+ return 0;
+}
+
static int handle_vmfunc(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7839,7 +7935,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
vmcs12 = get_vmcs12(vcpu);
if ((vmcs12->vm_function_control & (1 << function)) == 0)
goto fail;
- WARN_ONCE(1, "VMCS12 VM function control should have been zero");
+
+ switch (function) {
+ case 0:
+ if (nested_vmx_eptp_switching(vcpu, vmcs12))
+ goto fail;
+ break;
+ default:
+ goto fail;
+ }
+ return kvm_skip_emulated_instruction(vcpu);
fail:
nested_vmx_vmexit(vcpu, vmx->exit_reason,
@@ -10437,10 +10542,17 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx->nested.nested_vmx_entry_ctls_high))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
- if (nested_cpu_has_vmfunc(vmcs12) &&
- (vmcs12->vm_function_control &
- ~vmx->nested.nested_vmx_vmfunc_controls))
- return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_cpu_has_vmfunc(vmcs12)) {
+ if (vmcs12->vm_function_control &
+ ~vmx->nested.nested_vmx_vmfunc_controls)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ if (nested_cpu_has_eptp_switching(vmcs12)) {
+ if (!nested_cpu_has_ept(vmcs12) ||
+ !page_address_valid(vcpu, vmcs12->eptp_list_address))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ }
+ }
if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
--
2.9.4
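As a reading aid for valid_ept_address() above, this hypothetical helper
(not part of the patch) builds an EPTP value with the layout being checked:
bits 2:0 hold the memory type (0 = UC, 6 = WB), bits 5:3 the page-walk
length minus one (3 for a 4-level walk), bit 6 enables accessed/dirty flags,
bits 11:7 are reserved, and the remaining bits carry the page-aligned
address of the root EPT (PML4) table:

static u64 make_eptp(u64 pml4_pa, bool enable_ad)
{
	u64 eptp = pml4_pa & PAGE_MASK;		/* bits 12..(MAXPHYADDR-1) */

	eptp |= VMX_EPT_DEFAULT_MT;		/* bits 2:0 - memory type 6 (WB) */
	eptp |= VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT; /* bits 5:3 = 3 */
	if (enable_ad)
		eptp |= VMX_EPT_AD_ENABLE_BIT;	/* bit 6 - accessed/dirty flags */
	/* bits 11:7 are reserved and must be zero */
	return eptp;
}

This mirrors what construct_eptp() already does for L0's own EPT pointer.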
[PATCH v7 1/3] KVM: vmx: Enable VMFUNCs
Enable VMFUNC in the secondary execution controls. This simplifies the
changes necessary to expose it to nested hypervisors. VMFUNCs still
cause #UD when invoked.
Signed-off-by: Paolo Bonzini <[email protected]>
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/include/asm/vmx.h | 3 +++
arch/x86/kvm/vmx.c | 22 +++++++++++++++++++++-
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 35cd06f..da5375e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -72,6 +72,7 @@
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
#define SECONDARY_EXEC_RDRAND 0x00000800
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
#define SECONDARY_EXEC_RDSEED 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
@@ -187,6 +188,8 @@ enum vmcs_field {
APIC_ACCESS_ADDR_HIGH = 0x00002015,
POSTED_INTR_DESC_ADDR = 0x00002016,
POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ VM_FUNCTION_CONTROL = 0x00002018,
+ VM_FUNCTION_CONTROL_HIGH = 0x00002019,
EPT_POINTER = 0x0000201a,
EPT_POINTER_HIGH = 0x0000201b,
EOI_EXIT_BITMAP0 = 0x0000201c,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 39a6222..b8969da 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1318,6 +1318,12 @@ static inline bool cpu_has_vmx_tsc_scaling(void)
SECONDARY_EXEC_TSC_SCALING;
}
+static inline bool cpu_has_vmx_vmfunc(void)
+{
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
+ SECONDARY_EXEC_ENABLE_VMFUNC;
+}
+
static inline bool report_flexpriority(void)
{
return flexpriority_enabled;
@@ -3607,7 +3613,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_SHADOW_VMCS |
SECONDARY_EXEC_XSAVES |
SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_TSC_SCALING;
+ SECONDARY_EXEC_TSC_SCALING |
+ SECONDARY_EXEC_ENABLE_VMFUNC;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@@ -5303,6 +5310,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(HOST_GS_BASE, 0); /* 22.2.4 */
#endif
+ if (cpu_has_vmx_vmfunc())
+ vmcs_write64(VM_FUNCTION_CONTROL, 0);
+
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
@@ -7793,6 +7803,12 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
return 1;
}
+static int handle_vmfunc(struct kvm_vcpu *vcpu)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -7843,6 +7859,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_XSAVES] = handle_xsaves,
[EXIT_REASON_XRSTORS] = handle_xrstors,
[EXIT_REASON_PML_FULL] = handle_pml_full,
+ [EXIT_REASON_VMFUNC] = handle_vmfunc,
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
};
@@ -8164,6 +8181,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_PML_FULL:
/* We emulate PML support to L1. */
return false;
+ case EXIT_REASON_VMFUNC:
+ /* VM functions are emulated through L2->L0 vmexits. */
+ return false;
default:
return true;
}
--
2.9.4
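As a side note (an illustrative sketch, not something this patch adds),
cpu_has_vmx_vmfunc() relies on setup_vmcs_config() having accepted the
control; probing the hardware capability directly would look roughly like
this, since the allowed-1 settings of the secondary controls are reported
in the high 32 bits of MSR_IA32_VMX_PROCBASED_CTLS2:

static bool hw_supports_vmfunc(void)
{
	u32 allowed_0, allowed_1;

	/* low half: allowed 0-settings, high half: allowed 1-settings */
	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, allowed_0, allowed_1);
	return allowed_1 & SECONDARY_EXEC_ENABLE_VMFUNC;	/* bit 13 */
}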
Looks very good to me now. :)
> /*
> @@ -7820,6 +7834,88 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
> return 1;
> }
>
> +static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + u64 mask = address & 0x7;
> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> + /* Check for memory type validity */
> + switch (mask) {
> + case 0:
> + if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
> + return false;
> + break;
> + case 6:
> + if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
> + return false;
> + break;
KVM always sets 6 (WB) without checking, and there is also a TODO left in
construct_eptp(). We could probably hit this case only in very
strange environments (e.g. WB support revoked for our L1, so it cannot use WB
memory for L2), so we should be safe by requiring WB for now.
From 0ac06e7242d25ba1b66e4e6e2b20dbfa21ba4308 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <[email protected]>
Date: Fri, 4 Aug 2017 16:40:32 +0200
Subject: [PATCH v1] KVM: VMX: require EPT WB (Write Back) memory type
support
Signed-off-by: David Hildenbrand <[email protected]>
---
arch/x86/kvm/vmx.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 78c66a7..a2f8475 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1192,6 +1192,11 @@ static inline bool cpu_has_vmx_ept_4levels(void)
return vmx_capability.ept & VMX_EPT_PAGE_WALK_4_BIT;
}
+static inline bool cpu_has_vmx_ept_wb_bit(void)
+{
+ return vmx_capability.ept & VMX_EPTP_WB_BIT;
+}
+
static inline bool cpu_has_vmx_ept_ad_bits(void)
{
return vmx_capability.ept & VMX_EPT_AD_BIT;
@@ -4260,7 +4265,6 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu,
unsigned long root_hpa)
{
u64 eptp;
- /* TODO write the value reading from MSR */
eptp = VMX_EPT_DEFAULT_MT |
VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
if (enable_ept_ad_bits &&
@@ -6579,7 +6583,8 @@ static __init int hardware_setup(void)
init_vmcs_shadow_fields();
if (!cpu_has_vmx_ept() ||
- !cpu_has_vmx_ept_4levels()) {
+ !cpu_has_vmx_ept_4levels() ||
+ !cpu_has_vmx_ept_wb_bit()) {
enable_ept = 0;
enable_unrestricted_guest = 0;
enable_ept_ad_bits = 0;
--
2.9.4
--
Thanks,
David
On 03.08.2017 21:54, Bandan Das wrote:
> v7:
> 3/3:
> Fix check for AD
> Use kvm_vcpu_read_guest_page()
>
> v6:
> https://lkml.org/lkml/2017/8/1/1015
> 3/3:
> Fix check for memory type in address
> Change check function name as requested in the review
> Move setting of mmu->ept_ad to after calling mmu_unload
> and also reset base_role.ad_disabled appropriately
> Replace IS_ALIGN with page_address_valid()
>
> v5:
> https://lkml.org/lkml/2017/7/28/621
> 1/3 and 2/3 are unchanged but some changes in 3/3. I left
> the mmu_load failure path untouched because I am not sure what's
> the right thing to do here.
> 3/3:
> Move the eptp switching logic to a different function
> Add check for EPTP_ADDRESS in check_vmentry_prereq
> Add check for validity of ept pointer
> Check if AD bit is set and set ept_ad
> Add TODO item about mmu_unload failure
>
> v4:
> https://lkml.org/lkml/2017/7/10/705
> 2/3: Use WARN_ONCE to avoid logging dos
>
> v3:
> https://lkml.org/lkml/2017/7/10/684
> 3/3: Add missing nested_release_page_clean() and check the
> eptp as mentioned in SDM 24.6.14
>
> v2:
> https://lkml.org/lkml/2017/7/6/813
> 1/3: Patch to enable vmfunc on the host but cause a #UD if
> L1 tries to use it directly. (new)
> 2/3: Expose vmfunc to the nested hypervisor, but no vm functions
> are exposed and L0 emulates a vmfunc vmexit to L1.
> 3/3: Force a vmfunc vmexit when L2 tries to use vmfunc and emulate
> eptp switching. Unconditionally expose EPTP switching to the
> L1 hypervisor since L0 fakes eptp switching via a mmu reload.
>
> These patches expose eptp switching/vmfunc to the nested hypervisor.
> vmfunc is enabled in the secondary controls for the host and is
> exposed to the nested hypervisor. However, if the nested hypervisor
> decides to use eptp switching, L0 emulates it.
>
> v1:
> https://lkml.org/lkml/2017/6/29/958
>
> Bandan Das (3):
> KVM: vmx: Enable VMFUNCs
> KVM: nVMX: Enable VMFUNC for the L1 hypervisor
> KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
>
> arch/x86/include/asm/vmx.h | 9 +++
> arch/x86/kvm/vmx.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 192 insertions(+), 2 deletions(-)
>
Acked-by: David Hildenbrand <[email protected]>
(not 100% confident for a r-b, not because of your patches but because
of the involved complexity (flushes, MMU ...))
--
Thanks,
David
David Hildenbrand <[email protected]> writes:
...
>> v1:
>> https://lkml.org/lkml/2017/6/29/958
>>
>> Bandan Das (3):
>> KVM: vmx: Enable VMFUNCs
>> KVM: nVMX: Enable VMFUNC for the L1 hypervisor
>> KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
>>
>> arch/x86/include/asm/vmx.h | 9 +++
>> arch/x86/kvm/vmx.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-
>> 2 files changed, 192 insertions(+), 2 deletions(-)
>>
>
> Acked-by: David Hildenbrand <[email protected]>
>
> (not 100% confident for a r-b, not because of your patches but because
> of the involved complexity (flushes, MMU ...))
You and Radim both contributed major revisions and changes to these patches.
I would be 100% confident in an R-b tag from you.
Bandan
Queued, thanks.
2017-08-03 15:54-0400, Bandan Das:
> v7:
> 3/3:
> Fix check for AD
> Use kvm_vcpu_read_guest_page()
>
> v6:
> https://lkml.org/lkml/2017/8/1/1015
> 3/3:
> Fix check for memory type in address
> Change check function name as requested in the review
> Move setting of mmu->ept_ad to after calling mmu_unload
> and also reset base_role.ad_disabled appropriately
> Replace IS_ALIGN with page_address_valid()
>
> v5:
> https://lkml.org/lkml/2017/7/28/621
> 1/3 and 2/3 are unchanged but some changes in 3/3. I left
> the mmu_load failure path untouched because I am not sure what's
> the right thing to do here.
> 3/3:
> Move the eptp switching logic to a different function
> Add check for EPTP_ADDRESS in check_vmentry_prereq
> Add check for validity of ept pointer
> Check if AD bit is set and set ept_ad
> Add TODO item about mmu_unload failure
>
> v4:
> https://lkml.org/lkml/2017/7/10/705
> 2/3: Use WARN_ONCE to avoid logging dos
>
> v3:
> https://lkml.org/lkml/2017/7/10/684
> 3/3: Add missing nested_release_page_clean() and check the
> eptp as mentioned in SDM 24.6.14
>
> v2:
> https://lkml.org/lkml/2017/7/6/813
> 1/3: Patch to enable vmfunc on the host but cause a #UD if
> L1 tries to use it directly. (new)
> 2/3: Expose vmfunc to the nested hypervisor, but no vm functions
> are exposed and L0 emulates a vmfunc vmexit to L1.
> 3/3: Force a vmfunc vmexit when L2 tries to use vmfunc and emulate
> eptp switching. Unconditionally expose EPTP switching to the
> L1 hypervisor since L0 fakes eptp switching via a mmu reload.
>
> These patches expose eptp switching/vmfunc to the nested hypervisor.
> vmfunc is enabled in the secondary controls for the host and is
> exposed to the nested hypervisor. However, if the nested hypervisor
> decides to use eptp switching, L0 emulates it.
>
> v1:
> https://lkml.org/lkml/2017/6/29/958
>
> Bandan Das (3):
> KVM: vmx: Enable VMFUNCs
> KVM: nVMX: Enable VMFUNC for the L1 hypervisor
> KVM: nVMX: Emulate EPTP switching for the L1 hypervisor
>
> arch/x86/include/asm/vmx.h | 9 +++
> arch/x86/kvm/vmx.c | 185 ++++++++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 192 insertions(+), 2 deletions(-)
>
> --
> 2.9.4
>