2018-06-14 08:27:21

by Vitaly Kuznetsov

Subject: [PATCH 0/5] KVM: nVMX: Enlightened VMCS for Hyper-V on KVM

This is an initial implementation of Enlightened VMCS for nested Hyper-V on
KVM. Using it saves around 1500 CPU cycles per nested vmexit (measured with
a tight cpuid loop in WS2016 with the Hyper-V role enabled, running nested
on KVM: 15200 cycles -> 13700 cycles).
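
For reference, the "tight cpuid loop" above means a loop along these lines
running in the guest; this is an illustrative sketch, not the exact
benchmark (an rdtsc-based timer and a GCC/Clang toolchain are assumptions):

#include <stdio.h>
#include <stdint.h>
#include <x86intrin.h>	/* __rdtsc() */

int main(void)
{
	const int iters = 1000000;
	uint32_t eax, ebx, ecx, edx;
	uint64_t start = __rdtsc();

	for (int i = 0; i < iters; i++) {
		eax = 0; ecx = 0;
		/* CPUID unconditionally causes a vmexit, so in a nested
		 * guest every iteration is a full exit/entry round trip */
		asm volatile("cpuid"
			     : "+a"(eax), "=b"(ebx), "+c"(ecx), "=d"(edx));
	}
	(void)ebx; (void)edx;

	printf("%lu cycles per iteration\n",
	       (unsigned long)((__rdtsc() - start) / iters));
	return 0;
}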

Changes since RFCv2:
- Rename sync_shadow_vmcs to need_vmcs12_sync and reuse for eVMCS case
[Paolo Bonzini]

Ladi Prosek (1):
KVM: hyperv: define VP assist page helpers

Vitaly Kuznetsov (4):
KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability
KVM: nVMX: add enlightened VMCS state
KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR
KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case

arch/x86/include/asm/kvm_host.h | 3 +
arch/x86/kvm/hyperv.c | 23 +-
arch/x86/kvm/hyperv.h | 4 +
arch/x86/kvm/lapic.c | 4 +-
arch/x86/kvm/lapic.h | 2 +-
arch/x86/kvm/svm.c | 9 +
arch/x86/kvm/vmx.c | 810 +++++++++++++++++++++++++++++++++-------
arch/x86/kvm/x86.c | 17 +-
include/uapi/linux/kvm.h | 1 +
9 files changed, 724 insertions(+), 149 deletions(-)

--
2.14.4



2018-06-14 08:26:18

by Vitaly Kuznetsov

Subject: [PATCH 1/5] KVM: hyperv: define VP assist page helpers

From: Ladi Prosek <[email protected]>

The state related to the VP assist page is still managed by the LAPIC
code in the pv_eoi field.

Signed-off-by: Ladi Prosek <[email protected]>
Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
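Note: an illustrative (hypothetical) caller of the new helpers below,
mirroring how a later patch in this series consumes them; not part of
the diff:

	struct hv_vp_assist_page assist_page;

	/* returns false when the VP assist page is disabled for the vCPU
	 * or its contents cannot be read from guest memory */
	if (!kvm_hv_get_assist_page(vcpu, &assist_page))
		return 1;

	/* assist_page now holds a snapshot of the guest's page */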
arch/x86/kvm/hyperv.c | 23 +++++++++++++++++++++--
arch/x86/kvm/hyperv.h | 4 ++++
arch/x86/kvm/lapic.c | 4 ++--
arch/x86/kvm/lapic.h | 2 +-
arch/x86/kvm/x86.c | 2 +-
5 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 14e0d0ae4e0a..fdf659ca6167 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -688,6 +688,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
stimer_cleanup(&hv_vcpu->stimer[i]);
}

+bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
+ return false;
+ return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
+}
+EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
+
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
+ struct hv_vp_assist_page *assist_page)
+{
+ if (!kvm_hv_assist_page_enabled(vcpu))
+ return false;
+ return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
+ assist_page, sizeof(*assist_page));
+}
+EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
+
static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
struct hv_message *msg = &stimer->msg;
@@ -1048,7 +1066,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)

if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
hv->hv_vapic = data;
- if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+ if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
return 1;
break;
}
@@ -1061,7 +1079,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
hv->hv_vapic = data;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
if (kvm_lapic_enable_pv_eoi(vcpu,
- gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+ gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
+ sizeof(struct hv_vp_assist_page)))
return 1;
break;
}
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 837465d69c6d..db825bb7efc7 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);

+bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
+ struct hv_vp_assist_page *assist_page);
+
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
int timer_index)
{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 776391cf69a5..b6d6a36f1a33 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2540,7 +2540,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}

-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
if (!IS_ALIGNED(addr, 4))
@@ -2550,7 +2550,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
if (!pv_eoi_enabled(vcpu))
return 0;
return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
- addr, sizeof(u8));
+ addr, len);
}

void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ed0ed39abd36..ff6ef9c3d760 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -120,7 +120,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
}

-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_init(void);
void kvm_lapic_exit(void);

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 06dd4cdb2ca8..a57766b940a5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2442,7 +2442,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)

break;
case MSR_KVM_PV_EOI_EN:
- if (kvm_lapic_enable_pv_eoi(vcpu, data))
+ if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;

--
2.14.4


2018-06-14 08:26:27

by Vitaly Kuznetsov

Subject: [PATCH 3/5] KVM: nVMX: add enlightened VMCS state

Add the hv_evmcs pointer to struct nested_vmx and implement
copy_enlightened_to_vmcs12() and copy_vmcs12_to_enlightened().

The prepare_vmcs02()/prepare_vmcs02_full() separation is not valid for
Enlightened VMCS, so do a full sync for now.

Suggested-by: Ladi Prosek <[email protected]>
Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
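Note: for reference, the clean-fields protocol from the L1 side works
roughly as follows (sketch; semantics assumed from the Hyper-V TLFS,
new_cr3 is a hypothetical value):

	/* L1 marks all groups clean, then clears the bit of each group it
	 * modifies; L0 may skip copying groups whose bit is still set */
	evmcs->hv_clean_fields = HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;

	evmcs->guest_cr3 = new_cr3;
	evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;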
arch/x86/kvm/vmx.c | 431 +++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 417 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 51749207cef1..e7fa9f9c6e36 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -640,10 +640,10 @@ struct nested_vmx {
*/
struct vmcs12 *cached_vmcs12;
/*
- * Indicates if the shadow vmcs must be updated with the
- * data hold by vmcs12
+ * Indicates if the shadow vmcs or enlightened vmcs must be updated
+ * with the data held by struct vmcs12.
*/
- bool sync_shadow_vmcs;
+ bool need_vmcs12_sync;
bool dirty_vmcs12;

bool change_vmcs01_virtual_apic_mode;
@@ -689,6 +689,8 @@ struct nested_vmx {
/* in guest mode on SMM entry? */
bool guest_mode;
} smm;
+
+ struct hv_enlightened_vmcs *hv_evmcs;
};

#define POSTED_INTR_ON 0
@@ -8010,7 +8012,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
/* copy to memory all shadowed fields in case
they were modified */
copy_shadow_to_vmcs12(vmx);
- vmx->nested.sync_shadow_vmcs = false;
+ vmx->nested.need_vmcs12_sync = false;
vmx_disable_shadow_vmcs(vmx);
}
vmx->nested.posted_intr_nv = -1;
@@ -8187,6 +8189,393 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,

}

+static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, bool full)
+{
+ struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
+ struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+
+ /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
+ vmcs12->tpr_threshold = evmcs->tpr_threshold;
+ vmcs12->guest_rip = evmcs->guest_rip;
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
+ vmcs12->guest_rsp = evmcs->guest_rsp;
+ vmcs12->guest_rflags = evmcs->guest_rflags;
+ vmcs12->guest_interruptibility_info =
+ evmcs->guest_interruptibility_info;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
+ vmcs12->cpu_based_vm_exec_control =
+ evmcs->cpu_based_vm_exec_control;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
+ vmcs12->exception_bitmap = evmcs->exception_bitmap;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
+ vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
+ vmcs12->vm_entry_intr_info_field =
+ evmcs->vm_entry_intr_info_field;
+ vmcs12->vm_entry_exception_error_code =
+ evmcs->vm_entry_exception_error_code;
+ vmcs12->vm_entry_instruction_len =
+ evmcs->vm_entry_instruction_len;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
+ vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
+ vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
+ vmcs12->host_cr0 = evmcs->host_cr0;
+ vmcs12->host_cr3 = evmcs->host_cr3;
+ vmcs12->host_cr4 = evmcs->host_cr4;
+ vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
+ vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
+ vmcs12->host_rip = evmcs->host_rip;
+ vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
+ vmcs12->host_es_selector = evmcs->host_es_selector;
+ vmcs12->host_cs_selector = evmcs->host_cs_selector;
+ vmcs12->host_ss_selector = evmcs->host_ss_selector;
+ vmcs12->host_ds_selector = evmcs->host_ds_selector;
+ vmcs12->host_fs_selector = evmcs->host_fs_selector;
+ vmcs12->host_gs_selector = evmcs->host_gs_selector;
+ vmcs12->host_tr_selector = evmcs->host_tr_selector;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
+ vmcs12->pin_based_vm_exec_control =
+ evmcs->pin_based_vm_exec_control;
+ vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
+ vmcs12->secondary_vm_exec_control =
+ evmcs->secondary_vm_exec_control;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
+ vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
+ vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
+ vmcs12->msr_bitmap = evmcs->msr_bitmap;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
+ vmcs12->guest_es_base = evmcs->guest_es_base;
+ vmcs12->guest_cs_base = evmcs->guest_cs_base;
+ vmcs12->guest_ss_base = evmcs->guest_ss_base;
+ vmcs12->guest_ds_base = evmcs->guest_ds_base;
+ vmcs12->guest_fs_base = evmcs->guest_fs_base;
+ vmcs12->guest_gs_base = evmcs->guest_gs_base;
+ vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
+ vmcs12->guest_tr_base = evmcs->guest_tr_base;
+ vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
+ vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
+ vmcs12->guest_es_limit = evmcs->guest_es_limit;
+ vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
+ vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
+ vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
+ vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
+ vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
+ vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
+ vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
+ vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
+ vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
+ vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
+ vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
+ vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
+ vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
+ vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
+ vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
+ vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
+ vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
+ vmcs12->guest_es_selector = evmcs->guest_es_selector;
+ vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
+ vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
+ vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
+ vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
+ vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
+ vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
+ vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
+ vmcs12->tsc_offset = evmcs->tsc_offset;
+ vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
+ vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
+ vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
+ vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
+ vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
+ vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
+ vmcs12->guest_cr0 = evmcs->guest_cr0;
+ vmcs12->guest_cr3 = evmcs->guest_cr3;
+ vmcs12->guest_cr4 = evmcs->guest_cr4;
+ vmcs12->guest_dr7 = evmcs->guest_dr7;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
+ vmcs12->host_fs_base = evmcs->host_fs_base;
+ vmcs12->host_gs_base = evmcs->host_gs_base;
+ vmcs12->host_tr_base = evmcs->host_tr_base;
+ vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
+ vmcs12->host_idtr_base = evmcs->host_idtr_base;
+ vmcs12->host_rsp = evmcs->host_rsp;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
+ vmcs12->ept_pointer = evmcs->ept_pointer;
+ vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
+ }
+
+ if (unlikely(full || !(evmcs->hv_clean_fields &
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
+ vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
+ vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
+ vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
+ vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
+ vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
+ vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
+ vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
+ vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
+ vmcs12->guest_pending_dbg_exceptions =
+ evmcs->guest_pending_dbg_exceptions;
+ vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
+ vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
+ vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
+ vmcs12->guest_activity_state = evmcs->guest_activity_state;
+ vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
+ }
+
+ /*
+ * Not used?
+ * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
+ * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
+ * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
+ * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
+ * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
+ * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
+ * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
+ * vmcs12->page_fault_error_code_mask =
+ * evmcs->page_fault_error_code_mask;
+ * vmcs12->page_fault_error_code_match =
+ * evmcs->page_fault_error_code_match;
+ * vmcs12->cr3_target_count = evmcs->cr3_target_count;
+ * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
+ * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
+ * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
+ */
+
+ /*
+ * Read only fields:
+ * vmcs12->guest_physical_address = evmcs->guest_physical_address;
+ * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
+ * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
+ * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
+ * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
+ * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
+ * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
+ * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
+ * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
+ * vmcs12->exit_qualification = evmcs->exit_qualification;
+ * vmcs12->guest_linear_address = evmcs->guest_linear_address;
+ *
+ * Not present in struct vmcs12:
+ * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
+ * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
+ * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
+ * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
+ */
+
+ return 0;
+}
+
+static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
+{
+ struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
+ struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+
+ /*
+ * Should not be changed by KVM:
+ *
+ * evmcs->host_es_selector = vmcs12->host_es_selector;
+ * evmcs->host_cs_selector = vmcs12->host_cs_selector;
+ * evmcs->host_ss_selector = vmcs12->host_ss_selector;
+ * evmcs->host_ds_selector = vmcs12->host_ds_selector;
+ * evmcs->host_fs_selector = vmcs12->host_fs_selector;
+ * evmcs->host_gs_selector = vmcs12->host_gs_selector;
+ * evmcs->host_tr_selector = vmcs12->host_tr_selector;
+ * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
+ * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
+ * evmcs->host_cr0 = vmcs12->host_cr0;
+ * evmcs->host_cr3 = vmcs12->host_cr3;
+ * evmcs->host_cr4 = vmcs12->host_cr4;
+ * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
+ * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
+ * evmcs->host_rip = vmcs12->host_rip;
+ * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
+ * evmcs->host_fs_base = vmcs12->host_fs_base;
+ * evmcs->host_gs_base = vmcs12->host_gs_base;
+ * evmcs->host_tr_base = vmcs12->host_tr_base;
+ * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
+ * evmcs->host_idtr_base = vmcs12->host_idtr_base;
+ * evmcs->host_rsp = vmcs12->host_rsp;
+ * sync_vmcs12() doesn't read these:
+ * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
+ * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
+ * evmcs->msr_bitmap = vmcs12->msr_bitmap;
+ * evmcs->ept_pointer = vmcs12->ept_pointer;
+ * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
+ * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
+ * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
+ * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
+ * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
+ * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
+ * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
+ * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
+ * evmcs->tpr_threshold = vmcs12->tpr_threshold;
+ * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
+ * evmcs->exception_bitmap = vmcs12->exception_bitmap;
+ * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
+ * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
+ * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
+ * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
+ * evmcs->page_fault_error_code_mask =
+ * vmcs12->page_fault_error_code_mask;
+ * evmcs->page_fault_error_code_match =
+ * vmcs12->page_fault_error_code_match;
+ * evmcs->cr3_target_count = vmcs12->cr3_target_count;
+ * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
+ * evmcs->tsc_offset = vmcs12->tsc_offset;
+ * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
+ * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
+ * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
+ * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
+ * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
+ * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
+ * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
+ * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
+ *
+ * Not present in struct vmcs12:
+ * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
+ * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
+ * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
+ * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
+ */
+
+ evmcs->guest_es_selector = vmcs12->guest_es_selector;
+ evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
+ evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
+ evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
+ evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
+ evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
+ evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
+ evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
+
+ evmcs->guest_es_limit = vmcs12->guest_es_limit;
+ evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
+ evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
+ evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
+ evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
+ evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
+ evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
+ evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
+ evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
+ evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
+
+ evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
+ evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
+ evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
+ evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
+ evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
+ evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
+ evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
+ evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
+
+ evmcs->guest_es_base = vmcs12->guest_es_base;
+ evmcs->guest_cs_base = vmcs12->guest_cs_base;
+ evmcs->guest_ss_base = vmcs12->guest_ss_base;
+ evmcs->guest_ds_base = vmcs12->guest_ds_base;
+ evmcs->guest_fs_base = vmcs12->guest_fs_base;
+ evmcs->guest_gs_base = vmcs12->guest_gs_base;
+ evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
+ evmcs->guest_tr_base = vmcs12->guest_tr_base;
+ evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
+ evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
+
+ evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
+ evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
+
+ evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
+ evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
+ evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
+ evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
+
+ evmcs->guest_pending_dbg_exceptions =
+ vmcs12->guest_pending_dbg_exceptions;
+ evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
+ evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
+
+ evmcs->guest_activity_state = vmcs12->guest_activity_state;
+ evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
+
+ evmcs->guest_cr0 = vmcs12->guest_cr0;
+ evmcs->guest_cr3 = vmcs12->guest_cr3;
+ evmcs->guest_cr4 = vmcs12->guest_cr4;
+ evmcs->guest_dr7 = vmcs12->guest_dr7;
+
+ evmcs->guest_physical_address = vmcs12->guest_physical_address;
+
+ evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
+ evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
+ evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
+ evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
+ evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
+ evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
+ evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
+ evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
+
+ evmcs->exit_qualification = vmcs12->exit_qualification;
+
+ evmcs->guest_linear_address = vmcs12->guest_linear_address;
+ evmcs->guest_rsp = vmcs12->guest_rsp;
+ evmcs->guest_rflags = vmcs12->guest_rflags;
+
+ evmcs->guest_interruptibility_info =
+ vmcs12->guest_interruptibility_info;
+ evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
+ evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
+ evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
+ evmcs->vm_entry_exception_error_code =
+ vmcs12->vm_entry_exception_error_code;
+ evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
+
+ evmcs->guest_rip = vmcs12->guest_rip;
+
+ evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
+
+ return 0;
+}
+
/*
* Copy the writable VMCS shadow fields back to the VMCS12, in case
* they have been modified by the L1 guest. Note that the "read-only"
@@ -8398,7 +8787,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER,
__pa(vmx->vmcs01.shadow_vmcs));
- vmx->nested.sync_shadow_vmcs = true;
+ vmx->nested.need_vmcs12_sync = true;
}
vmx->nested.dirty_vmcs12 = true;
}
@@ -9960,9 +10349,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}

- if (vmx->nested.sync_shadow_vmcs) {
- copy_vmcs12_to_shadow(vmx);
- vmx->nested.sync_shadow_vmcs = false;
+ if (vmx->nested.need_vmcs12_sync) {
+ if (unlikely(vmx->nested.hv_evmcs)) {
+ copy_vmcs12_to_enlightened(vmx);
+ /* All fields are clean */
+ vmx->nested.hv_evmcs->hv_clean_fields |=
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ } else {
+ copy_vmcs12_to_shadow(vmx);
+ }
+ vmx->nested.need_vmcs12_sync = false;
}

if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
@@ -11281,7 +11677,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control, vmcs12_exec_ctrl;

- if (vmx->nested.dirty_vmcs12) {
+ if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
prepare_vmcs02_full(vcpu, vmcs12);
vmx->nested.dirty_vmcs12 = false;
}
@@ -11757,8 +12153,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)

vmcs12 = get_vmcs12(vcpu);

- if (enable_shadow_vmcs)
+ if (vmx->nested.hv_evmcs) {
+ copy_enlightened_to_vmcs12(vmx, vmx->nested.dirty_vmcs12);
+ /* Enlightened VMCS doesn't have launch state */
+ vmcs12->launch_state = !launch;
+ } else if (enable_shadow_vmcs) {
copy_shadow_to_vmcs12(vmx);
+ }

/*
* The nested entry process starts with enforcing various prerequisites
@@ -12383,8 +12784,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);

- if (enable_shadow_vmcs && exit_reason != -1)
- vmx->nested.sync_shadow_vmcs = true;
+ if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
+ vmx->nested.need_vmcs12_sync = true;

/* in case we halted in L2 */
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -12463,12 +12864,14 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12,
u32 reason, unsigned long qualification)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
load_vmcs12_host_state(vcpu, vmcs12);
vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
vmcs12->exit_qualification = qualification;
nested_vmx_succeed(vcpu);
- if (enable_shadow_vmcs)
- to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
+ if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
+ vmx->nested.need_vmcs12_sync = true;
}

static int vmx_check_intercept(struct kvm_vcpu *vcpu,
--
2.14.4


2018-06-14 08:26:36

by Vitaly Kuznetsov

Subject: [PATCH 5/5] KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case

When Enlightened VMCS is in use by the L1 hypervisor, we can avoid
vmwriting VMCS fields that did not change.

Our first goal is to achieve minimal impact on the traditional VMCS case,
so we're not wrapping each vmwrite() with an if-changed check. We also
can't use static keys as Enlightened VMCS usage is per-guest.

This patch implements the simplest solution: checking fields in groups.
We skip single vmwrite() statements as doing the check will cost us
something even in the non-eVMCS case and the win is tiny. Unfortunately,
this makes the prepare_vmcs02{,_full}() code Enlightened VMCS-dependent
(and a bit ugly).

Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
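Note: to illustrate the trade-off, compare the rejected per-field check
with the group check actually used below (sketch; evmcs_field_dirty() is
a hypothetical helper that does not exist in this series):

	/* rejected: one branch per field, paid even by non-eVMCS guests */
	if (!hv_evmcs || evmcs_field_dirty(vmx, GUEST_ES_SELECTOR))
		vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);

	/* chosen: one branch amortized over a whole clean-field group */
	if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
		vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
		/* ... dozens more vmwrites from the same group ... */
	}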
arch/x86/kvm/vmx.c | 143 ++++++++++++++++++++++++++++++-----------------------
1 file changed, 82 insertions(+), 61 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6802ba91468c..9a7d76c5c92b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11619,50 +11619,79 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
return 0;
}

+/*
+ * Check if the L1 hypervisor changed the particular field in the
+ * Enlightened VMCS and avoid the redundant vmwrite if it didn't. Can only
+ * be used when the value we're about to write comes unchanged from vmcs12->field.
+ */
+#define evmcs_needs_write(vmx, clean_field) ((vmx)->nested.dirty_vmcs12 ||\
+ !((vmx)->nested.hv_evmcs->hv_clean_fields &\
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_##clean_field))
+
static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+
+ if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
+ vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
+ vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
+ vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
+ vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
+ vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
+ vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
+ vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
+ vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
+ vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
+ vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
+ vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
+ vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
+ vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
+ vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
+ vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
+ vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
+ vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
+ vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
+ vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
+ vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
+ vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
+ vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
+ vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
+ vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
+ vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
+ vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
+ vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
+ vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
+ vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
+ vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
+ vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+ }
+
+ if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP1)) {
+ vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+ vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+ vmcs12->guest_pending_dbg_exceptions);
+ vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+ vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+
+ if (vmx_mpx_supported())
+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);

- vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
- vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
- vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
- vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
- vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
- vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
- vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
- vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
- vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
- vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
- vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
- vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
- vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
- vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
- vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
- vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
- vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
- vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
- vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
- vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
- vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
- vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
- vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
- vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
- vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
- vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
- vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
- vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
- vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
- vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
- vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
-
- vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
- vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
- vmcs12->guest_pending_dbg_exceptions);
- vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
- vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+ /*
+ * L1 may access the L2's PDPTR, so save them to construct
+ * vmcs12
+ */
+ if (enable_ept) {
+ vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
+ vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
+ vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
+ vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+ }
+ }

if (nested_cpu_has_xsaves(vmcs12))
vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
+
vmcs_write64(VMCS_LINK_POINTER, -1ull);

if (cpu_has_vmx_posted_intr())
@@ -11717,9 +11746,6 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)

set_cr4_guest_host_mask(vmx);

- if (vmx_mpx_supported())
- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
-
if (enable_vpid) {
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
@@ -11727,16 +11753,6 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
}

- /*
- * L1 may access the L2's PDPTR, so save them to construct vmcs12
- */
- if (enable_ept) {
- vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
- vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
- vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
- vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
- }
-
if (cpu_has_vmx_msr_bitmap())
vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
}
@@ -11757,6 +11773,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control, vmcs12_exec_ctrl;
+ struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;

if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
prepare_vmcs02_full(vcpu, vmcs12);
@@ -11768,11 +11785,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
* with vmx_shadow_fields.h.
*/

- vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
- vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
- vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
- vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
- vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+ if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
+ vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
+ vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
+ vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
+ vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
+ }

/*
* Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
@@ -11788,12 +11807,14 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
}
if (vmx->nested.nested_run_pending) {
- vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
- vmcs12->vm_entry_intr_info_field);
- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
- vmcs12->vm_entry_exception_error_code);
- vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
- vmcs12->vm_entry_instruction_len);
+ if (!hv_evmcs || evmcs_needs_write(vmx, CONTROL_EVENT)) {
+ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+ vmcs12->vm_entry_intr_info_field);
+ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
+ vmcs12->vm_entry_exception_error_code);
+ vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+ vmcs12->vm_entry_instruction_len);
+ }
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
vmcs12->guest_interruptibility_info);
vmx->loaded_vmcs->nmi_known_unmasked =
--
2.14.4


2018-06-14 08:27:27

by Vitaly Kuznetsov

Subject: [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR

Per Hyper-V TLFS 5.0b:

"The L1 hypervisor may choose to use enlightened VMCSs by writing 1 to
the corresponding field in the VP assist page (see section 7.8.7).
Another field in the VP assist page controls the currently active
enlightened VMCS. Each enlightened VMCS is exactly one page (4 KB) in
size and must be initially zeroed. No VMPTRLD instruction must be
executed to make an enlightened VMCS active or current.

After the L1 hypervisor performs a VM entry with an enlightened VMCS,
the VMCS is considered active on the processor. An enlightened VMCS
can only be active on a single processor at the same time. The L1
hypervisor can execute a VMCLEAR instruction to transition an
enlightened VMCS from the active to the non-active state. Any VMREAD
or VMWRITE instructions while an enlightened VMCS is active is
unsupported and can result in unexpected behavior."

Keep the Enlightened VMCS structure for the current L2 guest permanently
mapped from struct nested_vmx instead of mapping it on every use.

Suggested-by: Ladi Prosek <[email protected]>
Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
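Note: from L1's point of view, making an eVMCS active is just two stores
to the VP assist page instead of a VMPTRLD (sketch; vp_ap is L1's mapped
assist page and evmcs_phys_addr a hypothetical physical address):

	vp_ap->current_nested_vmcs = evmcs_phys_addr;
	vp_ap->enlighten_vmentry = 1;
	/* VMLAUNCH/VMRESUME follows; nested_vmx_handle_enlightened_vmptrld()
	 * below picks the new pointer up on the next nested VM entry */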
arch/x86/kvm/vmx.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 91 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e7fa9f9c6e36..6802ba91468c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -20,6 +20,7 @@
#include "mmu.h"
#include "cpuid.h"
#include "lapic.h"
+#include "hyperv.h"

#include <linux/kvm_host.h>
#include <linux/module.h>
@@ -690,6 +691,8 @@ struct nested_vmx {
bool guest_mode;
} smm;

+ gpa_t hv_evmcs_vmptr;
+ struct page *hv_evmcs_page;
struct hv_enlightened_vmcs *hv_evmcs;
};

@@ -7695,7 +7698,9 @@ static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
u32 vm_instruction_error)
{
- if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs) {
/*
* failValid writes the error number to the current VMCS, which
* can't be done if there isn't a current VMCS.
@@ -8003,6 +8008,18 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
vmcs_write64(VMCS_LINK_POINTER, -1ull);
}

+static inline void nested_release_evmcs(struct vcpu_vmx *vmx)
+{
+ if (!vmx->nested.hv_evmcs)
+ return;
+
+ kunmap(vmx->nested.hv_evmcs_page);
+ kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
+ vmx->nested.hv_evmcs_vmptr = -1ull;
+ vmx->nested.hv_evmcs_page = NULL;
+ vmx->nested.hv_evmcs = NULL;
+}
+
static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
{
if (vmx->nested.current_vmptr == -1ull)
@@ -8062,6 +8079,8 @@ static void free_nested(struct vcpu_vmx *vmx)
vmx->nested.pi_desc = NULL;
}

+ nested_release_evmcs(vmx);
+
free_loaded_vmcs(&vmx->nested.vmcs02);
}

@@ -8098,12 +8117,18 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}

- if (vmptr == vmx->nested.current_vmptr)
- nested_release_vmcs12(vmx);
+ if (vmx->nested.hv_evmcs_page) {
+ if (vmptr == vmx->nested.hv_evmcs_vmptr)
+ nested_release_evmcs(vmx);
+ } else {
+ if (vmptr == vmx->nested.current_vmptr)
+ nested_release_vmcs12(vmx);

- kvm_vcpu_write_guest(vcpu,
- vmptr + offsetof(struct vmcs12, launch_state),
- &zero, sizeof(zero));
+ kvm_vcpu_write_guest(vcpu,
+ vmptr + offsetof(struct vmcs12,
+ launch_state),
+ &zero, sizeof(zero));
+ }

nested_vmx_succeed(vcpu);
return kvm_skip_emulated_instruction(vcpu);
@@ -8814,6 +8839,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}

+ /* Forbid normal VMPTRLD if Enlightened version was used */
+ if (vmx->nested.hv_evmcs)
+ return 1;
+
if (vmx->nested.current_vmptr != vmptr) {
struct vmcs12 *new_vmcs12;
struct page *page;
@@ -8847,6 +8876,55 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}

+/*
+ * This is an equivalent of the nested hypervisor executing the vmptrld
+ * instruction.
+ */
+static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct hv_vp_assist_page assist_page;
+
+ if (likely(!vmx->nested.enlightened_vmcs_enabled))
+ return 1;
+
+ if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
+ return 1;
+
+ if (unlikely(!assist_page.enlighten_vmentry))
+ return 1;
+
+ if (unlikely(assist_page.current_nested_vmcs !=
+ vmx->nested.hv_evmcs_vmptr)) {
+
+ if (!vmx->nested.hv_evmcs)
+ vmx->nested.current_vmptr = -1ull;
+
+ nested_release_evmcs(vmx);
+
+ vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
+ vcpu, assist_page.current_nested_vmcs);
+
+ if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
+ return 0;
+
+ vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
+ vmx->nested.dirty_vmcs12 = true;
+ vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
+
+ /*
+ * Unlike normal vmcs12, enlightened vmcs12 is not fully
+ * reloaded from guest's memory (read only fields, fields not
+ * present in struct hv_enlightened_vmcs, ...). Make sure there
+ * are no leftovers.
+ */
+ memset(vmx->nested.cached_vmcs12, 0,
+ sizeof(*vmx->nested.cached_vmcs12));
+
+ }
+ return 1;
+}
+
/* Emulate the VMPTRST instruction */
static int handle_vmptrst(struct kvm_vcpu *vcpu)
{
@@ -8858,6 +8936,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;

+ if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
+ return 1;
+
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, true, &vmcs_gva))
return 1;
@@ -12148,7 +12229,10 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (!nested_vmx_check_permission(vcpu))
return 1;

- if (!nested_vmx_check_vmcs12(vcpu))
+ if (!nested_vmx_handle_enlightened_vmptrld(vcpu))
+ return 1;
+
+ if (!vmx->nested.hv_evmcs && !nested_vmx_check_vmcs12(vcpu))
goto out;

vmcs12 = get_vmcs12(vcpu);
--
2.14.4


2018-06-14 08:28:54

by Vitaly Kuznetsov

Subject: [PATCH 2/5] KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability

Enlightened VMCS is opt-in. The current version does not contain all
fields supported by nested VMX so we must not advertise the
corresponding VMX features if enlightened VMCS is enabled.

Userspace is given the enlightened VMCS version supported by KVM as
part of enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS. The version is to
be advertised to the nested hypervisor, currently done via a cpuid
leaf for Hyper-V.

Suggested-by: Ladi Prosek <[email protected]>
Signed-off-by: Vitaly Kuznetsov <[email protected]>
---
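Note: the expected userspace flow looks roughly like this (illustrative
sketch; vcpu_fd is an open vCPU file descriptor, error handling trimmed):

	uint16_t vmcs_version = 0;
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
		.args[0] = (uintptr_t)&vmcs_version,	/* filled in by KVM */
	};

	if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) == 0) {
		/* vmcs_version == (KVM_EVMCS_VERSION << 8) | 1: advertise
		 * it to the nested hypervisor via the Hyper-V nested
		 * features CPUID leaf */
	}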
arch/x86/include/asm/kvm_host.h | 3 +
arch/x86/kvm/svm.c | 9 +++
arch/x86/kvm/vmx.c | 138 ++++++++++++++++++++++------------------
arch/x86/kvm/x86.c | 15 +++++
include/uapi/linux/kvm.h | 1 +
5 files changed, 105 insertions(+), 61 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0ebe659f2802..d7e8f7155d79 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1095,6 +1095,9 @@ struct kvm_x86_ops {
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);

int (*get_msr_feature)(struct kvm_msr_entry *entry);
+
+ int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version);
};

struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d9305f1723f5..6dc42c870565 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -7009,6 +7009,13 @@ static int svm_unregister_enc_region(struct kvm *kvm,
return ret;
}

+static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version)
+{
+ /* Intel-only feature */
+ return -ENODEV;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -7135,6 +7142,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.mem_enc_op = svm_mem_enc_op,
.mem_enc_reg_region = svm_register_enc_region,
.mem_enc_unreg_region = svm_unregister_enc_region,
+
+ .nested_enable_evmcs = nested_enable_evmcs,
};

static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 48989f78be60..51749207cef1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -648,6 +648,13 @@ struct nested_vmx {

bool change_vmcs01_virtual_apic_mode;

+ /*
+ * Enlightened VMCS has been enabled. It does not mean that L1 has to
+ * use it. However, VMX features available to L1 will be limited based
+ * on what the enlightened VMCS supports.
+ */
+ bool enlightened_vmcs_enabled;
+
/* L2 must run next, and mustn't decide to exit to L1. */
bool nested_run_pending;

@@ -1186,6 +1193,49 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);

#define KVM_EVMCS_VERSION 1

+/*
+ * Enlightened VMCSv1 doesn't support these:
+ *
+ * POSTED_INTR_NV = 0x00000002,
+ * GUEST_INTR_STATUS = 0x00000810,
+ * APIC_ACCESS_ADDR = 0x00002014,
+ * POSTED_INTR_DESC_ADDR = 0x00002016,
+ * EOI_EXIT_BITMAP0 = 0x0000201c,
+ * EOI_EXIT_BITMAP1 = 0x0000201e,
+ * EOI_EXIT_BITMAP2 = 0x00002020,
+ * EOI_EXIT_BITMAP3 = 0x00002022,
+ * GUEST_PML_INDEX = 0x00000812,
+ * PML_ADDRESS = 0x0000200e,
+ * VM_FUNCTION_CONTROL = 0x00002018,
+ * EPTP_LIST_ADDRESS = 0x00002024,
+ * VMREAD_BITMAP = 0x00002026,
+ * VMWRITE_BITMAP = 0x00002028,
+ *
+ * TSC_MULTIPLIER = 0x00002032,
+ * PLE_GAP = 0x00004020,
+ * PLE_WINDOW = 0x00004022,
+ * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ *
+ * Currently unsupported in KVM:
+ * GUEST_IA32_RTIT_CTL = 0x00002814,
+ */
+#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
+ PIN_BASED_VMX_PREEMPTION_TIMER)
+#define EVMCS1_UNSUPPORTED_2NDEXEC \
+ (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
+ SECONDARY_EXEC_APIC_REGISTER_VIRT | \
+ SECONDARY_EXEC_ENABLE_PML | \
+ SECONDARY_EXEC_ENABLE_VMFUNC | \
+ SECONDARY_EXEC_SHADOW_VMCS | \
+ SECONDARY_EXEC_TSC_SCALING | \
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING)
+#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
+#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
+
#if IS_ENABLED(CONFIG_HYPERV)
static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
@@ -1278,69 +1328,12 @@ static void evmcs_load(u64 phys_addr)

static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
{
- /*
- * Enlightened VMCSv1 doesn't support these:
- *
- * POSTED_INTR_NV = 0x00000002,
- * GUEST_INTR_STATUS = 0x00000810,
- * APIC_ACCESS_ADDR = 0x00002014,
- * POSTED_INTR_DESC_ADDR = 0x00002016,
- * EOI_EXIT_BITMAP0 = 0x0000201c,
- * EOI_EXIT_BITMAP1 = 0x0000201e,
- * EOI_EXIT_BITMAP2 = 0x00002020,
- * EOI_EXIT_BITMAP3 = 0x00002022,
- */
- vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
- vmcs_conf->cpu_based_2nd_exec_ctrl &=
- ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
- vmcs_conf->cpu_based_2nd_exec_ctrl &=
- ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
- vmcs_conf->cpu_based_2nd_exec_ctrl &=
- ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
-
- /*
- * GUEST_PML_INDEX = 0x00000812,
- * PML_ADDRESS = 0x0000200e,
- */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
+ vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;

- /* VM_FUNCTION_CONTROL = 0x00002018, */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
+ vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+ vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;

- /*
- * EPTP_LIST_ADDRESS = 0x00002024,
- * VMREAD_BITMAP = 0x00002026,
- * VMWRITE_BITMAP = 0x00002028,
- */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
-
- /*
- * TSC_MULTIPLIER = 0x00002032,
- */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
-
- /*
- * PLE_GAP = 0x00004020,
- * PLE_WINDOW = 0x00004022,
- */
- vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
-
- /*
- * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
- */
- vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
-
- /*
- * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
- * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
- */
- vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
- vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
-
- /*
- * Currently unsupported in KVM:
- * GUEST_IA32_RTIT_CTL = 0x00002814,
- */
}
#else /* !IS_ENABLED(CONFIG_HYPERV) */
static inline void evmcs_write64(unsigned long field, u64 value) {}
@@ -1354,6 +1347,27 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */

+static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
+ uint16_t *vmcs_version)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ /* We don't support disabling the feature for simplicity. */
+ if (vmx->nested.enlightened_vmcs_enabled)
+ return 0;
+
+ vmx->nested.enlightened_vmcs_enabled = true;
+ *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
+
+ vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
+ vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
+ vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
+ vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
+ vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
+
+ return 0;
+}
+
static inline bool is_exception_n(u32 intr_info, u8 vector)
{
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -13039,6 +13053,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.pre_enter_smm = vmx_pre_enter_smm,
.pre_leave_smm = vmx_pre_leave_smm,
.enable_smi_window = enable_smi_window,
+
+ .nested_enable_evmcs = nested_enable_evmcs,
};

static int __init vmx_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a57766b940a5..51488019dec2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2873,6 +2873,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_HYPERV_VP_INDEX:
case KVM_CAP_HYPERV_EVENTFD:
case KVM_CAP_HYPERV_TLBFLUSH:
+ case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
case KVM_CAP_PCI_SEGMENT:
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3650,6 +3651,10 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
{
+ int r;
+ uint16_t vmcs_version;
+ void __user *user_ptr;
+
if (cap->flags)
return -EINVAL;

@@ -3662,6 +3667,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return -EINVAL;
return kvm_hv_activate_synic(vcpu, cap->cap ==
KVM_CAP_HYPERV_SYNIC2);
+ case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+ r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
+ if (!r) {
+ user_ptr = (void __user *)(uintptr_t)cap->args[0];
+ if (copy_to_user(user_ptr, &vmcs_version,
+ sizeof(vmcs_version)))
+ r = -EFAULT;
+ }
+ return r;
+
default:
return -EINVAL;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index b6270a3b38e9..5c4b79c1af19 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_GET_MSR_FEATURES 153
#define KVM_CAP_HYPERV_EVENTFD 154
#define KVM_CAP_HYPERV_TLBFLUSH 155
+#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 156

#ifdef KVM_CAP_IRQ_ROUTING

--
2.14.4


2018-06-14 22:59:40

by Liran Alon

Subject: Re: [PATCH 2/5] KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability


----- [email protected] wrote:

> Enlightened VMCS is opt-in. The current version does not contain all
> fields supported by nested VMX so we must not advertise the
> corresponding VMX features if enlightened VMCS is enabled.
>
> Userspace is given the enlightened VMCS version supported by KVM as
> part of enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS. The version is to
> be advertised to the nested hypervisor, currently done via a cpuid
> leaf for Hyper-V.
>
> Suggested-by: Ladi Prosek <[email protected]>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>
> ---
> arch/x86/include/asm/kvm_host.h | 3 +
> arch/x86/kvm/svm.c | 9 +++
> arch/x86/kvm/vmx.c | 138
> ++++++++++++++++++++++------------------
> arch/x86/kvm/x86.c | 15 +++++
> include/uapi/linux/kvm.h | 1 +
> 5 files changed, 105 insertions(+), 61 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 0ebe659f2802..d7e8f7155d79 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1095,6 +1095,9 @@ struct kvm_x86_ops {
> int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region
> *argp);
>
> int (*get_msr_feature)(struct kvm_msr_entry *entry);
> +
> + int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
> + uint16_t *vmcs_version);
> };
>
> struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d9305f1723f5..6dc42c870565 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7009,6 +7009,13 @@ static int svm_unregister_enc_region(struct kvm
> *kvm,
> return ret;
> }
>
> +static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
> + uint16_t *vmcs_version)
> +{
> + /* Intel-only feature */
> + return -ENODEV;
> +}
> +
> static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
> .cpu_has_kvm_support = has_svm,
> .disabled_by_bios = is_disabled,
> @@ -7135,6 +7142,8 @@ static struct kvm_x86_ops svm_x86_ops
> __ro_after_init = {
> .mem_enc_op = svm_mem_enc_op,
> .mem_enc_reg_region = svm_register_enc_region,
> .mem_enc_unreg_region = svm_unregister_enc_region,
> +
> + .nested_enable_evmcs = nested_enable_evmcs,
> };
>
> static int __init svm_init(void)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 48989f78be60..51749207cef1 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -648,6 +648,13 @@ struct nested_vmx {
>
> bool change_vmcs01_virtual_apic_mode;
>
> + /*
> + * Enlightened VMCS has been enabled. It does not mean that L1 has
> to
> + * use it. However, VMX features available to L1 will be limited
> based
> + * on what the enlightened VMCS supports.
> + */
> + bool enlightened_vmcs_enabled;
> +
> /* L2 must run next, and mustn't decide to exit to L1. */
> bool nested_run_pending;
>
> @@ -1186,6 +1193,49 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);
>
> #define KVM_EVMCS_VERSION 1
>
> +/*
> + * Enlightened VMCSv1 doesn't support these:
> + *
> + * POSTED_INTR_NV = 0x00000002,
> + * GUEST_INTR_STATUS = 0x00000810,
> + * APIC_ACCESS_ADDR = 0x00002014,
> + * POSTED_INTR_DESC_ADDR = 0x00002016,
> + * EOI_EXIT_BITMAP0 = 0x0000201c,
> + * EOI_EXIT_BITMAP1 = 0x0000201e,
> + * EOI_EXIT_BITMAP2 = 0x00002020,
> + * EOI_EXIT_BITMAP3 = 0x00002022,
> + * GUEST_PML_INDEX = 0x00000812,
> + * PML_ADDRESS = 0x0000200e,
> + * VM_FUNCTION_CONTROL = 0x00002018,
> + * EPTP_LIST_ADDRESS = 0x00002024,
> + * VMREAD_BITMAP = 0x00002026,
> + * VMWRITE_BITMAP = 0x00002028,
> + *
> + * TSC_MULTIPLIER = 0x00002032,
> + * PLE_GAP = 0x00004020,
> + * PLE_WINDOW = 0x00004022,
> + * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
> + * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
> + * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
> + *
> + * Currently unsupported in KVM:
> + * GUEST_IA32_RTIT_CTL = 0x00002814,
> + */
> +#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
> + PIN_BASED_VMX_PREEMPTION_TIMER)
> +#define EVMCS1_UNSUPPORTED_2NDEXEC \
> + (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
> + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
> + SECONDARY_EXEC_APIC_REGISTER_VIRT | \
> + SECONDARY_EXEC_ENABLE_PML | \
> + SECONDARY_EXEC_ENABLE_VMFUNC | \
> + SECONDARY_EXEC_SHADOW_VMCS | \
> + SECONDARY_EXEC_TSC_SCALING | \
> + SECONDARY_EXEC_PAUSE_LOOP_EXITING)
> +#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL
> (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
> +#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL
> (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
> +#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
> +
> #if IS_ENABLED(CONFIG_HYPERV)
> static bool __read_mostly enlightened_vmcs = true;
> module_param(enlightened_vmcs, bool, 0444);
> @@ -1278,69 +1328,12 @@ static void evmcs_load(u64 phys_addr)
>
> static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
> {
> - /*
> - * Enlightened VMCSv1 doesn't support these:
> - *
> - * POSTED_INTR_NV = 0x00000002,
> - * GUEST_INTR_STATUS = 0x00000810,
> - * APIC_ACCESS_ADDR = 0x00002014,
> - * POSTED_INTR_DESC_ADDR = 0x00002016,
> - * EOI_EXIT_BITMAP0 = 0x0000201c,
> - * EOI_EXIT_BITMAP1 = 0x0000201e,
> - * EOI_EXIT_BITMAP2 = 0x00002020,
> - * EOI_EXIT_BITMAP3 = 0x00002022,
> - */
> - vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
> - vmcs_conf->cpu_based_2nd_exec_ctrl &=
> - ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
> - vmcs_conf->cpu_based_2nd_exec_ctrl &=
> - ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> - vmcs_conf->cpu_based_2nd_exec_ctrl &=
> - ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
> -
> - /*
> - * GUEST_PML_INDEX = 0x00000812,
> - * PML_ADDRESS = 0x0000200e,
> - */
> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
> + vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
> + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
>
> - /* VM_FUNCTION_CONTROL = 0x00002018, */
> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
> + vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
> + vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
>
> - /*
> - * EPTP_LIST_ADDRESS = 0x00002024,
> - * VMREAD_BITMAP = 0x00002026,
> - * VMWRITE_BITMAP = 0x00002028,
> - */
> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
> -
> - /*
> - * TSC_MULTIPLIER = 0x00002032,
> - */
> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
> -
> - /*
> - * PLE_GAP = 0x00004020,
> - * PLE_WINDOW = 0x00004022,
> - */
> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
> -
> - /*
> - * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
> - */
> - vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
> -
> - /*
> - * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
> - * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
> - */
> - vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
> - vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
> -
> - /*
> - * Currently unsupported in KVM:
> - * GUEST_IA32_RTIT_CTL = 0x00002814,
> - */

The creation of the EVMCS1_UNSUPPORTED_* macros and the refactoring of
evmcs_sanitize_exec_ctrls() should be split into a separate patch placed
earlier in this series, before this one.

> }
> #else /* !IS_ENABLED(CONFIG_HYPERV) */
> static inline void evmcs_write64(unsigned long field, u64 value) {}
> @@ -1354,6 +1347,27 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
> static inline void evmcs_touch_msr_bitmap(void) {}
> #endif /* IS_ENABLED(CONFIG_HYPERV) */
>
> +static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
> + uint16_t *vmcs_version)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + /* For simplicity, we don't support disabling the feature. */
> + if (vmx->nested.enlightened_vmcs_enabled)
> + return 0;
> +
> + vmx->nested.enlightened_vmcs_enabled = true;
> + *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;

Please add a comment here explaining the "<< 8) | 1" part.
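
For reference, the value is consumed as a version range, so a comment along
these lines would address the point (phrasing is mine; the low/high byte
split follows from how the value is built and read back by userspace):

	/*
	 * vmcs_version encodes the range of supported Enlightened VMCS
	 * versions: the low 8 bits carry the minimal supported version
	 * (currently 1), the high 8 bits the maximum (KVM_EVMCS_VERSION).
	 */
	*vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;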

> +
> + vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
> + vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
> + vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
> + vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
> + vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
> +
> + return 0;
> +}
> +
> static inline bool is_exception_n(u32 intr_info, u8 vector)
> {
> return (intr_info & (INTR_INFO_INTR_TYPE_MASK |
> INTR_INFO_VECTOR_MASK |
> @@ -13039,6 +13053,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
> .pre_enter_smm = vmx_pre_enter_smm,
> .pre_leave_smm = vmx_pre_leave_smm,
> .enable_smi_window = enable_smi_window,
> +
> + .nested_enable_evmcs = nested_enable_evmcs,
> };
>
> static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a57766b940a5..51488019dec2 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2873,6 +2873,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_HYPERV_VP_INDEX:
> case KVM_CAP_HYPERV_EVENTFD:
> case KVM_CAP_HYPERV_TLBFLUSH:
> + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
> case KVM_CAP_PCI_SEGMENT:
> case KVM_CAP_DEBUGREGS:
> case KVM_CAP_X86_ROBUST_SINGLESTEP:
> @@ -3650,6 +3651,10 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
> static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
> struct kvm_enable_cap *cap)
> {
> + int r;
> + uint16_t vmcs_version;
> + void __user *user_ptr;
> +
> if (cap->flags)
> return -EINVAL;
>
> @@ -3662,6 +3667,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
> return -EINVAL;
> return kvm_hv_activate_synic(vcpu, cap->cap ==
> KVM_CAP_HYPERV_SYNIC2);
> + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
> + r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
> + if (!r) {
> + user_ptr = (void __user *)(uintptr_t)cap->args[0];
> + if (copy_to_user(user_ptr, &vmcs_version,
> + sizeof(vmcs_version)))
> + r = -EFAULT;
> + }
> + return r;
> +
> default:
> return -EINVAL;
> }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index b6270a3b38e9..5c4b79c1af19 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_GET_MSR_FEATURES 153
> #define KVM_CAP_HYPERV_EVENTFD 154
> #define KVM_CAP_HYPERV_TLBFLUSH 155
> +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 156
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> --
> 2.14.4

Besides the comments above,
Reviewed-By: Liran Alon <[email protected]>
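
For context, userspace consumes the new capability with the per-vCPU
KVM_ENABLE_CAP ioctl, passing a pointer in args[0] that KVM fills with the
supported eVMCS version range. A minimal sketch (vcpu_fd and the helper
name are illustrative; the ioctl and the args[0] convention follow the
handler quoted above):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Enable eVMCS on a vCPU and fetch the supported version range. */
	static int enable_evmcs(int vcpu_fd, uint16_t *vmcs_version)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
			.args[0] = (uintptr_t)vmcs_version,
		};

		return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
	}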

2018-06-14 23:29:56

by Liran Alon

[permalink] [raw]
Subject: Re: [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR


----- [email protected] wrote:

> Per Hyper-V TLFS 5.0b:
>
> "The L1 hypervisor may choose to use enlightened VMCSs by writing 1
> to
> the corresponding field in the VP assist page (see section 7.8.7).
> Another field in the VP assist page controls the currently active
> enlightened VMCS. Each enlightened VMCS is exactly one page (4 KB) in
> size and must be initially zeroed. No VMPTRLD instruction must be
> executed to make an enlightened VMCS active or current.
>
> After the L1 hypervisor performs a VM entry with an enlightened VMCS,
> the VMCS is considered active on the processor. An enlightened VMCS
> can only be active on a single processor at the same time. The L1
> hypervisor can execute a VMCLEAR instruction to transition an
> enlightened VMCS from the active to the non-active state. Any VMREAD
> or VMWRITE instructions while an enlightened VMCS is active is
> unsupported and can result in unexpected behavior."
>
> Keep Enlightened VMCS structure for the current L2 guest permanently mapped
> from struct nested_vmx instead of mapping it every time.
>
> Suggested-by: Ladi Prosek <[email protected]>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 91 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e7fa9f9c6e36..6802ba91468c 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -20,6 +20,7 @@
> #include "mmu.h"
> #include "cpuid.h"
> #include "lapic.h"
> +#include "hyperv.h"
>
> #include <linux/kvm_host.h>
> #include <linux/module.h>
> @@ -690,6 +691,8 @@ struct nested_vmx {
> bool guest_mode;
> } smm;
>
> + gpa_t hv_evmcs_vmptr;
> + struct page *hv_evmcs_page;
> struct hv_enlightened_vmcs *hv_evmcs;
> };
>
> @@ -7695,7 +7698,9 @@ static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
> static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
> u32 vm_instruction_error)
> {
> - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs) {
> /*
> * failValid writes the error number to the current VMCS, which
> * can't be done if there isn't a current VMCS.
> @@ -8003,6 +8008,18 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
> vmcs_write64(VMCS_LINK_POINTER, -1ull);
> }
>
> +static inline void nested_release_evmcs(struct vcpu_vmx *vmx)
> +{
> + if (!vmx->nested.hv_evmcs)
> + return;
> +
> + kunmap(vmx->nested.hv_evmcs_page);
> + kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
> + vmx->nested.hv_evmcs_vmptr = -1ull;
> + vmx->nested.hv_evmcs_page = NULL;
> + vmx->nested.hv_evmcs = NULL;
> +}
> +
> static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
> {
> if (vmx->nested.current_vmptr == -1ull)
> @@ -8062,6 +8079,8 @@ static void free_nested(struct vcpu_vmx *vmx)
> vmx->nested.pi_desc = NULL;
> }
>
> + nested_release_evmcs(vmx);
> +
> free_loaded_vmcs(&vmx->nested.vmcs02);
> }
>
> @@ -8098,12 +8117,18 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
> return kvm_skip_emulated_instruction(vcpu);
> }
>
> - if (vmptr == vmx->nested.current_vmptr)
> - nested_release_vmcs12(vmx);
> + if (vmx->nested.hv_evmcs_page) {
> + if (vmptr == vmx->nested.hv_evmcs_vmptr)
> + nested_release_evmcs(vmx);
> + } else {
> + if (vmptr == vmx->nested.current_vmptr)
> + nested_release_vmcs12(vmx);
>
> - kvm_vcpu_write_guest(vcpu,
> - vmptr + offsetof(struct vmcs12, launch_state),
> - &zero, sizeof(zero));
> + kvm_vcpu_write_guest(vcpu,
> + vmptr + offsetof(struct vmcs12,
> + launch_state),
> + &zero, sizeof(zero));
> + }
>
> nested_vmx_succeed(vcpu);
> return kvm_skip_emulated_instruction(vcpu);
> @@ -8814,6 +8839,10 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
> return kvm_skip_emulated_instruction(vcpu);
> }
>
> + /* Forbid normal VMPTRLD if Enlightened version was used */
> + if (vmx->nested.hv_evmcs)
> + return 1;
> +
> if (vmx->nested.current_vmptr != vmptr) {
> struct vmcs12 *new_vmcs12;
> struct page *page;
> @@ -8847,6 +8876,55 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
> return kvm_skip_emulated_instruction(vcpu);
> }
>
> +/*
> + * This is an equivalent of the nested hypervisor executing the vmptrld
> + * instruction.
> + */
> +static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct hv_vp_assist_page assist_page;
> +
> + if (likely(!vmx->nested.enlightened_vmcs_enabled))
> + return 1;
> +
> + if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
> + return 1;
> +
> + if (unlikely(!assist_page.enlighten_vmentry))
> + return 1;
> +
> + if (unlikely(assist_page.current_nested_vmcs !=
> + vmx->nested.hv_evmcs_vmptr)) {
> +
> + if (!vmx->nested.hv_evmcs)
> + vmx->nested.current_vmptr = -1ull;
> +
> + nested_release_evmcs(vmx);
> +
> + vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page(
> + vcpu, assist_page.current_nested_vmcs);
> +
> + if (unlikely(is_error_page(vmx->nested.hv_evmcs_page)))
> + return 0;
> +
> + vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
> + vmx->nested.dirty_vmcs12 = true;
> + vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs;
> +
> + /*
> + * Unlike normal vmcs12, enlightened vmcs12 is not fully
> + * reloaded from guest's memory (read only fields, fields not
> + * present in struct hv_enlightened_vmcs, ...). Make sure there
> + * are no leftovers.
> + */
> + memset(vmx->nested.cached_vmcs12, 0,
> + sizeof(*vmx->nested.cached_vmcs12));
> +
> + }
> + return 1;
> +}
> +
> /* Emulate the VMPTRST instruction */
> static int handle_vmptrst(struct kvm_vcpu *vcpu)
> {
> @@ -8858,6 +8936,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
> if (!nested_vmx_check_permission(vcpu))
> return 1;
>
> + if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
> + return 1;
> +
> if (get_vmx_mem_address(vcpu, exit_qualification,
> vmx_instruction_info, true, &vmcs_gva))
> return 1;
> @@ -12148,7 +12229,10 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
> if (!nested_vmx_check_permission(vcpu))
> return 1;
>
> - if (!nested_vmx_check_vmcs12(vcpu))
> + if (!nested_vmx_handle_enlightened_vmptrld(vcpu))
> + return 1;
> +
> + if (!vmx->nested.hv_evmcs && !nested_vmx_check_vmcs12(vcpu))
> goto out;
>
> vmcs12 = get_vmcs12(vcpu);
> --
> 2.14.4

Reviewed-By: Liran Alon <[email protected]>
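
As an aside, the TLFS excerpt quoted above reduces, on the L1 side, to a
couple of stores into the VP assist page. A rough sketch using the field
names from this patch (illustrative only, not the actual Hyper-V
implementation; current_vp_assist is an assumed pointer to the mapped page):

	/* Hypothetical L1 hypervisor: make an enlightened VMCS active. */
	current_vp_assist->current_nested_vmcs = evmcs_gpa; /* zeroed 4K page */
	current_vp_assist->enlighten_vmentry = 1;
	/* No VMPTRLD; the next VMLAUNCH/VMRESUME picks up the eVMCS. */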

2018-06-14 23:34:16

by Liran Alon

[permalink] [raw]
Subject: Re: [PATCH 3/5] KVM: nVMX: add enlightened VMCS state


----- [email protected] wrote:

> Add hv_evmcs pointer and implement copy_enlightened_to_vmcs12() and
> copy_vmcs12_to_enlightened().
>
> prepare_vmcs02()/prepare_vmcs02_full() separation is not valid for
> Enlightened VMCS, do full sync for now.
>
> Suggested-by: Ladi Prosek <[email protected]>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 431 +++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 417 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 51749207cef1..e7fa9f9c6e36 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -640,10 +640,10 @@ struct nested_vmx {
> */
> struct vmcs12 *cached_vmcs12;
> /*
> - * Indicates if the shadow vmcs must be updated with the
> - * data hold by vmcs12
> + * Indicates if the shadow vmcs or enlightened vmcs must be updated
> + * with the data held by struct vmcs12.
> */
> - bool sync_shadow_vmcs;
> + bool need_vmcs12_sync;
> bool dirty_vmcs12;
>
> bool change_vmcs01_virtual_apic_mode;
> @@ -689,6 +689,8 @@ struct nested_vmx {
> /* in guest mode on SMM entry? */
> bool guest_mode;
> } smm;
> +
> + struct hv_enlightened_vmcs *hv_evmcs;
> };
>
> #define POSTED_INTR_ON 0
> @@ -8010,7 +8012,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
> /* copy to memory all shadowed fields in case
> they were modified */
> copy_shadow_to_vmcs12(vmx);
> - vmx->nested.sync_shadow_vmcs = false;
> + vmx->nested.need_vmcs12_sync = false;
> vmx_disable_shadow_vmcs(vmx);
> }
> vmx->nested.posted_intr_nv = -1;
> @@ -8187,6 +8189,393 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
>
> }
>
> +static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, bool full)
> +{
> + struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
> + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
> +
> + /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
> + vmcs12->tpr_threshold = evmcs->tpr_threshold;
> + vmcs12->guest_rip = evmcs->guest_rip;
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
> + vmcs12->guest_rsp = evmcs->guest_rsp;
> + vmcs12->guest_rflags = evmcs->guest_rflags;
> + vmcs12->guest_interruptibility_info =
> + evmcs->guest_interruptibility_info;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
> + vmcs12->cpu_based_vm_exec_control =
> + evmcs->cpu_based_vm_exec_control;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
> + vmcs12->exception_bitmap = evmcs->exception_bitmap;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
> + vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
> + vmcs12->vm_entry_intr_info_field =
> + evmcs->vm_entry_intr_info_field;
> + vmcs12->vm_entry_exception_error_code =
> + evmcs->vm_entry_exception_error_code;
> + vmcs12->vm_entry_instruction_len =
> + evmcs->vm_entry_instruction_len;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
> + vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
> + vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
> + vmcs12->host_cr0 = evmcs->host_cr0;
> + vmcs12->host_cr3 = evmcs->host_cr3;
> + vmcs12->host_cr4 = evmcs->host_cr4;
> + vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
> + vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
> + vmcs12->host_rip = evmcs->host_rip;
> + vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
> + vmcs12->host_es_selector = evmcs->host_es_selector;
> + vmcs12->host_cs_selector = evmcs->host_cs_selector;
> + vmcs12->host_ss_selector = evmcs->host_ss_selector;
> + vmcs12->host_ds_selector = evmcs->host_ds_selector;
> + vmcs12->host_fs_selector = evmcs->host_fs_selector;
> + vmcs12->host_gs_selector = evmcs->host_gs_selector;
> + vmcs12->host_tr_selector = evmcs->host_tr_selector;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
> + vmcs12->pin_based_vm_exec_control =
> + evmcs->pin_based_vm_exec_control;
> + vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
> + vmcs12->secondary_vm_exec_control =
> + evmcs->secondary_vm_exec_control;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
> + vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
> + vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
> + vmcs12->msr_bitmap = evmcs->msr_bitmap;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
> + vmcs12->guest_es_base = evmcs->guest_es_base;
> + vmcs12->guest_cs_base = evmcs->guest_cs_base;
> + vmcs12->guest_ss_base = evmcs->guest_ss_base;
> + vmcs12->guest_ds_base = evmcs->guest_ds_base;
> + vmcs12->guest_fs_base = evmcs->guest_fs_base;
> + vmcs12->guest_gs_base = evmcs->guest_gs_base;
> + vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
> + vmcs12->guest_tr_base = evmcs->guest_tr_base;
> + vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
> + vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
> + vmcs12->guest_es_limit = evmcs->guest_es_limit;
> + vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
> + vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
> + vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
> + vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
> + vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
> + vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
> + vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
> + vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
> + vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
> + vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
> + vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
> + vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
> + vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
> + vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
> + vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
> + vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
> + vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
> + vmcs12->guest_es_selector = evmcs->guest_es_selector;
> + vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
> + vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
> + vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
> + vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
> + vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
> + vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
> + vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
> + vmcs12->tsc_offset = evmcs->tsc_offset;
> + vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
> + vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
> + vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
> + vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
> + vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
> + vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
> + vmcs12->guest_cr0 = evmcs->guest_cr0;
> + vmcs12->guest_cr3 = evmcs->guest_cr3;
> + vmcs12->guest_cr4 = evmcs->guest_cr4;
> + vmcs12->guest_dr7 = evmcs->guest_dr7;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
> + vmcs12->host_fs_base = evmcs->host_fs_base;
> + vmcs12->host_gs_base = evmcs->host_gs_base;
> + vmcs12->host_tr_base = evmcs->host_tr_base;
> + vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
> + vmcs12->host_idtr_base = evmcs->host_idtr_base;
> + vmcs12->host_rsp = evmcs->host_rsp;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
> + vmcs12->ept_pointer = evmcs->ept_pointer;
> + vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
> + vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
> + vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
> + vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
> + vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
> + vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
> + vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
> + vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
> + vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
> + vmcs12->guest_pending_dbg_exceptions =
> + evmcs->guest_pending_dbg_exceptions;
> + vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
> + vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
> + vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
> + vmcs12->guest_activity_state = evmcs->guest_activity_state;
> + vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
> + }
> +
> + /*
> + * Not used?
> + * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
> + * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
> + * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
> + * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
> + * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
> + * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
> + * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
> + * vmcs12->page_fault_error_code_mask =
> + * evmcs->page_fault_error_code_mask;
> + * vmcs12->page_fault_error_code_match =
> + * evmcs->page_fault_error_code_match;
> + * vmcs12->cr3_target_count = evmcs->cr3_target_count;
> + * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
> + * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
> + * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
> + */
> +
> + /*
> + * Read only fields:
> + * vmcs12->guest_physical_address = evmcs->guest_physical_address;
> + * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
> + * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
> + * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
> + * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
> + * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
> + * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
> + * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
> + * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
> + * vmcs12->exit_qualification = evmcs->exit_qualification;
> + * vmcs12->guest_linear_address = evmcs->guest_linear_address;
> + *
> + * Not present in struct vmcs12:
> + * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
> + * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
> + * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
> + * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
> + */
> +
> + return 0;
> +}
> +
> +static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
> +{
> + struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
> + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
> +
> + /*
> + * Should not be changed by KVM:
> + *
> + * evmcs->host_es_selector = vmcs12->host_es_selector;
> + * evmcs->host_cs_selector = vmcs12->host_cs_selector;
> + * evmcs->host_ss_selector = vmcs12->host_ss_selector;
> + * evmcs->host_ds_selector = vmcs12->host_ds_selector;
> + * evmcs->host_fs_selector = vmcs12->host_fs_selector;
> + * evmcs->host_gs_selector = vmcs12->host_gs_selector;
> + * evmcs->host_tr_selector = vmcs12->host_tr_selector;
> + * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
> + * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
> + * evmcs->host_cr0 = vmcs12->host_cr0;
> + * evmcs->host_cr3 = vmcs12->host_cr3;
> + * evmcs->host_cr4 = vmcs12->host_cr4;
> + * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
> + * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
> + * evmcs->host_rip = vmcs12->host_rip;
> + * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
> + * evmcs->host_fs_base = vmcs12->host_fs_base;
> + * evmcs->host_gs_base = vmcs12->host_gs_base;
> + * evmcs->host_tr_base = vmcs12->host_tr_base;
> + * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
> + * evmcs->host_idtr_base = vmcs12->host_idtr_base;
> + * evmcs->host_rsp = vmcs12->host_rsp;
> + * sync_vmcs12() doesn't read these:
> + * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
> + * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
> + * evmcs->msr_bitmap = vmcs12->msr_bitmap;
> + * evmcs->ept_pointer = vmcs12->ept_pointer;
> + * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
> + * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
> + * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
> + * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
> + * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
> + * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
> + * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
> + * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
> + * evmcs->tpr_threshold = vmcs12->tpr_threshold;
> + * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
> + * evmcs->exception_bitmap = vmcs12->exception_bitmap;
> + * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
> + * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
> + * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
> + * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
> + * evmcs->page_fault_error_code_mask =
> + * vmcs12->page_fault_error_code_mask;
> + * evmcs->page_fault_error_code_match =
> + * vmcs12->page_fault_error_code_match;
> + * evmcs->cr3_target_count = vmcs12->cr3_target_count;
> + * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
> + * evmcs->tsc_offset = vmcs12->tsc_offset;
> + * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
> + * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
> + * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
> + * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
> + * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
> + * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
> + * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
> + * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
> + *
> + * Not present in struct vmcs12:
> + * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
> + * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
> + * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
> + * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
> + */
> +
> + evmcs->guest_es_selector = vmcs12->guest_es_selector;
> + evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
> + evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
> + evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
> + evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
> + evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
> + evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
> + evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
> +
> + evmcs->guest_es_limit = vmcs12->guest_es_limit;
> + evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
> + evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
> + evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
> + evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
> + evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
> + evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
> + evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
> + evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
> + evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
> +
> + evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
> + evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
> + evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
> + evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
> + evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
> + evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
> + evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
> + evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
> +
> + evmcs->guest_es_base = vmcs12->guest_es_base;
> + evmcs->guest_cs_base = vmcs12->guest_cs_base;
> + evmcs->guest_ss_base = vmcs12->guest_ss_base;
> + evmcs->guest_ds_base = vmcs12->guest_ds_base;
> + evmcs->guest_fs_base = vmcs12->guest_fs_base;
> + evmcs->guest_gs_base = vmcs12->guest_gs_base;
> + evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
> + evmcs->guest_tr_base = vmcs12->guest_tr_base;
> + evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
> + evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
> +
> + evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
> + evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
> +
> + evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
> + evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
> + evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
> + evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
> +
> + evmcs->guest_pending_dbg_exceptions =
> + vmcs12->guest_pending_dbg_exceptions;
> + evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
> + evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
> +
> + evmcs->guest_activity_state = vmcs12->guest_activity_state;
> + evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
> +
> + evmcs->guest_cr0 = vmcs12->guest_cr0;
> + evmcs->guest_cr3 = vmcs12->guest_cr3;
> + evmcs->guest_cr4 = vmcs12->guest_cr4;
> + evmcs->guest_dr7 = vmcs12->guest_dr7;
> +
> + evmcs->guest_physical_address = vmcs12->guest_physical_address;
> +
> + evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
> + evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
> + evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
> + evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
> + evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
> + evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
> + evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
> + evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
> +
> + evmcs->exit_qualification = vmcs12->exit_qualification;
> +
> + evmcs->guest_linear_address = vmcs12->guest_linear_address;
> + evmcs->guest_rsp = vmcs12->guest_rsp;
> + evmcs->guest_rflags = vmcs12->guest_rflags;
> +
> + evmcs->guest_interruptibility_info =
> + vmcs12->guest_interruptibility_info;
> + evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
> + evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
> + evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
> + evmcs->vm_entry_exception_error_code =
> + vmcs12->vm_entry_exception_error_code;
> + evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
> +
> + evmcs->guest_rip = vmcs12->guest_rip;
> +
> + evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
> +
> + return 0;
> +}
> +
> /*
> * Copy the writable VMCS shadow fields back to the VMCS12, in case
> * they have been modified by the L1 guest. Note that the "read-only"
> @@ -8398,7 +8787,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
> SECONDARY_EXEC_SHADOW_VMCS);
> vmcs_write64(VMCS_LINK_POINTER,
> __pa(vmx->vmcs01.shadow_vmcs));
> - vmx->nested.sync_shadow_vmcs = true;
> + vmx->nested.need_vmcs12_sync = true;
> }
> vmx->nested.dirty_vmcs12 = true;
> }
> @@ -9960,9 +10349,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> vmcs_write32(PLE_WINDOW, vmx->ple_window);
> }
>
> - if (vmx->nested.sync_shadow_vmcs) {
> - copy_vmcs12_to_shadow(vmx);
> - vmx->nested.sync_shadow_vmcs = false;
> + if (vmx->nested.need_vmcs12_sync) {
> + if (unlikely(vmx->nested.hv_evmcs)) {

Why is this marked with unlikely()?
If the L1 guest uses eVMCS, this condition will always be true in
vmx_vcpu_run() after simulating a VMExit from L2 to L1. You should not have
unlikely() here, just as you don't have it in the new code added to
nested_vmx_run().
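
In other words, the suggested shape is simply the same block without the
hint (a sketch of the point being made, not a tested diff):

	if (vmx->nested.need_vmcs12_sync) {
		if (vmx->nested.hv_evmcs) {	/* no unlikely() */
			copy_vmcs12_to_enlightened(vmx);
			/* All fields are clean */
			vmx->nested.hv_evmcs->hv_clean_fields |=
				HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
		} else {
			copy_vmcs12_to_shadow(vmx);
		}
		vmx->nested.need_vmcs12_sync = false;
	}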

> + copy_vmcs12_to_enlightened(vmx);
> + /* All fields are clean */
> + vmx->nested.hv_evmcs->hv_clean_fields |=
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
> + } else {
> + copy_vmcs12_to_shadow(vmx);
> + }
> + vmx->nested.need_vmcs12_sync = false;
> }
>
> if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
> @@ -11281,7 +11677,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exec_control, vmcs12_exec_ctrl;
>
> - if (vmx->nested.dirty_vmcs12) {
> + if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
> prepare_vmcs02_full(vcpu, vmcs12);
> vmx->nested.dirty_vmcs12 = false;
> }
> @@ -11757,8 +12153,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>
> vmcs12 = get_vmcs12(vcpu);
>
> - if (enable_shadow_vmcs)
> + if (vmx->nested.hv_evmcs) {
> + copy_enlightened_to_vmcs12(vmx, vmx->nested.dirty_vmcs12);
> + /* Enlightened VMCS doesn't have launch state */
> + vmcs12->launch_state = !launch;
> + } else if (enable_shadow_vmcs) {
> copy_shadow_to_vmcs12(vmx);
> + }
>
> /*
> * The nested entry process starts with enforcing various prerequisites
> @@ -12383,8 +12784,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
> */
> kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
>
> - if (enable_shadow_vmcs && exit_reason != -1)
> - vmx->nested.sync_shadow_vmcs = true;
> + if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
> + vmx->nested.need_vmcs12_sync = true;
>
> /* in case we halted in L2 */
> vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> @@ -12463,12 +12864,14 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
> struct vmcs12 *vmcs12,
> u32 reason, unsigned long qualification)
> {
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> load_vmcs12_host_state(vcpu, vmcs12);
> vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
> vmcs12->exit_qualification = qualification;
> nested_vmx_succeed(vcpu);
> - if (enable_shadow_vmcs)
> - to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
> + if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
> + vmx->nested.need_vmcs12_sync = true;
> }
>
> static int vmx_check_intercept(struct kvm_vcpu *vcpu,
> --
> 2.14.4

2018-06-14 23:38:23

by Liran Alon

[permalink] [raw]
Subject: Re: [PATCH 5/5] KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case


----- [email protected] wrote:

> When Enlightened VMCS is in use by L1 hypervisor we can avoid vmwriting
> VMCS fields which did not change.
>
> Our first goal is to achieve minimal impact on traditional VMCS case so
> we're not wrapping each vmwrite() with an if-changed checker. We also can't
> utilize static keys as Enlightened VMCS usage is per-guest.
>
> This patch implements the simplest solution: checking fields in groups.
> We skip single vmwrite() statements as doing the check will cost us
> something even in non-evmcs case and the win is tiny. Unfortunately, this
> makes prepare_vmcs02{,_full}() code Enlightened VMCS-dependent (and
> a bit ugly).
>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 143 ++++++++++++++++++++++++++++++-----------------------
> 1 file changed, 82 insertions(+), 61 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6802ba91468c..9a7d76c5c92b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -11619,50 +11619,79 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
> return 0;
> }
>
> +/*
> + * Check if L1 hypervisor changed the particular field in Enlightened
> + * VMCS and avoid redundant vmwrite if it didn't. Can only be used when
> + * the value we're about to write is the unchanged vmcs12->field.
> + */
> +#define evmcs_needs_write(vmx, clean_field) ((vmx)->nested.dirty_vmcs12 ||\
> + !(vmx->nested.hv_evmcs->hv_clean_fields &\
> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_##clean_field))

Why declare this as a macro instead of a small static inline function?
Just to shorten the name of the clean-field constant?

> +
> static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
> +
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
> + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> + vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> + vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
> + vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
> + vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
> + vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
> + vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
> + vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
> + vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
> + vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
> + vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
> + vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
> + vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
> + vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
> + vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
> + vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
> + vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
> + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
> + vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
> + vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
> + vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
> + vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
> + vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
> + vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
> + vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
> + vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
> + vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
> + vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
> + vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
> + vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
> + vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
> + }
> +
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP1)) {
> + vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
> + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
> + vmcs12->guest_pending_dbg_exceptions);
> + vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
> + vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
> +
> + if (vmx_mpx_supported())
> + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>
> - vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> - vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> - vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
> - vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
> - vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
> - vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
> - vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
> - vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
> - vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
> - vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
> - vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
> - vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
> - vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
> - vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
> - vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
> - vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
> - vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
> - vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
> - vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
> - vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
> - vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
> - vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
> - vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
> - vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
> - vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
> - vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
> - vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
> - vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
> - vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
> - vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
> - vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
> -
> - vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
> - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
> - vmcs12->guest_pending_dbg_exceptions);
> - vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
> - vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
> + /*
> + * L1 may access the L2's PDPTR, so save them to construct
> + * vmcs12
> + */
> + if (enable_ept) {
> + vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
> + vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
> + vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
> + vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
> + }
> + }
>
> if (nested_cpu_has_xsaves(vmcs12))
> vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
> +
> vmcs_write64(VMCS_LINK_POINTER, -1ull);
>
> if (cpu_has_vmx_posted_intr())
> @@ -11717,9 +11746,6 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>
> set_cr4_guest_host_mask(vmx);
>
> - if (vmx_mpx_supported())
> - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
> -
> if (enable_vpid) {
> if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
> vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
> @@ -11727,16 +11753,6 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
> }
>
> - /*
> - * L1 may access the L2's PDPTR, so save them to construct vmcs12
> - */
> - if (enable_ept) {
> - vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
> - vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
> - vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
> - vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
> - }
> -
> if (cpu_has_vmx_msr_bitmap())
> vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
> }
> @@ -11757,6 +11773,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exec_control, vmcs12_exec_ctrl;
> + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
>
> if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
> prepare_vmcs02_full(vcpu, vmcs12);
> @@ -11768,11 +11785,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> * with vmx_shadow_fields.h.
> */
>
> - vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> - vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
> - vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
> - vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
> - vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
> + vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
> + vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
> + vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
> + vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
> + vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
> + }
>
> /*
> * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
> @@ -11788,12 +11807,14 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
> }
> if (vmx->nested.nested_run_pending) {
> - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> - vmcs12->vm_entry_intr_info_field);
> - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
> - vmcs12->vm_entry_exception_error_code);
> - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> - vmcs12->vm_entry_instruction_len);
> + if (!hv_evmcs || evmcs_needs_write(vmx, CONTROL_EVENT)) {
> + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> + vmcs12->vm_entry_intr_info_field);
> + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
> + vmcs12->vm_entry_exception_error_code);
> + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> + vmcs12->vm_entry_instruction_len);
> + }
> vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
> vmcs12->guest_interruptibility_info);
> vmx->loaded_vmcs->nmi_known_unmasked =
> --
> 2.14.4

2018-06-14 23:41:11

by Liran Alon

[permalink] [raw]
Subject: Re: [PATCH 1/5] KVM: hyperv: define VP assist page helpers


----- [email protected] wrote:

> From: Ladi Prosek <[email protected]>
>
> The state related to the VP assist page is still managed by the LAPIC
> code in the pv_eoi field.
>
> Signed-off-by: Ladi Prosek <[email protected]>
> Signed-off-by: Vitaly Kuznetsov <[email protected]>
> ---
> arch/x86/kvm/hyperv.c | 23 +++++++++++++++++++++--
> arch/x86/kvm/hyperv.h | 4 ++++
> arch/x86/kvm/lapic.c | 4 ++--
> arch/x86/kvm/lapic.h | 2 +-
> arch/x86/kvm/x86.c | 2 +-
> 5 files changed, 29 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 14e0d0ae4e0a..fdf659ca6167 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -688,6 +688,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
> stimer_cleanup(&hv_vcpu->stimer[i]);
> }
>
> +bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
> +{
> + if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
> + return false;
> + return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
> +}
> +EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
> +
> +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
> + struct hv_vp_assist_page *assist_page)
> +{
> + if (!kvm_hv_assist_page_enabled(vcpu))
> + return false;
> + return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
> + assist_page, sizeof(*assist_page));
> +}
> +EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
> +
> static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
> {
> struct hv_message *msg = &stimer->msg;
> @@ -1048,7 +1066,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
>
> if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
> hv->hv_vapic = data;
> - if (kvm_lapic_enable_pv_eoi(vcpu, 0))
> + if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
> return 1;
> break;
> }
> @@ -1061,7 +1079,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
> hv->hv_vapic = data;
> kvm_vcpu_mark_page_dirty(vcpu, gfn);
> if (kvm_lapic_enable_pv_eoi(vcpu,
> - gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
> + gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
> + sizeof(struct hv_vp_assist_page)))
> return 1;
> break;
> }
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 837465d69c6d..db825bb7efc7 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
> void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
> void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
>
> +bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
> +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
> + struct hv_vp_assist_page *assist_page);
> +
> static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
> int timer_index)
> {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 776391cf69a5..b6d6a36f1a33 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2540,7 +2540,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
> return 0;
> }
>
> -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
> +int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
> {
> u64 addr = data & ~KVM_MSR_ENABLED;
> if (!IS_ALIGNED(addr, 4))
> @@ -2550,7 +2550,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
> if (!pv_eoi_enabled(vcpu))
> return 0;
> return kvm_gfn_to_hva_cache_init(vcpu->kvm,
> &vcpu->arch.pv_eoi.data,
> - addr, sizeof(u8));
> + addr, len);
> }
>
> void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index ed0ed39abd36..ff6ef9c3d760 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -120,7 +120,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
> return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
> }
>
> -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
> +int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
> void kvm_lapic_init(void);
> void kvm_lapic_exit(void);
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 06dd4cdb2ca8..a57766b940a5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2442,7 +2442,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>
> break;
> case MSR_KVM_PV_EOI_EN:
> - if (kvm_lapic_enable_pv_eoi(vcpu, data))
> + if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
> return 1;
> break;
>
> --
> 2.14.4

Reviewed-By: Liran Alon <[email protected]>
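
For context, patch 4 of this series consumes these helpers roughly as
follows (condensed from the code quoted earlier in the thread):

	struct hv_vp_assist_page assist_page;

	if (!kvm_hv_get_assist_page(vcpu, &assist_page))
		return 1;	/* assist page disabled or unreadable */

	if (!assist_page.enlighten_vmentry)
		return 1;	/* L1 did not opt in to Enlightened VMCS */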

2018-06-15 10:32:31

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH 5/5] KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case

Liran Alon <[email protected]> writes:

> ----- [email protected] wrote:
>
>> When Enlightened VMCS is in use by L1 hypervisor we can avoid vmwriting
>> VMCS fields which did not change.
>>
>> Our first goal is to achieve minimal impact on traditional VMCS case so
>> we're not wrapping each vmwrite() with an if-changed checker. We also can't
>> utilize static keys as Enlightened VMCS usage is per-guest.
>>
>> This patch implements the simplest solution: checking fields in groups.
>> We skip single vmwrite() statements as doing the check will cost us
>> something even in non-evmcs case and the win is tiny. Unfortunately, this
>> makes prepare_vmcs02{,_full}() code Enlightened VMCS-dependent (and
>> a bit ugly).
>>
>> Signed-off-by: Vitaly Kuznetsov <[email protected]>
>> ---
>> arch/x86/kvm/vmx.c | 143 ++++++++++++++++++++++++++++++-----------------------
>> 1 file changed, 82 insertions(+), 61 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 6802ba91468c..9a7d76c5c92b 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -11619,50 +11619,79 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
>> return 0;
>> }
>>
>> +/*
>> + * Check if L1 hypervisor changed the particular field in Enlightened
>> + * VMCS and avoid redundant vmwrite if it didn't. Can only be used when
>> + * the value we're about to write is the unchanged vmcs12->field.
>> + */
>> +#define evmcs_needs_write(vmx, clean_field) ((vmx)->nested.dirty_vmcs12 ||\
>> + !(vmx->nested.hv_evmcs->hv_clean_fields &\
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_##clean_field))
>
> Why declare this as a macro instead of a small static inline function?
> Just to shorten the name of the clean-field constant?
>

To be completely honest, I forgot why I used a #define, but I think yes, it
was because the HV_VMX_ENLIGHTENED_CLEAN_FIELD_* constants are very long.
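
Indeed, the ## token pasting in the macro only abbreviates those constant
names. A static inline equivalent is possible if callers spell the constant
out in full, e.g. (a sketch, not part of the series):

	static inline bool evmcs_needs_write(struct vcpu_vmx *vmx,
					     u32 clean_field)
	{
		return vmx->nested.dirty_vmcs12 ||
		       !(vmx->nested.hv_evmcs->hv_clean_fields & clean_field);
	}

	/* caller: evmcs_needs_write(vmx, HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2) */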

>> +
>> static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
>> +
>> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
>> + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>> + vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
>> + vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
>> + vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
>> + vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
>> + vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
>> + vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
>> + vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
>> + vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
>> + vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
>> + vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
>> + vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
>> + vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
>> + vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
>> + vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
>> + vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
>> + vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
>> + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
>> + vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
>> + vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
>> + vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
>> + vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
>> + vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
>> + vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
>> + vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
>> + vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
>> + vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
>> + vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
>> + vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
>> + vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
>> + vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
>> + }
>> +
>> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP1)) {
>> + vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
>> + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
>> + vmcs12->guest_pending_dbg_exceptions);
>> + vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
>> + vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
>> +
>> + if (vmx_mpx_supported())
>> + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>>
>> - vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>> - vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
>> - vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
>> - vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
>> - vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
>> - vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
>> - vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
>> - vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
>> - vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
>> - vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
>> - vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
>> - vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
>> - vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
>> - vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
>> - vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
>> - vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
>> - vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
>> - vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
>> - vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
>> - vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
>> - vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
>> - vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
>> - vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
>> - vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
>> - vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
>> - vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
>> - vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
>> - vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
>> - vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
>> - vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
>> - vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
>> -
>> - vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
>> - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
>> - vmcs12->guest_pending_dbg_exceptions);
>> - vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
>> - vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
>> + /*
>> + * L1 may access the L2's PDPTR, so save them to construct
>> + * vmcs12
>> + */
>> + if (enable_ept) {
>> + vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
>> + vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
>> + vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
>> + vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
>> + }
>> + }
>>
>> if (nested_cpu_has_xsaves(vmcs12))
>> vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
>> +
>> vmcs_write64(VMCS_LINK_POINTER, -1ull);
>>
>> if (cpu_has_vmx_posted_intr())
>> @@ -11717,9 +11746,6 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>>
>> set_cr4_guest_host_mask(vmx);
>>
>> - if (vmx_mpx_supported())
>> - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>> -
>> if (enable_vpid) {
>> if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
>> vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
>> @@ -11727,16 +11753,6 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>> vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
>> }
>>
>> - /*
>> - * L1 may access the L2's PDPTR, so save them to construct vmcs12
>> - */
>> - if (enable_ept) {
>> - vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
>> - vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
>> - vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
>> - vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
>> - }
>> -
>> if (cpu_has_vmx_msr_bitmap())
>> vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
>> }
>> @@ -11757,6 +11773,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>> {
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> u32 exec_control, vmcs12_exec_ctrl;
>> + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
>>
>> if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
>> prepare_vmcs02_full(vcpu, vmcs12);
>> @@ -11768,11 +11785,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>> * with vmx_shadow_fields.h.
>> */
>>
>> - vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
>> - vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
>> - vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
>> - vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
>> - vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
>> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
>> + vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
>> + vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
>> + vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
>> + vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
>> + vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
>> + }
>>
>> /*
>> * Not in vmcs02: GUEST_PML_INDEX, HOST_FS_SELECTOR, HOST_GS_SELECTOR,
>> @@ -11788,12 +11807,14 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>> vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
>> }
>> if (vmx->nested.nested_run_pending) {
>> - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
>> - vmcs12->vm_entry_intr_info_field);
>> - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
>> - vmcs12->vm_entry_exception_error_code);
>> - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
>> - vmcs12->vm_entry_instruction_len);
>> + if (!hv_evmcs || evmcs_needs_write(vmx, CONTROL_EVENT)) {
>> + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
>> + vmcs12->vm_entry_intr_info_field);
>> + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
>> + vmcs12->vm_entry_exception_error_code);
>> + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
>> + vmcs12->vm_entry_instruction_len);
>> + }
>> vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
>> vmcs12->guest_interruptibility_info);
>> vmx->loaded_vmcs->nmi_known_unmasked =
>> --
>> 2.14.4
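
A note for readers: evmcs_needs_write() is introduced earlier in this
patch and is not visible in the hunks above; the GUEST_GRP1/GUEST_GRP2/
CONTROL_EVENT tokens presumably map to the corresponding
HV_VMX_ENLIGHTENED_CLEAN_FIELD_* bits. A minimal sketch of the idea
(not the posted implementation):

	/* Sketch only: a field group must be re-written to vmcs02
	 * unless L1 marked it clean in the enlightened VMCS. */
	static bool evmcs_needs_write(struct vcpu_vmx *vmx, u32 clean_field)
	{
		return !(vmx->nested.hv_evmcs->hv_clean_fields & clean_field);
	}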

--
Vitaly

2018-06-15 10:34:28

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH 3/5] KVM: nVMX: add enlightened VMCS state

Liran Alon <[email protected]> writes:

> ----- [email protected] wrote:
>
>> Add the hv_evmcs pointer and implement copy_enlightened_to_vmcs12() and
>> copy_vmcs12_to_enlightened().
>>
>> The prepare_vmcs02()/prepare_vmcs02_full() separation is not valid for
>> Enlightened VMCS, so do a full sync for now.
>>
>> Suggested-by: Ladi Prosek <[email protected]>
>> Signed-off-by: Vitaly Kuznetsov <[email protected]>
>> ---
>> arch/x86/kvm/vmx.c | 431 +++++++++++++++++++++++++++++++++++++++++++++++++++--
>> 1 file changed, 417 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 51749207cef1..e7fa9f9c6e36 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -640,10 +640,10 @@ struct nested_vmx {
>> */
>> struct vmcs12 *cached_vmcs12;
>> /*
>> - * Indicates if the shadow vmcs must be updated with the
>> - * data hold by vmcs12
>> + * Indicates if the shadow vmcs or enlightened vmcs must be updated
>> + * with the data held by struct vmcs12.
>> */
>> - bool sync_shadow_vmcs;
>> + bool need_vmcs12_sync;
>> bool dirty_vmcs12;
>>
>> bool change_vmcs01_virtual_apic_mode;
>> @@ -689,6 +689,8 @@ struct nested_vmx {
>> /* in guest mode on SMM entry? */
>> bool guest_mode;
>> } smm;
>> +
>> + struct hv_enlightened_vmcs *hv_evmcs;
>> };
>>
>> #define POSTED_INTR_ON 0
>> @@ -8010,7 +8012,7 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>> /* copy to memory all shadowed fields in case
>> they were modified */
>> copy_shadow_to_vmcs12(vmx);
>> - vmx->nested.sync_shadow_vmcs = false;
>> + vmx->nested.need_vmcs12_sync = false;
>> vmx_disable_shadow_vmcs(vmx);
>> }
>> vmx->nested.posted_intr_nv = -1;
>> @@ -8187,6 +8189,393 @@ static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
>>
>> }
>>
>> +static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, bool full)
>> +{
>> + struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
>> + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
>> +
>> + /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
>> + vmcs12->tpr_threshold = evmcs->tpr_threshold;
>> + vmcs12->guest_rip = evmcs->guest_rip;
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
>> + vmcs12->guest_rsp = evmcs->guest_rsp;
>> + vmcs12->guest_rflags = evmcs->guest_rflags;
>> + vmcs12->guest_interruptibility_info =
>> + evmcs->guest_interruptibility_info;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
>> + vmcs12->cpu_based_vm_exec_control =
>> + evmcs->cpu_based_vm_exec_control;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
>> + vmcs12->exception_bitmap = evmcs->exception_bitmap;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
>> + vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
>> + vmcs12->vm_entry_intr_info_field =
>> + evmcs->vm_entry_intr_info_field;
>> + vmcs12->vm_entry_exception_error_code =
>> + evmcs->vm_entry_exception_error_code;
>> + vmcs12->vm_entry_instruction_len =
>> + evmcs->vm_entry_instruction_len;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
>> + vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
>> + vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
>> + vmcs12->host_cr0 = evmcs->host_cr0;
>> + vmcs12->host_cr3 = evmcs->host_cr3;
>> + vmcs12->host_cr4 = evmcs->host_cr4;
>> + vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
>> + vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
>> + vmcs12->host_rip = evmcs->host_rip;
>> + vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
>> + vmcs12->host_es_selector = evmcs->host_es_selector;
>> + vmcs12->host_cs_selector = evmcs->host_cs_selector;
>> + vmcs12->host_ss_selector = evmcs->host_ss_selector;
>> + vmcs12->host_ds_selector = evmcs->host_ds_selector;
>> + vmcs12->host_fs_selector = evmcs->host_fs_selector;
>> + vmcs12->host_gs_selector = evmcs->host_gs_selector;
>> + vmcs12->host_tr_selector = evmcs->host_tr_selector;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
>> + vmcs12->pin_based_vm_exec_control =
>> + evmcs->pin_based_vm_exec_control;
>> + vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
>> + vmcs12->secondary_vm_exec_control =
>> + evmcs->secondary_vm_exec_control;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
>> + vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
>> + vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
>> + vmcs12->msr_bitmap = evmcs->msr_bitmap;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
>> + vmcs12->guest_es_base = evmcs->guest_es_base;
>> + vmcs12->guest_cs_base = evmcs->guest_cs_base;
>> + vmcs12->guest_ss_base = evmcs->guest_ss_base;
>> + vmcs12->guest_ds_base = evmcs->guest_ds_base;
>> + vmcs12->guest_fs_base = evmcs->guest_fs_base;
>> + vmcs12->guest_gs_base = evmcs->guest_gs_base;
>> + vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
>> + vmcs12->guest_tr_base = evmcs->guest_tr_base;
>> + vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
>> + vmcs12->guest_idtr_base = evmcs->guest_idtr_base;
>> + vmcs12->guest_es_limit = evmcs->guest_es_limit;
>> + vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
>> + vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
>> + vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
>> + vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
>> + vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
>> + vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
>> + vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
>> + vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
>> + vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;
>> + vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
>> + vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
>> + vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
>> + vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
>> + vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
>> + vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
>> + vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
>> + vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;
>> + vmcs12->guest_es_selector = evmcs->guest_es_selector;
>> + vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
>> + vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
>> + vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
>> + vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
>> + vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
>> + vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
>> + vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
>> + vmcs12->tsc_offset = evmcs->tsc_offset;
>> + vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
>> + vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
>> + vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
>> + vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
>> + vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
>> + vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
>> + vmcs12->guest_cr0 = evmcs->guest_cr0;
>> + vmcs12->guest_cr3 = evmcs->guest_cr3;
>> + vmcs12->guest_cr4 = evmcs->guest_cr4;
>> + vmcs12->guest_dr7 = evmcs->guest_dr7;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
>> + vmcs12->host_fs_base = evmcs->host_fs_base;
>> + vmcs12->host_gs_base = evmcs->host_gs_base;
>> + vmcs12->host_tr_base = evmcs->host_tr_base;
>> + vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
>> + vmcs12->host_idtr_base = evmcs->host_idtr_base;
>> + vmcs12->host_rsp = evmcs->host_rsp;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
>> + vmcs12->ept_pointer = evmcs->ept_pointer;
>> + vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
>> + }
>> +
>> + if (unlikely(full || !(evmcs->hv_clean_fields &
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
>> + vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
>> + vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
>> + vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
>> + vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
>> + vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
>> + vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
>> + vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
>> + vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
>> + vmcs12->guest_pending_dbg_exceptions =
>> + evmcs->guest_pending_dbg_exceptions;
>> + vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
>> + vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
>> + vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
>> + vmcs12->guest_activity_state = evmcs->guest_activity_state;
>> + vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
>> + }
>> +
>> + /*
>> + * Not used?
>> + * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr;
>> + * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr;
>> + * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr;
>> + * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0;
>> + * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1;
>> + * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2;
>> + * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3;
>> + * vmcs12->page_fault_error_code_mask =
>> + * evmcs->page_fault_error_code_mask;
>> + * vmcs12->page_fault_error_code_match =
>> + * evmcs->page_fault_error_code_match;
>> + * vmcs12->cr3_target_count = evmcs->cr3_target_count;
>> + * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count;
>> + * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count;
>> + * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count;
>> + */
>> +
>> + /*
>> + * Read only fields:
>> + * vmcs12->guest_physical_address = evmcs->guest_physical_address;
>> + * vmcs12->vm_instruction_error = evmcs->vm_instruction_error;
>> + * vmcs12->vm_exit_reason = evmcs->vm_exit_reason;
>> + * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info;
>> + * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code;
>> + * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field;
>> + * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code;
>> + * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len;
>> + * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info;
>> + * vmcs12->exit_qualification = evmcs->exit_qualification;
>> + * vmcs12->guest_linear_address = evmcs->guest_linear_address;
>> + *
>> + * Not present in struct vmcs12:
>> + * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx;
>> + * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi;
>> + * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi;
>> + * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
>> + */
>> +
>> + return 0;
>> +}
>> +
>> +static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
>> +{
>> + struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
>> + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
>> +
>> + /*
>> + * Should not be changed by KVM:
>> + *
>> + * evmcs->host_es_selector = vmcs12->host_es_selector;
>> + * evmcs->host_cs_selector = vmcs12->host_cs_selector;
>> + * evmcs->host_ss_selector = vmcs12->host_ss_selector;
>> + * evmcs->host_ds_selector = vmcs12->host_ds_selector;
>> + * evmcs->host_fs_selector = vmcs12->host_fs_selector;
>> + * evmcs->host_gs_selector = vmcs12->host_gs_selector;
>> + * evmcs->host_tr_selector = vmcs12->host_tr_selector;
>> + * evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
>> + * evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
>> + * evmcs->host_cr0 = vmcs12->host_cr0;
>> + * evmcs->host_cr3 = vmcs12->host_cr3;
>> + * evmcs->host_cr4 = vmcs12->host_cr4;
>> + * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
>> + * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
>> + * evmcs->host_rip = vmcs12->host_rip;
>> + * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
>> + * evmcs->host_fs_base = vmcs12->host_fs_base;
>> + * evmcs->host_gs_base = vmcs12->host_gs_base;
>> + * evmcs->host_tr_base = vmcs12->host_tr_base;
>> + * evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
>> + * evmcs->host_idtr_base = vmcs12->host_idtr_base;
>> + * evmcs->host_rsp = vmcs12->host_rsp;
>> + * sync_vmcs12() doesn't read these:
>> + * evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
>> + * evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
>> + * evmcs->msr_bitmap = vmcs12->msr_bitmap;
>> + * evmcs->ept_pointer = vmcs12->ept_pointer;
>> + * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
>> + * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
>> + * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
>> + * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
>> + * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0;
>> + * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1;
>> + * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2;
>> + * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3;
>> + * evmcs->tpr_threshold = vmcs12->tpr_threshold;
>> + * evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
>> + * evmcs->exception_bitmap = vmcs12->exception_bitmap;
>> + * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
>> + * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
>> + * evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
>> + * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
>> + * evmcs->page_fault_error_code_mask =
>> + * vmcs12->page_fault_error_code_mask;
>> + * evmcs->page_fault_error_code_match =
>> + * vmcs12->page_fault_error_code_match;
>> + * evmcs->cr3_target_count = vmcs12->cr3_target_count;
>> + * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
>> + * evmcs->tsc_offset = vmcs12->tsc_offset;
>> + * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
>> + * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
>> + * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
>> + * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
>> + * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
>> + * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
>> + * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
>> + * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
>> + *
>> + * Not present in struct vmcs12:
>> + * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
>> + * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
>> + * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
>> + * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
>> + */
>> +
>> + evmcs->guest_es_selector = vmcs12->guest_es_selector;
>> + evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
>> + evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
>> + evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
>> + evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
>> + evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
>> + evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
>> + evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
>> +
>> + evmcs->guest_es_limit = vmcs12->guest_es_limit;
>> + evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
>> + evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
>> + evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
>> + evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
>> + evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
>> + evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
>> + evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
>> + evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
>> + evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
>> +
>> + evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
>> + evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
>> + evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
>> + evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
>> + evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
>> + evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
>> + evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
>> + evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
>> +
>> + evmcs->guest_es_base = vmcs12->guest_es_base;
>> + evmcs->guest_cs_base = vmcs12->guest_cs_base;
>> + evmcs->guest_ss_base = vmcs12->guest_ss_base;
>> + evmcs->guest_ds_base = vmcs12->guest_ds_base;
>> + evmcs->guest_fs_base = vmcs12->guest_fs_base;
>> + evmcs->guest_gs_base = vmcs12->guest_gs_base;
>> + evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
>> + evmcs->guest_tr_base = vmcs12->guest_tr_base;
>> + evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
>> + evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
>> +
>> + evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
>> + evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
>> +
>> + evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
>> + evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
>> + evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
>> + evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
>> +
>> + evmcs->guest_pending_dbg_exceptions =
>> + vmcs12->guest_pending_dbg_exceptions;
>> + evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
>> + evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
>> +
>> + evmcs->guest_activity_state = vmcs12->guest_activity_state;
>> + evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
>> +
>> + evmcs->guest_cr0 = vmcs12->guest_cr0;
>> + evmcs->guest_cr3 = vmcs12->guest_cr3;
>> + evmcs->guest_cr4 = vmcs12->guest_cr4;
>> + evmcs->guest_dr7 = vmcs12->guest_dr7;
>> +
>> + evmcs->guest_physical_address = vmcs12->guest_physical_address;
>> +
>> + evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
>> + evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
>> + evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
>> + evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
>> + evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
>> + evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
>> + evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
>> + evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
>> +
>> + evmcs->exit_qualification = vmcs12->exit_qualification;
>> +
>> + evmcs->guest_linear_address = vmcs12->guest_linear_address;
>> + evmcs->guest_rsp = vmcs12->guest_rsp;
>> + evmcs->guest_rflags = vmcs12->guest_rflags;
>> +
>> + evmcs->guest_interruptibility_info =
>> + vmcs12->guest_interruptibility_info;
>> + evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
>> + evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
>> + evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
>> + evmcs->vm_entry_exception_error_code =
>> + vmcs12->vm_entry_exception_error_code;
>> + evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
>> +
>> + evmcs->guest_rip = vmcs12->guest_rip;
>> +
>> + evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
>> +
>> + return 0;
>> +}
>> +
>> /*
>> * Copy the writable VMCS shadow fields back to the VMCS12, in case
>> * they have been modified by the L1 guest. Note that the "read-only"
>> @@ -8398,7 +8787,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
>> SECONDARY_EXEC_SHADOW_VMCS);
>> vmcs_write64(VMCS_LINK_POINTER,
>> __pa(vmx->vmcs01.shadow_vmcs));
>> - vmx->nested.sync_shadow_vmcs = true;
>> + vmx->nested.need_vmcs12_sync = true;
>> }
>> vmx->nested.dirty_vmcs12 = true;
>> }
>> @@ -9960,9 +10349,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>> vmcs_write32(PLE_WINDOW, vmx->ple_window);
>> }
>>
>> - if (vmx->nested.sync_shadow_vmcs) {
>> - copy_vmcs12_to_shadow(vmx);
>> - vmx->nested.sync_shadow_vmcs = false;
>> + if (vmx->nested.need_vmcs12_sync) {
>> + if (unlikely(vmx->nested.hv_evmcs)) {
>
> Why is this marked with unlikely()?
> If the L1 guest uses eVMCS, this will always be true in vmx_vcpu_run()
> after simulating a VMExit from L2 to L1. You should not have unlikely()
> here, just like you don't have it in the new code added to nested_vmx_run().

My reasoning was "eVMCS usage is rare", but you're right, I'll drop
this unlikely.
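
With the hint dropped, the hunk would read roughly like this (a sketch
derived from the quoted code, not a re-posted patch):

	if (vmx->nested.need_vmcs12_sync) {
		if (vmx->nested.hv_evmcs) {
			copy_vmcs12_to_enlightened(vmx);
			/* All fields are clean */
			vmx->nested.hv_evmcs->hv_clean_fields |=
				HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
		} else {
			copy_vmcs12_to_shadow(vmx);
		}
		vmx->nested.need_vmcs12_sync = false;
	}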

>
>> + copy_vmcs12_to_enlightened(vmx);
>> + /* All fields are clean */
>> + vmx->nested.hv_evmcs->hv_clean_fields |=
>> + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
>> + } else {
>> + copy_vmcs12_to_shadow(vmx);
>> + }
>> + vmx->nested.need_vmcs12_sync = false;
>> }
>>
>> if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
>> @@ -11281,7 +11677,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>> struct vcpu_vmx *vmx = to_vmx(vcpu);
>> u32 exec_control, vmcs12_exec_ctrl;
>>
>> - if (vmx->nested.dirty_vmcs12) {
>> + if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
>> prepare_vmcs02_full(vcpu, vmcs12);
>> vmx->nested.dirty_vmcs12 = false;
>> }
>> @@ -11757,8 +12153,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
>>
>> vmcs12 = get_vmcs12(vcpu);
>>
>> - if (enable_shadow_vmcs)
>> + if (vmx->nested.hv_evmcs) {
>> + copy_enlightened_to_vmcs12(vmx, vmx->nested.dirty_vmcs12);
>> + /* Enlightened VMCS doesn't have launch state */
>> + vmcs12->launch_state = !launch;
>> + } else if (enable_shadow_vmcs) {
>> copy_shadow_to_vmcs12(vmx);
>> + }
>>
>> /*
>> * The nested entry process starts with enforcing various prerequisites
>> @@ -12383,8 +12784,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
>> */
>> kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
>>
>> - if (enable_shadow_vmcs && exit_reason != -1)
>> - vmx->nested.sync_shadow_vmcs = true;
>> + if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs))
>> + vmx->nested.need_vmcs12_sync = true;
>>
>> /* in case we halted in L2 */
>> vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
>> @@ -12463,12 +12864,14 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
>> struct vmcs12 *vmcs12,
>> u32 reason, unsigned long qualification)
>> {
>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>> load_vmcs12_host_state(vcpu, vmcs12);
>> vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
>> vmcs12->exit_qualification = qualification;
>> nested_vmx_succeed(vcpu);
>> - if (enable_shadow_vmcs)
>> - to_vmx(vcpu)->nested.sync_shadow_vmcs = true;
>> + if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
>> + vmx->nested.need_vmcs12_sync = true;
>> }
>>
>> static int vmx_check_intercept(struct kvm_vcpu *vcpu,
>> --
>> 2.14.4

--
Vitaly

2018-06-15 10:35:40

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH 2/5] KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability

Liran Alon <[email protected]> writes:

> ----- [email protected] wrote:
>
>> Enlightened VMCS is opt-in. The current version does not contain all
>> fields supported by nested VMX so we must not advertise the
>> corresponding VMX features if enlightened VMCS is enabled.
>>
>> Userspace is given the enlightened VMCS version supported by KVM as
>> part of enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS. The version is to
>> be advertised to the nested hypervisor, currently done via a cpuid
>> leaf for Hyper-V.
>>
>> Suggested-by: Ladi Prosek <[email protected]>
>> Signed-off-by: Vitaly Kuznetsov <[email protected]>
>> ---
>> arch/x86/include/asm/kvm_host.h | 3 +
>> arch/x86/kvm/svm.c | 9 +++
>> arch/x86/kvm/vmx.c | 138 ++++++++++++++++++++++------------------
>> arch/x86/kvm/x86.c | 15 +++++
>> include/uapi/linux/kvm.h | 1 +
>> 5 files changed, 105 insertions(+), 61 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 0ebe659f2802..d7e8f7155d79 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -1095,6 +1095,9 @@ struct kvm_x86_ops {
>> int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
>>
>> int (*get_msr_feature)(struct kvm_msr_entry *entry);
>> +
>> + int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
>> + uint16_t *vmcs_version);
>> };
>>
>> struct kvm_arch_async_pf {
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index d9305f1723f5..6dc42c870565 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -7009,6 +7009,13 @@ static int svm_unregister_enc_region(struct kvm *kvm,
>> return ret;
>> }
>>
>> +static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
>> + uint16_t *vmcs_version)
>> +{
>> + /* Intel-only feature */
>> + return -ENODEV;
>> +}
>> +
>> static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>> .cpu_has_kvm_support = has_svm,
>> .disabled_by_bios = is_disabled,
>> @@ -7135,6 +7142,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>> .mem_enc_op = svm_mem_enc_op,
>> .mem_enc_reg_region = svm_register_enc_region,
>> .mem_enc_unreg_region = svm_unregister_enc_region,
>> +
>> + .nested_enable_evmcs = nested_enable_evmcs,
>> };
>>
>> static int __init svm_init(void)
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 48989f78be60..51749207cef1 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -648,6 +648,13 @@ struct nested_vmx {
>>
>> bool change_vmcs01_virtual_apic_mode;
>>
>> + /*
>> + * Enlightened VMCS has been enabled. It does not mean that L1 has to
>> + * use it. However, VMX features available to L1 will be limited based
>> + * on what the enlightened VMCS supports.
>> + */
>> + bool enlightened_vmcs_enabled;
>> +
>> /* L2 must run next, and mustn't decide to exit to L1. */
>> bool nested_run_pending;
>>
>> @@ -1186,6 +1193,49 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);
>>
>> #define KVM_EVMCS_VERSION 1
>>
>> +/*
>> + * Enlightened VMCSv1 doesn't support these:
>> + *
>> + * POSTED_INTR_NV = 0x00000002,
>> + * GUEST_INTR_STATUS = 0x00000810,
>> + * APIC_ACCESS_ADDR = 0x00002014,
>> + * POSTED_INTR_DESC_ADDR = 0x00002016,
>> + * EOI_EXIT_BITMAP0 = 0x0000201c,
>> + * EOI_EXIT_BITMAP1 = 0x0000201e,
>> + * EOI_EXIT_BITMAP2 = 0x00002020,
>> + * EOI_EXIT_BITMAP3 = 0x00002022,
>> + * GUEST_PML_INDEX = 0x00000812,
>> + * PML_ADDRESS = 0x0000200e,
>> + * VM_FUNCTION_CONTROL = 0x00002018,
>> + * EPTP_LIST_ADDRESS = 0x00002024,
>> + * VMREAD_BITMAP = 0x00002026,
>> + * VMWRITE_BITMAP = 0x00002028,
>> + *
>> + * TSC_MULTIPLIER = 0x00002032,
>> + * PLE_GAP = 0x00004020,
>> + * PLE_WINDOW = 0x00004022,
>> + * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
>> + * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
>> + * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
>> + *
>> + * Currently unsupported in KVM:
>> + * GUEST_IA32_RTIT_CTL = 0x00002814,
>> + */
>> +#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
>> + PIN_BASED_VMX_PREEMPTION_TIMER)
>> +#define EVMCS1_UNSUPPORTED_2NDEXEC \
>> + (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
>> + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \
>> + SECONDARY_EXEC_APIC_REGISTER_VIRT | \
>> + SECONDARY_EXEC_ENABLE_PML | \
>> + SECONDARY_EXEC_ENABLE_VMFUNC | \
>> + SECONDARY_EXEC_SHADOW_VMCS | \
>> + SECONDARY_EXEC_TSC_SCALING | \
>> + SECONDARY_EXEC_PAUSE_LOOP_EXITING)
>> +#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
>> +#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL)
>> +#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING)
>> +
>> #if IS_ENABLED(CONFIG_HYPERV)
>> static bool __read_mostly enlightened_vmcs = true;
>> module_param(enlightened_vmcs, bool, 0444);
>> @@ -1278,69 +1328,12 @@ static void evmcs_load(u64 phys_addr)
>>
>> static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
>> {
>> - /*
>> - * Enlightened VMCSv1 doesn't support these:
>> - *
>> - * POSTED_INTR_NV = 0x00000002,
>> - * GUEST_INTR_STATUS = 0x00000810,
>> - * APIC_ACCESS_ADDR = 0x00002014,
>> - * POSTED_INTR_DESC_ADDR = 0x00002016,
>> - * EOI_EXIT_BITMAP0 = 0x0000201c,
>> - * EOI_EXIT_BITMAP1 = 0x0000201e,
>> - * EOI_EXIT_BITMAP2 = 0x00002020,
>> - * EOI_EXIT_BITMAP3 = 0x00002022,
>> - */
>> - vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
>> - vmcs_conf->cpu_based_2nd_exec_ctrl &=
>> - ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
>> - vmcs_conf->cpu_based_2nd_exec_ctrl &=
>> - ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
>> - vmcs_conf->cpu_based_2nd_exec_ctrl &=
>> - ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
>> -
>> - /*
>> - * GUEST_PML_INDEX = 0x00000812,
>> - * PML_ADDRESS = 0x0000200e,
>> - */
>> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML;
>> + vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL;
>> + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
>>
>> - /* VM_FUNCTION_CONTROL = 0x00002018, */
>> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC;
>> + vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
>> + vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
>>
>> - /*
>> - * EPTP_LIST_ADDRESS = 0x00002024,
>> - * VMREAD_BITMAP = 0x00002026,
>> - * VMWRITE_BITMAP = 0x00002028,
>> - */
>> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS;
>> -
>> - /*
>> - * TSC_MULTIPLIER = 0x00002032,
>> - */
>> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING;
>> -
>> - /*
>> - * PLE_GAP = 0x00004020,
>> - * PLE_WINDOW = 0x00004022,
>> - */
>> - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
>> -
>> - /*
>> - * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
>> - */
>> - vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
>> -
>> - /*
>> - * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
>> - * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
>> - */
>> - vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
>> - vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
>> -
>> - /*
>> - * Currently unsupported in KVM:
>> - * GUEST_IA32_RTIT_CTL = 0x00002814,
>> - */
>
> The creation of the EVMCS1_UNSUPPORTED_* macros and the refactor to evmcs_sanitize_exec_ctrls()
> should be done in a separate patch in this series before this one.
>

Sure, will do!

>> }
>> #else /* !IS_ENABLED(CONFIG_HYPERV) */
>> static inline void evmcs_write64(unsigned long field, u64 value) {}
>> @@ -1354,6 +1347,27 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
>> static inline void evmcs_touch_msr_bitmap(void) {}
>> #endif /* IS_ENABLED(CONFIG_HYPERV) */
>>
>> +static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
>> + uint16_t *vmcs_version)
>> +{
>> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>> + /* We don't support disabling the feature for simplicity. */
>> + if (vmx->nested.enlightened_vmcs_enabled)
>> + return 0;
>> +
>> + vmx->nested.enlightened_vmcs_enabled = true;
>> + *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
>
> Please add a comment here explaining the "<< 8) | 1" part.
>

Got it.
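
Something along these lines, assuming the intent is that the low byte
carries the minimal supported version and the high byte the current one:

	/*
	 * vmcs_version encodes the range of supported Enlightened VMCS
	 * versions: the low 8 bits hold the minimal version, the high
	 * 8 bits the current (maximum) version, KVM_EVMCS_VERSION.
	 */
	*vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;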

>> +
>> + vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
>> + vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
>> + vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
>> + vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC;
>> + vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC;
>> +
>> + return 0;
>> +}
>> +
>> static inline bool is_exception_n(u32 intr_info, u8 vector)
>> {
>> return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
>> @@ -13039,6 +13053,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
>> .pre_enter_smm = vmx_pre_enter_smm,
>> .pre_leave_smm = vmx_pre_leave_smm,
>> .enable_smi_window = enable_smi_window,
>> +
>> + .nested_enable_evmcs = nested_enable_evmcs,
>> };
>>
>> static int __init vmx_init(void)
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index a57766b940a5..51488019dec2 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -2873,6 +2873,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> case KVM_CAP_HYPERV_VP_INDEX:
>> case KVM_CAP_HYPERV_EVENTFD:
>> case KVM_CAP_HYPERV_TLBFLUSH:
>> + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
>> case KVM_CAP_PCI_SEGMENT:
>> case KVM_CAP_DEBUGREGS:
>> case KVM_CAP_X86_ROBUST_SINGLESTEP:
>> @@ -3650,6 +3651,10 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
>> static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>> struct kvm_enable_cap *cap)
>> {
>> + int r;
>> + uint16_t vmcs_version;
>> + void __user *user_ptr;
>> +
>> if (cap->flags)
>> return -EINVAL;
>>
>> @@ -3662,6 +3667,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
>> return -EINVAL;
>> return kvm_hv_activate_synic(vcpu, cap->cap ==
>> KVM_CAP_HYPERV_SYNIC2);
>> + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
>> + r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
>> + if (!r) {
>> + user_ptr = (void __user *)(uintptr_t)cap->args[0];
>> + if (copy_to_user(user_ptr, &vmcs_version,
>> + sizeof(vmcs_version)))
>> + r = -EFAULT;
>> + }
>> + return r;
>> +
>> default:
>> return -EINVAL;
>> }
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index b6270a3b38e9..5c4b79c1af19 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -949,6 +949,7 @@ struct kvm_ppc_resize_hpt {
>> #define KVM_CAP_GET_MSR_FEATURES 153
>> #define KVM_CAP_HYPERV_EVENTFD 154
>> #define KVM_CAP_HYPERV_TLBFLUSH 155
>> +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 156
>>
>> #ifdef KVM_CAP_IRQ_ROUTING
>>
>> --
>> 2.14.4
>
> Besides above comments,
> Reviewed-By: Liran Alon <[email protected]>

Thank you for reviewing the series!
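
For anyone wiring this up in userspace, a rough sketch of enabling the
capability per the kvm_vcpu_ioctl_enable_cap() hunk above (the helper
name and fd handling are illustrative, not from this series):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Returns 0 on success; KVM then stores the supported eVMCS
	 * version range through the pointer passed in args[0]. */
	static int enable_evmcs(int vcpu_fd, uint16_t *vmcs_version)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
			.args[0] = (uintptr_t)vmcs_version,
		};

		return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
	}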

--
Vitaly