This patch series enables nested virtualization enlightenments for
SVM. It is very similar to the VMX enlightenments, except that there
is no enlightened VMCS: for SVM, the VMCB is already an architectural
in-memory data structure.
The supported enlightenments are:
Enlightened TLB Flush: When enabled, ASID invalidations flush only
gva -> hpa entries. To flush entries derived from NPT, the Hyper-V
provided hypercalls (HvFlushGuestPhysicalAddressSpace or
HvFlushGuestPhysicalAddressList) must be used.
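In KVM-on-Hyper-V terms, a remote TLB flush therefore has to go through
the guest-mapping-flush hypercall rather than rely on ASID invalidation.
A minimal sketch using the kernel's existing hyperv_flush_guest_mapping()
wrapper (the helper name here is illustrative, not part of this series):

	/* Sketch: flush NPT-derived TLB entries for one vCPU. */
	static int svm_hv_flush_npt_sketch(struct vcpu_svm *svm)
	{
		/*
		 * ASID invalidation no longer reaches entries derived
		 * from NPT; ask Hyper-V to flush the guest physical
		 * address space rooted at this vCPU's nCR3 instead.
		 */
		return hyperv_flush_guest_mapping(
				svm->vmcb->control.nested_cr3 & PAGE_MASK);
	}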
Enlightened MSR Bitmap (TLFS 16.5.3): "When enabled, L0 hypervisor does
not monitor the MSR bitmaps for changes. Instead, the L1 hypervisor must
invalidate the corresponding clean field after making changes to one of
the MSR bitmaps."
Direct Virtual Flush (TLFS 16.8): "The hypervisor exposes hypercalls
(HvFlushVirtualAddressSpace, HvFlushVirtualAddressSpaceEx,
HvFlushVirtualAddressList, and HvFlushVirtualAddressListEx) that allow
operating systems to more efficiently manage the virtual TLB. The L1
hypervisor can choose to allow its guest to use those hypercalls and
delegate the responsibility to handle them to the L0 hypervisor. This
requires the use of a partition assist page."
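For SVM, the opt-in boils down to advertising a partition assist page
and setting the nested-flush bit in the VMCB's software-reserved area;
a condensed sketch of what the Direct Virtual Flush patch below does
(hve points at the software-reserved area, hv_pa_pg is the allocated
partition assist page):

	/* Sketch: enable direct virtual flush for this vCPU's VM. */
	hve->partition_assist_page = __pa(hv_pa_pg);
	hve->hv_vm_id = (unsigned long)vcpu->kvm;
	hve->hv_enlightenments_control.nested_flush_hypercall = 1;
	vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);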
L2 Windows boot time was measured with and without the patch series.
Time was measured from power-on to the login screen and averaged over
5 consecutive trials:
Without the patch: 42 seconds
With the patch: 29 seconds
--
Changes from v3:
- Included definitions for software/hypervisor reserved fields in SVM
architectural data structures.
- Consolidated Hyper-V-specific code into svm_onhyperv.[ch] to reduce
the number of #ifdefs. This change applies only to SVM; VMX is not
touched and is out of scope for this patch series.
Changes from v2:
- Refactored the remote TLB flush logic into separate Hyper-V-specific
source files (kvm_onhyperv.[ch]).
- Reverted the VMCB Clean bits macro changes as it is no longer needed.
Changes from v1:
- Moved the remote TLB flush related fields from kvm_vcpu_hv and kvm_hv
to kvm_vcpu_arch and kvm_arch.
- Modified the VMCB clean mask at runtime based on whether the L1
hypervisor is running on Hyper-V.
- Based Hyper-V nested enlightenment detection on
HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS.
- Addressed other minor review comments.
---
Vineeth Pillai (7):
hyperv: Detect Nested virtualization support for SVM
hyperv: SVM enlightened TLB flush support flag
KVM: x86: hyper-v: Move the remote TLB flush logic out of vmx
KVM: SVM: Software reserved fields
KVM: SVM: hyper-v: Remote TLB flush for SVM
KVM: SVM: hyper-v: Enlightened MSR-Bitmap support
KVM: SVM: hyper-v: Direct Virtual Flush support
arch/x86/include/asm/hyperv-tlfs.h | 9 ++
arch/x86/include/asm/kvm_host.h | 14 +++
arch/x86/include/asm/svm.h | 9 +-
arch/x86/include/uapi/asm/svm.h | 3 +
arch/x86/kernel/cpu/mshyperv.c | 10 +-
arch/x86/kvm/Makefile | 9 ++
arch/x86/kvm/kvm_onhyperv.c | 94 +++++++++++++++++++
arch/x86/kvm/kvm_onhyperv.h | 33 +++++++
arch/x86/kvm/svm/svm.c | 13 +++
arch/x86/kvm/svm/svm.h | 22 ++++-
arch/x86/kvm/svm/svm_onhyperv.c | 41 +++++++++
arch/x86/kvm/svm/svm_onhyperv.h | 141 +++++++++++++++++++++++++++++
arch/x86/kvm/vmx/vmx.c | 97 ++------------------
arch/x86/kvm/vmx/vmx.h | 10 --
arch/x86/kvm/x86.c | 8 ++
15 files changed, 406 insertions(+), 107 deletions(-)
create mode 100644 arch/x86/kvm/kvm_onhyperv.c
create mode 100644 arch/x86/kvm/kvm_onhyperv.h
create mode 100644 arch/x86/kvm/svm/svm_onhyperv.c
create mode 100644 arch/x86/kvm/svm/svm_onhyperv.h
--
2.25.1
Currently the remote TLB flush logic is specific to VMX.
Move it to a common place so that SVM can use it as well.
Signed-off-by: Vineeth Pillai <[email protected]>
---
arch/x86/include/asm/kvm_host.h | 14 +++++
arch/x86/kvm/Makefile | 5 ++
arch/x86/kvm/kvm_onhyperv.c | 94 ++++++++++++++++++++++++++++++++
arch/x86/kvm/kvm_onhyperv.h | 33 +++++++++++
arch/x86/kvm/vmx/vmx.c | 97 +++------------------------------
arch/x86/kvm/vmx/vmx.h | 10 ----
arch/x86/kvm/x86.c | 8 +++
7 files changed, 161 insertions(+), 100 deletions(-)
create mode 100644 arch/x86/kvm/kvm_onhyperv.c
create mode 100644 arch/x86/kvm/kvm_onhyperv.h
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 877a4025d8da..ed84c15d18bc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -838,6 +838,15 @@ struct kvm_vcpu_arch {
/* Protected Guests */
bool guest_state_protected;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ /*
+ * Two-dimensional paging (TDP) pointer:
+ * EPTP for Intel,
+ * nCR3 for AMD.
+ */
+ u64 tdp_pointer;
+#endif
};
struct kvm_lpage_info {
@@ -1079,6 +1088,11 @@ struct kvm_arch {
*/
spinlock_t tdp_mmu_pages_lock;
#endif /* CONFIG_X86_64 */
+
+#if IS_ENABLED(CONFIG_HYPERV)
+ int tdp_pointers_match;
+ spinlock_t tdp_pointer_lock;
+#endif
};
struct kvm_vm_stat {
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 1b4766fe1de2..694f44c8192b 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -18,6 +18,11 @@ kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
mmu/spte.o
+
+ifdef CONFIG_HYPERV
+kvm-y += kvm_onhyperv.o
+endif
+
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
kvm-$(CONFIG_KVM_XEN) += xen.o
diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c
new file mode 100644
index 000000000000..7fec60836d1d
--- /dev/null
+++ b/arch/x86/kvm/kvm_onhyperv.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/mshyperv.h>
+
+#include "hyperv.h"
+#include "kvm_onhyperv.h"
+
+/* check_tdp_pointer_match() must be called with tdp_pointer_lock held. */
+static void check_tdp_pointer_match(struct kvm *kvm)
+{
+ u64 tdp_pointer = INVALID_PAGE;
+ bool valid_tdp = false;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!valid_tdp) {
+ tdp_pointer = vcpu->arch.tdp_pointer;
+ valid_tdp = true;
+ continue;
+ }
+
+ if (tdp_pointer != vcpu->arch.tdp_pointer) {
+ kvm->arch.tdp_pointers_match = TDP_POINTERS_MISMATCH;
+ return;
+ }
+ }
+
+ kvm->arch.tdp_pointers_match = TDP_POINTERS_MATCH;
+}
+
+static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
+ void *data)
+{
+ struct kvm_tlb_range *range = data;
+
+ return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
+ range->pages);
+}
+
+static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
+ struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
+{
+ u64 tdp_pointer = vcpu->arch.tdp_pointer;
+
+ /*
+ * The FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the
+ * address of the base of the TDP page table (the EPT PML4 table
+ * for VMX), so strip off the configuration information.
+ */
+ if (range)
+ return hyperv_flush_guest_mapping_range(tdp_pointer & PAGE_MASK,
+ kvm_fill_hv_flush_list_func, (void *)range);
+ else
+ return hyperv_flush_guest_mapping(tdp_pointer & PAGE_MASK);
+}
+
+int kvm_hv_remote_flush_tlb_with_range(struct kvm *kvm,
+ struct kvm_tlb_range *range)
+{
+ struct kvm_vcpu *vcpu;
+ int ret = 0, i;
+
+ spin_lock(&kvm->arch.tdp_pointer_lock);
+
+ if (kvm->arch.tdp_pointers_match == TDP_POINTERS_CHECK)
+ check_tdp_pointer_match(kvm);
+
+ if (kvm->arch.tdp_pointers_match != TDP_POINTERS_MATCH) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /* If tdp_pointer is invalid, bypass the flush request. */
+ if (VALID_PAGE(vcpu->arch.tdp_pointer))
+ ret |= __hv_remote_flush_tlb_with_range(
+ kvm, vcpu, range);
+ }
+ } else {
+ ret = __hv_remote_flush_tlb_with_range(kvm,
+ kvm_get_vcpu(kvm, 0), range);
+ }
+
+ spin_unlock(&kvm->arch.tdp_pointer_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_hv_remote_flush_tlb_with_range);
+
+int kvm_hv_remote_flush_tlb(struct kvm *kvm)
+{
+ return kvm_hv_remote_flush_tlb_with_range(kvm, NULL);
+}
+EXPORT_SYMBOL_GPL(kvm_hv_remote_flush_tlb);
diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h
new file mode 100644
index 000000000000..77415b173dc7
--- /dev/null
+++ b/arch/x86/kvm/kvm_onhyperv.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V.
+ */
+
+#ifndef __ARCH_X86_KVM_KVM_ONHYPERV_H__
+#define __ARCH_X86_KVM_KVM_ONHYPERV_H__
+
+#if IS_ENABLED(CONFIG_HYPERV)
+
+enum tdp_pointers_status {
+ TDP_POINTERS_CHECK = 0,
+ TDP_POINTERS_MATCH = 1,
+ TDP_POINTERS_MISMATCH = 2
+};
+
+static inline void kvm_update_arch_tdp_pointer(struct kvm_vcpu *vcpu,
+ u64 tdp_pointer)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ spin_lock(&kvm->arch.tdp_pointer_lock);
+ vcpu->arch.tdp_pointer = tdp_pointer;
+ kvm->arch.tdp_pointers_match = TDP_POINTERS_CHECK;
+ spin_unlock(&kvm->arch.tdp_pointer_lock);
+}
+
+int kvm_hv_remote_flush_tlb(struct kvm *kvm);
+int kvm_hv_remote_flush_tlb_with_range(struct kvm *kvm,
+ struct kvm_tlb_range *range);
+#endif
+#endif
+
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 50810d471462..8611d1711300 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -51,6 +51,7 @@
#include "cpuid.h"
#include "evmcs.h"
#include "hyperv.h"
+#include "kvm_onhyperv.h"
#include "irq.h"
#include "kvm_cache_regs.h"
#include "lapic.h"
@@ -472,83 +473,6 @@ static const u32 vmx_uret_msrs_list[] = {
static bool __read_mostly enlightened_vmcs = true;
module_param(enlightened_vmcs, bool, 0444);
-/* check_ept_pointer() should be under protection of ept_pointer_lock. */
-static void check_ept_pointer_match(struct kvm *kvm)
-{
- struct kvm_vcpu *vcpu;
- u64 tmp_eptp = INVALID_PAGE;
- int i;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!VALID_PAGE(tmp_eptp)) {
- tmp_eptp = to_vmx(vcpu)->ept_pointer;
- } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) {
- to_kvm_vmx(kvm)->ept_pointers_match
- = EPT_POINTERS_MISMATCH;
- return;
- }
- }
-
- to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH;
-}
-
-static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
- void *data)
-{
- struct kvm_tlb_range *range = data;
-
- return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
- range->pages);
-}
-
-static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
- struct kvm_vcpu *vcpu, struct kvm_tlb_range *range)
-{
- u64 ept_pointer = to_vmx(vcpu)->ept_pointer;
-
- /*
- * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs address
- * of the base of EPT PML4 table, strip off EPT configuration
- * information.
- */
- if (range)
- return hyperv_flush_guest_mapping_range(ept_pointer & PAGE_MASK,
- kvm_fill_hv_flush_list_func, (void *)range);
- else
- return hyperv_flush_guest_mapping(ept_pointer & PAGE_MASK);
-}
-
-static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
- struct kvm_tlb_range *range)
-{
- struct kvm_vcpu *vcpu;
- int ret = 0, i;
-
- spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
-
- if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK)
- check_ept_pointer_match(kvm);
-
- if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) {
- kvm_for_each_vcpu(i, vcpu, kvm) {
- /* If ept_pointer is invalid pointer, bypass flush request. */
- if (VALID_PAGE(to_vmx(vcpu)->ept_pointer))
- ret |= __hv_remote_flush_tlb_with_range(
- kvm, vcpu, range);
- }
- } else {
- ret = __hv_remote_flush_tlb_with_range(kvm,
- kvm_get_vcpu(kvm, 0), range);
- }
-
- spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
- return ret;
-}
-static int hv_remote_flush_tlb(struct kvm *kvm)
-{
- return hv_remote_flush_tlb_with_range(kvm, NULL);
-}
-
static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
{
struct hv_enlightened_vmcs *evmcs;
@@ -3115,13 +3039,10 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd,
eptp = construct_eptp(vcpu, pgd, pgd_level);
vmcs_write64(EPT_POINTER, eptp);
- if (kvm_x86_ops.tlb_remote_flush) {
- spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock);
- to_vmx(vcpu)->ept_pointer = eptp;
- to_kvm_vmx(kvm)->ept_pointers_match
- = EPT_POINTERS_CHECK;
- spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock);
- }
+#if IS_ENABLED(CONFIG_HYPERV)
+ if (kvm_x86_ops.tlb_remote_flush)
+ kvm_update_arch_tdp_pointer(vcpu, eptp);
+#endif
if (!enable_unrestricted_guest && !is_paging(vcpu))
guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
@@ -6989,8 +6910,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
vmx->pi_desc.nv = POSTED_INTR_VECTOR;
vmx->pi_desc.sn = 1;
- vmx->ept_pointer = INVALID_PAGE;
-
return 0;
free_vmcs:
@@ -7007,8 +6926,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
static int vmx_vm_init(struct kvm *kvm)
{
- spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock);
-
if (!ple_gap)
kvm->arch.pause_in_guest = true;
@@ -7818,9 +7735,9 @@ static __init int hardware_setup(void)
#if IS_ENABLED(CONFIG_HYPERV)
if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH
&& enable_ept) {
- vmx_x86_ops.tlb_remote_flush = hv_remote_flush_tlb;
+ vmx_x86_ops.tlb_remote_flush = kvm_hv_remote_flush_tlb;
vmx_x86_ops.tlb_remote_flush_with_range =
- hv_remote_flush_tlb_with_range;
+ kvm_hv_remote_flush_tlb_with_range;
}
#endif
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 89da5e1251f1..d2e2ab46f5bb 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -325,7 +325,6 @@ struct vcpu_vmx {
*/
u64 msr_ia32_feature_control;
u64 msr_ia32_feature_control_valid_bits;
- u64 ept_pointer;
struct pt_desc pt_desc;
struct lbr_desc lbr_desc;
@@ -338,21 +337,12 @@ struct vcpu_vmx {
} shadow_msr_intercept;
};
-enum ept_pointers_status {
- EPT_POINTERS_CHECK = 0,
- EPT_POINTERS_MATCH = 1,
- EPT_POINTERS_MISMATCH = 2
-};
-
struct kvm_vmx {
struct kvm kvm;
unsigned int tss_addr;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
-
- enum ept_pointers_status ept_pointers_match;
- spinlock_t ept_pointer_lock;
};
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2a20ce60152e..64a6a4a5d1b1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10115,6 +10115,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.pending_external_vector = -1;
vcpu->arch.preempted_in_kernel = false;
+#if IS_ENABLED(CONFIG_HYPERV)
+ vcpu->arch.tdp_pointer = INVALID_PAGE;
+#endif
+
r = static_call(kvm_x86_vcpu_create)(vcpu);
if (r)
goto free_guest_fpu;
@@ -10498,6 +10502,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.guest_can_read_msr_platform_info = true;
+#if IS_ENABLED(CONFIG_HYPERV)
+ spin_lock_init(&kvm->arch.tdp_pointer_lock);
+#endif
+
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
--
2.25.1
SVM added support for certain reserved fields that can be used by
software or a hypervisor. Add the following reserved fields:
- VMCB offset 0x3e0 - 0x3ff
- Clean bit 31
- SVM intercept exit code 0xf0000000
Later patches will make use of these fields to support the Hyper-V
nested virtualization enlightenments.
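Any structure overlaid on the new software-reserved area should be
size-checked against it at build time; for example (a sketch, using
the hv_enlightenments structure added later in this series):

	/* Sketch: the overlay must fit in the 32-byte reserved area. */
	BUILD_BUG_ON(sizeof(struct hv_enlightenments) >
		     sizeof_field(struct vmcb_control_area, reserved_sw));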
Signed-off-by: Vineeth Pillai <[email protected]>
---
arch/x86/include/asm/svm.h | 9 +++++++--
arch/x86/include/uapi/asm/svm.h | 3 +++
arch/x86/kvm/svm/svm.h | 17 +++++++++++++++--
3 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 1c561945b426..8dac318bd6f9 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -156,6 +156,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u64 avic_physical_id; /* Offset 0xf8 */
u8 reserved_7[8];
u64 vmsa_pa; /* Used for an SEV-ES guest */
+ u8 reserved_8[720];
+ /*
+ * Offset 0x3e0, 32 bytes reserved
+ * for use by hypervisor/software.
+ */
+ u8 reserved_sw[32];
};
@@ -312,7 +318,7 @@ struct ghcb {
#define EXPECTED_VMCB_SAVE_AREA_SIZE 1032
-#define EXPECTED_VMCB_CONTROL_AREA_SIZE 272
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
#define EXPECTED_GHCB_SIZE PAGE_SIZE
static inline void __unused_size_checks(void)
@@ -324,7 +330,6 @@ static inline void __unused_size_checks(void)
struct vmcb {
struct vmcb_control_area control;
- u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
struct vmcb_save_area save;
} __packed;
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 554f75fe013c..efa969325ede 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -110,6 +110,9 @@
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
+/* Exit code reserved for hypervisor/software use */
+#define SVM_EXIT_SW 0xf0000000
+
#define SVM_EXIT_ERR -1
#define SVM_EXIT_REASONS \
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 39e071fdab0c..4e073b88c8cb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -33,6 +33,11 @@ static const u32 host_save_user_msrs[] = {
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
+/*
+ * Clean bits in VMCB.
+ * VMCB_ALL_CLEAN_MASK might also need to
+ * be updated if this enum is modified.
+ */
enum {
VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
pause filter count */
@@ -50,9 +55,17 @@ enum {
* AVIC PHYSICAL_TABLE pointer,
* AVIC LOGICAL_TABLE pointer
*/
- VMCB_DIRTY_MAX,
+ VMCB_SW = 31, /* Reserved for hypervisor/software use */
};
+#define VMCB_ALL_CLEAN_MASK ( \
+ (1U << VMCB_INTERCEPTS) | (1U << VMCB_PERM_MAP) | \
+ (1U << VMCB_ASID) | (1U << VMCB_INTR) | \
+ (1U << VMCB_NPT) | (1U << VMCB_CR) | (1U << VMCB_DR) | \
+ (1U << VMCB_DT) | (1U << VMCB_SEG) | (1U << VMCB_CR2) | \
+ (1U << VMCB_LBR) | (1U << VMCB_AVIC) | \
+ (1U << VMCB_SW))
+
/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
@@ -230,7 +243,7 @@ static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
{
- vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
+ vmcb->control.clean = VMCB_ALL_CLEAN_MASK
& ~VMCB_ALWAYS_DIRTY_MASK;
}
--
2.25.1
Enlightened MSR-Bitmap as per TLFS:
"The L1 hypervisor may collaborate with the L0 hypervisor to make MSR
accesses more efficient. It can enable enlightened MSR bitmaps by setting
the corresponding field in the enlightened VMCS to 1. When enabled, L0
hypervisor does not monitor the MSR bitmaps for changes. Instead, the L1
hypervisor must invalidate the corresponding clean field after making
changes to one of the MSR bitmaps."
Enable this for SVM.
Related VMX changes:
commit ceef7d10dfb6 ("KVM: x86: VMX: hyper-v: Enlightened MSR-Bitmap support")
Signed-off-by: Vineeth Pillai <[email protected]>
---
arch/x86/kvm/svm/svm.c | 3 +++
arch/x86/kvm/svm/svm.h | 5 +++++
arch/x86/kvm/svm/svm_onhyperv.h | 28 ++++++++++++++++++++++++++++
3 files changed, 36 insertions(+)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 62b618ebcb25..a042f6c4ecb6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -676,6 +676,9 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
msrpm[offset] = tmp;
+
+ svm_hv_vmcb_dirty_nested_enlightenments(vcpu);
+
}
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 4e073b88c8cb..33c1a77eed58 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -247,6 +247,11 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
& ~VMCB_ALWAYS_DIRTY_MASK;
}
+static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
+{
+ return (vmcb->control.clean & (1 << bit));
+}
+
static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
{
vmcb->control.clean &= ~(1 << bit);
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 35b4bbbc539a..da6c6a0a70e0 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -31,6 +31,11 @@ struct __packed hv_enlightenments {
u64 reserved;
};
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW
+
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
{
struct hv_enlightenments *hve =
@@ -58,6 +63,24 @@ static inline void svm_hv_update_tdp_pointer(struct kvm_vcpu *vcpu,
if (kvm_x86_ops.tlb_remote_flush)
kvm_update_arch_tdp_pointer(vcpu, cr3);
}
+
+static inline void svm_hv_vmcb_dirty_nested_enlightenments(
+ struct kvm_vcpu *vcpu)
+{
+ struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+ struct hv_enlightenments *hve =
+ (struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+ /*
+ * vmcb can be NULL if called during early vcpu init.
+ * It is okay not to mark vmcb dirty during vcpu init,
+ * as it is marked dirty unconditionally towards the end
+ * of the vcpu init phase.
+ */
+ if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+ hve->hv_enlightenments_control.msr_bitmap)
+ vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+}
#else
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
@@ -72,6 +95,11 @@ static inline void svm_hv_update_tdp_pointer(struct kvm_vcpu *vcpu,
unsigned long cr3)
{
}
+
+static inline void svm_hv_vmcb_dirty_nested_enlightenments(
+ struct kvm_vcpu *vcpu)
+{
+}
#endif /* CONFIG_HYPERV */
#endif /* __ARCH_X86_KVM_SVM_ONHYPERV_H__ */
--
2.25.1
Enable remote TLB flush for SVM.
Signed-off-by: Vineeth Pillai <[email protected]>
---
arch/x86/kvm/svm/svm.c | 8 ++++
arch/x86/kvm/svm/svm_onhyperv.h | 77 +++++++++++++++++++++++++++++++++
2 files changed, 85 insertions(+)
create mode 100644 arch/x86/kvm/svm/svm_onhyperv.h
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index baee91c1e936..62b618ebcb25 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -43,6 +43,8 @@
#include "svm.h"
#include "svm_ops.h"
+#include "svm_onhyperv.h"
+
#define __ex(x) __kvm_handle_fault_on_reboot(x)
MODULE_AUTHOR("Qumranet");
@@ -997,6 +999,8 @@ static __init int svm_hardware_setup(void)
kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
+ svm_hv_hardware_setup();
+
if (nrips) {
if (!boot_cpu_has(X86_FEATURE_NRIPS))
nrips = false;
@@ -1274,6 +1278,8 @@ static void init_vmcb(struct vcpu_svm *svm)
}
}
+ svm_hv_init_vmcb(svm->vmcb);
+
vmcb_mark_all_dirty(svm->vmcb);
enable_gif(svm);
@@ -3967,6 +3973,8 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root,
svm->vmcb->control.nested_cr3 = cr3;
vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
+ svm_hv_update_tdp_pointer(vcpu, cr3);
+
/* Loading L2's CR3 is handled by enter_svm_guest_mode. */
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
return;
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
new file mode 100644
index 000000000000..35b4bbbc539a
--- /dev/null
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V for SVM.
+ */
+
+#ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__
+#define __ARCH_X86_KVM_SVM_ONHYPERV_H__
+
+#if IS_ENABLED(CONFIG_HYPERV)
+#include <asm/mshyperv.h>
+
+#include "hyperv.h"
+#include "kvm_onhyperv.h"
+
+static struct kvm_x86_ops svm_x86_ops;
+
+/*
+ * Hyper-V uses the software reserved 32 bytes in VMCB
+ * control area to expose SVM enlightenments to guests.
+ */
+struct __packed hv_enlightenments {
+ struct __packed hv_enlightenments_control {
+ u32 nested_flush_hypercall:1;
+ u32 msr_bitmap:1;
+ u32 enlightened_npt_tlb: 1;
+ u32 reserved:29;
+ } hv_enlightenments_control;
+ u32 hv_vp_id;
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 reserved;
+};
+
+static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
+{
+ struct hv_enlightenments *hve =
+ (struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+ if (npt_enabled &&
+ ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
+ hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
+}
+
+static inline void svm_hv_hardware_setup(void)
+{
+ if (npt_enabled &&
+ ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
+ pr_info("kvm: Hyper-V enlightened NPT TLB flush enabled\n");
+ svm_x86_ops.tlb_remote_flush = kvm_hv_remote_flush_tlb;
+ svm_x86_ops.tlb_remote_flush_with_range =
+ kvm_hv_remote_flush_tlb_with_range;
+ }
+}
+
+static inline void svm_hv_update_tdp_pointer(struct kvm_vcpu *vcpu,
+ unsigned long cr3)
+{
+ if (kvm_x86_ops.tlb_remote_flush)
+ kvm_update_arch_tdp_pointer(vcpu, cr3);
+}
+#else
+
+static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
+{
+}
+
+static inline void svm_hv_hardware_setup(void)
+{
+}
+
+static inline void svm_hv_update_tdp_pointer(struct kvm_vcpu *vcpu,
+ unsigned long cr3)
+{
+}
+#endif /* CONFIG_HYPERV */
+
+#endif /* __ARCH_X86_KVM_SVM_ONHYPERV_H__ */
--
2.25.1
From the Hyper-V TLFS:
"The hypervisor exposes hypercalls (HvFlushVirtualAddressSpace,
HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressList, and
HvFlushVirtualAddressListEx) that allow operating systems to more
efficiently manage the virtual TLB. The L1 hypervisor can choose to
allow its guest to use those hypercalls and delegate the responsibility
to handle them to the L0 hypervisor. This requires the use of a
partition assist page."
Add the Direct Virtual Flush support for SVM.
Related VMX changes:
commit 6f6a657c9998 ("KVM/Hyper-V/VMX: Add direct tlb flush support")
Signed-off-by: Vineeth Pillai <[email protected]>
---
arch/x86/kvm/Makefile | 4 ++++
arch/x86/kvm/svm/svm.c | 2 ++
arch/x86/kvm/svm/svm_onhyperv.c | 41 +++++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm_onhyperv.h | 36 +++++++++++++++++++++++++++++
4 files changed, 83 insertions(+)
create mode 100644 arch/x86/kvm/svm/svm_onhyperv.c
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 694f44c8192b..de1ff99a7095 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -30,6 +30,10 @@ kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
+ifdef CONFIG_HYPERV
+kvm-amd-y += svm/svm_onhyperv.o
+endif
+
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a042f6c4ecb6..f80b805db256 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3863,6 +3863,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
}
svm->vmcb->save.cr2 = vcpu->arch.cr2;
+ svm_hv_update_vp_id(svm->vmcb, vcpu);
+
/*
* Run with all-zero DR6 unless needed, so that we can get the exact cause
* of a #DB.
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
new file mode 100644
index 000000000000..3281856ebd94
--- /dev/null
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V for SVM.
+ */
+
+#include <linux/kvm_host.h>
+#include "kvm_cache_regs.h"
+
+#include <asm/mshyperv.h>
+
+#include "svm.h"
+#include "svm_ops.h"
+
+#include "hyperv.h"
+#include "kvm_onhyperv.h"
+#include "svm_onhyperv.h"
+
+int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+{
+ struct hv_enlightenments *hve;
+ struct hv_partition_assist_pg **p_hv_pa_pg =
+ &to_kvm_hv(vcpu->kvm)->hv_pa_pg;
+
+ if (!*p_hv_pa_pg)
+ *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+ if (!*p_hv_pa_pg)
+ return -ENOMEM;
+
+ hve = (struct hv_enlightenments *)to_svm(vcpu)->vmcb->control.reserved_sw;
+
+ hve->partition_assist_page = __pa(*p_hv_pa_pg);
+ hve->hv_vm_id = (unsigned long)vcpu->kvm;
+ if (!hve->hv_enlightenments_control.nested_flush_hypercall) {
+ hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+ vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+ }
+
+ return 0;
+}
+
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index da6c6a0a70e0..d9279d93ea69 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -36,6 +36,8 @@ struct __packed hv_enlightenments {
*/
#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW
+int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu);
+
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
{
struct hv_enlightenments *hve =
@@ -55,6 +57,23 @@ static inline void svm_hv_hardware_setup(void)
svm_x86_ops.tlb_remote_flush_with_range =
kvm_hv_remote_flush_tlb_with_range;
}
+
+ if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) {
+ int cpu;
+
+ pr_info("kvm: Hyper-V Direct TLB Flush enabled\n");
+ for_each_online_cpu(cpu) {
+ struct hv_vp_assist_page *vp_ap =
+ hv_get_vp_assist_page(cpu);
+
+ if (!vp_ap)
+ continue;
+
+ vp_ap->nested_control.features.directhypercall = 1;
+ }
+ svm_x86_ops.enable_direct_tlbflush =
+ hv_enable_direct_tlbflush;
+ }
}
static inline void svm_hv_update_tdp_pointer(struct kvm_vcpu *vcpu,
@@ -81,6 +100,18 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
hve->hv_enlightenments_control.msr_bitmap)
vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
}
+
+static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
+ struct kvm_vcpu *vcpu)
+{
+ struct hv_enlightenments *hve =
+ (struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+ if (hve->hv_vp_id != to_hv_vcpu(vcpu)->vp_index) {
+ hve->hv_vp_id = to_hv_vcpu(vcpu)->vp_index;
+ vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+ }
+}
#else
static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
@@ -100,6 +131,11 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
struct kvm_vcpu *vcpu)
{
}
+
+static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
+ struct kvm_vcpu *vcpu)
+{
+}
#endif /* CONFIG_HYPERV */
#endif /* __ARCH_X86_KVM_SVM_ONHYPERV_H__ */
--
2.25.1