This series allows guests to use the MSR_IA32_SPEC_CTRL and
MSR_IA32_PRED_CMD model-specific registers that were added as mitigations
for CVE-2017-5715.
These are only the KVM-specific parts of the fix. They do *not* yet
include any protection against reading host memory from the guest,
because that will be done the same way as in the rest of Linux. So there
is no IBRS *usage* here, no retpolines, no stuffing of the return stack
buffer.
(KVM already includes a fix to clear all registers on vmexit, which is
enough to block Google Project Zero's PoC exploit).
However, I am including the changes to use IBPB (indirect branch
predictor barrier) if available. The barrier is issued only when a
physical CPU switches to a different VCPU, so the performance impact is
small.
The patches are a bit hackish because the relevant cpufeatures have
not been included yet, and because I wanted to make the patches easier
to backport to distro kernels if desired, but I would still like to
have them in 4.16.
Please review. The interdiff from v1 is at the end of this cover letter.
Thanks,
Paolo
Paolo Bonzini (6):
KVM: x86: add SPEC_CTRL and IBPB_SUPPORT accessors
x86/msr: add definitions for indirect branch predictor MSRs
kvm: vmx: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to the guest
KVM: SVM: fix comment
kvm: svm: pass MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD down to guest
KVM: x86: add SPEC_CTRL and IBPB_SUPPORT to MSR and CPUID lists
Tim Chen (1):
kvm: vmx: Set IBPB when running a different VCPU
Tom Lendacky (1):
x86/svm: Set IBPB when running a different VCPU
arch/x86/include/asm/msr-index.h | 9 ++++-
arch/x86/kvm/cpuid.c | 27 ++++++++++---
arch/x86/kvm/cpuid.h | 22 +++++++++++
arch/x86/kvm/svm.c | 83 +++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/vmx.c | 59 ++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
6 files changed, 193 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ec08f1d8d39b..828a03425571 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -39,11 +39,6 @@
/* Intel MSRs. Some also available on other CPUs */
-#define MSR_IA32_SPEC_CTRL 0x00000048
-
-#define MSR_IA32_PRED_CMD 0x00000049
-#define FEATURE_SET_IBPB (1UL << 0)
-
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f
@@ -469,8 +464,15 @@
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
+
+#define MSR_IA32_SPEC_CTRL 0x00000048
+#define SPEC_CTRL_FEATURE_DISABLE_IBRS (0 << 0)
+#define SPEC_CTRL_FEATURE_ENABLE_IBRS (1 << 0)
+
+#define MSR_IA32_PRED_CMD 0x00000049
+#define PRED_CMD_IBPB (1UL << 0)
+
#define MSR_IA32_BNDCFGS 0x00000d90
-
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
#define MSR_IA32_XSS 0x00000da0
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index dd744d896cec..c249d5f599e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1738,7 +1738,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
* svm_vcpu_load; block speculative execution.
*/
if (have_ibpb_support)
- wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1776,7 +1776,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
if (have_ibpb_support)
- wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}
avic_vcpu_load(vcpu, cpu);
@@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->nested.vm_cr_msr;
break;
case MSR_IA32_SPEC_CTRL:
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
msr_info->data = svm->spec_ctrl;
break;
case MSR_IA32_UCODE_REV:
@@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
case MSR_IA32_SPEC_CTRL:
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
svm->spec_ctrl = data;
break;
case MSR_IA32_APICBASE:
@@ -4996,6 +5004,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
local_irq_enable();
+ /*
+ * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+ * before vmentry.
+ */
if (have_spec_ctrl && svm->spec_ctrl != 0)
wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
@@ -5077,6 +5089,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (svm->spec_ctrl != 0)
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
}
+ /*
+ * Speculative execution past the above wrmsrl might encounter
+ * an indirect branch and use guest-controlled contents of the
+ * indirect branch predictor; block it.
+ */
+ asm("lfence");
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bf127c570675..ef2681fa568a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2376,7 +2376,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
if (have_spec_ctrl)
- wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}
if (!already_loaded) {
@@ -3347,6 +3347,7 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
*/
static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;
switch (msr_info->index) {
@@ -3358,8 +3359,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_GS_BASE);
break;
case MSR_KERNEL_GS_BASE:
- vmx_load_host_state(to_vmx(vcpu));
- msr_info->data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
+ vmx_load_host_state(vmx);
+ msr_info->data = vmx->msr_guest_kernel_gs_base;
break;
#endif
case MSR_EFER:
@@ -3368,7 +3369,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = guest_read_tsc(vcpu);
break;
case MSR_IA32_SPEC_CTRL:
- msr_info->data = to_vmx(vcpu)->spec_ctrl;
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
+ msr_info->data = to_vmx(vcpu)->spec_ctrl;
break;
case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
@@ -3388,13 +3393,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_MCG_EXT_CTL:
if (!msr_info->host_initiated &&
- !(to_vmx(vcpu)->msr_ia32_feature_control &
+ !(vmx->msr_ia32_feature_control &
FEATURE_CONTROL_LMCE))
return 1;
msr_info->data = vcpu->arch.mcg_ext_ctl;
break;
case MSR_IA32_FEATURE_CONTROL:
- msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
+ msr_info->data = vmx->msr_ia32_feature_control;
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
if (!nested_vmx_allowed(vcpu))
@@ -3443,7 +3448,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
/* Otherwise falls through */
default:
- msr = find_msr_entry(to_vmx(vcpu), msr_info->index);
+ msr = find_msr_entry(vmx, msr_info->index);
if (msr) {
msr_info->data = msr->data;
break;
@@ -3510,7 +3515,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_write_tsc(vcpu, msr_info);
break;
case MSR_IA32_SPEC_CTRL:
- to_vmx(vcpu)->spec_ctrl = msr_info->data;
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
+ to_vmx(vcpu)->spec_ctrl = data;
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -4046,7 +4046,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
* vmx_vcpu_load; block speculative execution.
*/
if (have_spec_ctrl)
- wrmsrl(MSR_IA32_PRED_CMD, FEATURE_SET_IBPB);
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}
static void vmx_nested_free_vmcs02(struct vcpu_vmx *vmx)
@@ -9629,13 +9638,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
pt_guest_enter(vmx);
- if (have_spec_ctrl && vmx->spec_ctrl != 0)
- wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
-
atomic_switch_perf_msrs(vmx);
vmx_arm_hv_timer(vcpu);
+ /*
+ * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+ * before vmentry.
+ */
+ if (have_spec_ctrl && vmx->spec_ctrl != 0)
+ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -9744,9 +9757,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (have_spec_ctrl) {
rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
- if (vmx->spec_ctrl)
+ if (vmx->spec_ctrl != 0)
wrmsrl(MSR_IA32_SPEC_CTRL, 0);
}
+ /*
+ * Speculative execution past the above wrmsrl might encounter
+ * an indirect branch and use guest-controlled contents of the
+ * indirect branch predictor; block it.
+ */
+ asm("lfence");
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (vmx->host_debugctlmsr)
--
1.8.3.1
These MSRs are available if the CPU feature SPEC_CTRL
(CPUID(EAX=7,ECX=0).EDX[26]) is present. The PRED_CMD MSR
is also available if the CPU feature IBPB_SUPPORT
(CPUID(EAX=0x80000008).EBX[12]) is present.
KVM will soon start using PRED_CMD and will make SPEC_CTRL
available to guests.
Reviewed-by: Liran Alon <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/include/asm/msr-index.h | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 03ffde6217d0..828a03425571 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -464,8 +464,15 @@
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
-#define MSR_IA32_BNDCFGS 0x00000d90
+#define MSR_IA32_SPEC_CTRL 0x00000048
+#define SPEC_CTRL_FEATURE_DISABLE_IBRS (0 << 0)
+#define SPEC_CTRL_FEATURE_ENABLE_IBRS (1 << 0)
+
+#define MSR_IA32_PRED_CMD 0x00000049
+#define PRED_CMD_IBPB (1UL << 0)
+
+#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
#define MSR_IA32_XSS 0x00000da0
--
1.8.3.1
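As a quick illustration of how the definitions added above are meant to be
consumed (a minimal sketch, not part of the series; ibpb_supported and
spec_ctrl_supported are placeholders for whatever availability check the
caller performs via the CPUID bits mentioned in the commit message):

        /* Issue an indirect branch prediction barrier (IBPB). */
        if (ibpb_supported)                     /* CPUID(EAX=0x80000008).EBX[12] */
                wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);

        /* Turn IBRS on; writing SPEC_CTRL_FEATURE_DISABLE_IBRS (0) turns it off. */
        if (spec_ctrl_supported)                /* CPUID(EAX=7,ECX=0).EDX[26] */
                wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_FEATURE_ENABLE_IBRS);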
Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is important
for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore guest
IBRS on VM entry and set it to 0 on VM exit (because Linux does not use
it yet).
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm.c | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 31ace8d7774a..934a21e02e03 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -183,6 +183,8 @@ struct vcpu_svm {
u64 gs_base;
} host;
+ u64 spec_ctrl;
+
u32 *msrpm;
ulong nmi_iret_rip;
@@ -248,6 +250,8 @@ struct amd_svm_iommu_ir {
{ .index = MSR_CSTAR, .always = true },
{ .index = MSR_SYSCALL_MASK, .always = true },
#endif
+ { .index = MSR_IA32_SPEC_CTRL, .always = true },
+ { .index = MSR_IA32_PRED_CMD, .always = true },
{ .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
{ .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
{ .index = MSR_IA32_LASTINTFROMIP, .always = false },
@@ -283,6 +287,8 @@ struct amd_svm_iommu_ir {
/* enable/disable Virtual GIF */
static int vgif = true;
module_param(vgif, int, 0444);
+
+static bool __read_mostly have_spec_ctrl;
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
@@ -1135,6 +1141,17 @@ static __init int svm_hardware_setup(void)
pr_info("Virtual GIF supported\n");
}
+ /*
+ * FIXME: this is only needed until SPEC_CTRL is supported
+ * by upstream Linux in cpufeatures, then it can be replaced
+ * with static_cpu_has.
+ */
+ have_spec_ctrl = cpu_has_spec_ctrl();
+ if (have_spec_ctrl)
+ pr_info("kvm: SPEC_CTRL available\n");
+ else
+ pr_info("kvm: SPEC_CTRL not available\n");
+
return 0;
err:
@@ -3599,6 +3616,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_VM_CR:
msr_info->data = svm->nested.vm_cr_msr;
break;
+ case MSR_IA32_SPEC_CTRL:
+ msr_info->data = svm->spec_ctrl;
+ break;
case MSR_IA32_UCODE_REV:
msr_info->data = 0x01000065;
break;
@@ -3754,6 +3774,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
+ case MSR_IA32_SPEC_CTRL:
+ svm->spec_ctrl = data;
+ break;
case MSR_IA32_APICBASE:
if (kvm_vcpu_apicv_active(vcpu))
avic_update_vapic_bar(to_svm(vcpu), data);
@@ -4942,6 +4965,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
local_irq_enable();
+ /*
+ * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+ * before vmentry.
+ */
+ if (have_spec_ctrl && svm->spec_ctrl != 0)
+ wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+
asm volatile (
"push %%" _ASM_BP "; \n\t"
"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -5015,6 +5045,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ if (have_spec_ctrl) {
+ rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ if (svm->spec_ctrl != 0)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ }
+ /*
+ * Speculative execution past the above wrmsrl might encounter
+ * an indirect branch and use guest-controlled contents of the
+ * indirect branch predictor; block it.
+ */
+ asm("lfence");
+
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, svm->host.gs_base);
#else
--
1.8.3.1
MSR_IA32_SPEC_CTRL is not available unless CPU[7,0].EDX[26] is 1.
Check that against the host CPUID for host-initiated accesses and
against the guest CPUID for guest-initiated accesses.
Suggested-by: Jim Mattson <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
---
This is for after X86_FEATURE_SPEC_CTRL is added to Linux, but
I still wanted to ack Jim's improvement.
arch/x86/kvm/svm.c | 8 ++++++++
arch/x86/kvm/vmx.c | 8 ++++++++
2 files changed, 16 insertions(+)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 97126c2bd663..3a646580d7c5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = svm->nested.vm_cr_msr;
break;
case MSR_IA32_SPEC_CTRL:
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
msr_info->data = svm->spec_ctrl;
break;
case MSR_IA32_UCODE_REV:
@@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
case MSR_IA32_SPEC_CTRL:
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
svm->spec_ctrl = data;
break;
case MSR_IA32_APICBASE:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 49b4a2d61603..42bc7ee293e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3368,6 +3368,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = guest_read_tsc(vcpu);
break;
case MSR_IA32_SPEC_CTRL:
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
msr_info->data = to_vmx(vcpu)->spec_ctrl;
break;
case MSR_IA32_SYSENTER_CS:
@@ -3510,6 +3514,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_write_tsc(vcpu, msr_info);
break;
case MSR_IA32_SPEC_CTRL:
+ if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
+ return 1;
to_vmx(vcpu)->spec_ctrl = data;
break;
case MSR_IA32_CR_PAT:
--
1.8.3.1
Expose them to userspace, now that guests can use them.
I am not adding cpufeatures here, to avoid having a kernel
that shows spec_ctrl in /proc/cpuinfo but actually has no
support whatsoever for IBRS/IBPB. Keep the ugly special-casing
for now, and clean it up once the generic arch/x86/ code
learns about them.
Reviewed-by: Liran Alon <[email protected]>
Reviewed-by: Jim Mattson <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/cpuid.c | 24 +++++++++++++++++++++---
arch/x86/kvm/x86.c | 1 +
2 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 767af697c20c..5f43a5940275 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -397,7 +397,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
- KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
+ KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS) |
+ KF(SPEC_CTRL) | KF(STIBP);
+
+ /* cpuid 0x80000008.ebx */
+ const u32 kvm_cpuid_80000008_ebx_x86_features =
+ KF(IBPB_SUPPORT);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -483,7 +488,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
- entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
+ /*
+ * FIXME: the special casing of SPEC_CTRL and STIBP
+ * can be removed once they become regular
+ * cpufeatures.
+ */
+ entry->edx &= (
+ get_scattered_cpuid_leaf(7, 0, CPUID_EDX) |
+ KF(SPEC_CTRL) | KF(STIBP));
} else {
entry->ebx = 0;
entry->ecx = 0;
@@ -651,7 +663,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (!g_phys_as)
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
- entry->ebx = entry->edx = 0;
+ /*
+ * FIXME: mask against cpufeatures, with
+ * get_scattered_cpuid_leaf(0x80000008, 0, CPUID_EBX),
+ * once IBPB_SUPPORT becomes a regular cpufeature.
+ */
+ entry->ebx &= kvm_cpuid_80000008_ebx_x86_features;
+ entry->edx = 0;
break;
}
case 0x80000019:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index daa1918031df..4abb37d9f4d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1032,6 +1032,7 @@ unsigned int kvm_get_pt_addr_cnt(void)
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+ MSR_IA32_SPEC_CTRL,
};
static unsigned num_msrs_to_save;
--
1.8.3.1
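For completeness, here is a rough userspace sketch of how the newly exposed
CPUID bits could be checked through KVM_GET_SUPPORTED_CPUID (not part of the
series; error handling and the entry count are simplified):

        #include <fcntl.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <sys/ioctl.h>
        #include <linux/kvm.h>

        int main(void)
        {
                int kvm = open("/dev/kvm", O_RDWR);
                unsigned int n = 128;   /* usually enough; E2BIG means grow it */
                struct kvm_cpuid2 *c = calloc(1, sizeof(*c) + n * sizeof(c->entries[0]));

                c->nent = n;
                if (kvm < 0 || ioctl(kvm, KVM_GET_SUPPORTED_CPUID, c) < 0) {
                        perror("KVM_GET_SUPPORTED_CPUID");
                        return 1;
                }
                for (unsigned int i = 0; i < c->nent; i++) {
                        struct kvm_cpuid_entry2 *e = &c->entries[i];

                        if (e->function == 7 && e->index == 0)
                                printf("SPEC_CTRL: %u\n", (e->edx >> 26) & 1);
                        if (e->function == 0x80000008)
                                printf("IBPB_SUPPORT: %u\n", (e->ebx >> 12) & 1);
                }
                return 0;
        }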
From: Tom Lendacky <[email protected]>
Set IBPB (Indirect Branch Prediction Barrier) when the current CPU is
going to run a VCPU different from what was previously run. Nested
virtualization uses the same VMCB for the second level guest, but the
L1 hypervisor should be using IBRS to protect itself.
Signed-off-by: Tom Lendacky <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm.c | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 934a21e02e03..97126c2bd663 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -289,6 +289,7 @@ struct amd_svm_iommu_ir {
module_param(vgif, int, 0444);
static bool __read_mostly have_spec_ctrl;
+static bool __read_mostly have_ibpb_support;
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
@@ -540,6 +541,7 @@ struct svm_cpu_data {
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
+ struct vmcb *current_vmcb;
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -1151,6 +1153,11 @@ static __init int svm_hardware_setup(void)
pr_info("kvm: SPEC_CTRL available\n");
else
pr_info("kvm: SPEC_CTRL not available\n");
+ have_ibpb_support = have_spec_ctrl || cpu_has_ibpb_support();
+ if (have_ibpb_support)
+ pr_info("kvm: IBPB_SUPPORT available\n");
+ else
+ pr_info("kvm: IBPB_SUPPORT not available\n");
return 0;
@@ -1725,11 +1732,19 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
+
+ /*
+ * The VMCB could be recycled, causing a false negative in
+ * svm_vcpu_load; block speculative execution.
+ */
+ if (have_ibpb_support)
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int i;
if (unlikely(cpu != vcpu->cpu)) {
@@ -1758,6 +1773,12 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (static_cpu_has(X86_FEATURE_RDTSCP))
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+ if (sd->current_vmcb != svm->vmcb) {
+ sd->current_vmcb = svm->vmcb;
+ if (have_ibpb_support)
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
+ }
+
avic_vcpu_load(vcpu, cpu);
}
@@ -2798,6 +2819,11 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
if (!nested_vmcb)
return 1;
+ /*
+ * No need for IBPB here, the L1 hypervisor should be running with
+ * IBRS=1 and inserts one already when switching L2 VMs.
+ */
+
/* Exit Guest-Mode */
leave_guest_mode(&svm->vcpu);
svm->nested.vmcb = 0;
@@ -3061,6 +3087,11 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
if (!nested_vmcb)
return false;
+ /*
+ * No need for IBPB here, since the nested VM is less privileged. The
+ * L1 hypervisor inserts one already when switching L2 VMs.
+ */
+
if (!nested_vmcb_checks(nested_vmcb)) {
nested_vmcb->control.exit_code = SVM_EXIT_ERR;
nested_vmcb->control.exit_code_hi = 0;
--
1.8.3.1
From: Tim Chen <[email protected]>
Ensure an IBPB (Indirect branch prediction barrier) before every VCPU
switch.
Signed-off-by: Tim Chen <[email protected]>
Reviewed-by: Liran Alon <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/vmx.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ef603692aa98..49b4a2d61603 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2375,6 +2375,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
+ if (have_spec_ctrl)
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}
if (!already_loaded) {
@@ -4029,6 +4031,13 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
free_vmcs(loaded_vmcs->vmcs);
loaded_vmcs->vmcs = NULL;
WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
+
+ /*
+ * The VMCS could be recycled, causing a false negative in
+ * vmx_vcpu_load; block speculative execution.
+ */
+ if (have_spec_ctrl)
+ wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
}
static void vmx_nested_free_vmcs02(struct vcpu_vmx *vmx)
--
1.8.3.1
If always==true, then read/write bits are cleared from
the MSR permission bitmap, thus passing-through the MSR.
Fix the comment to match reality.
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/svm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 55dde3896898..31ace8d7774a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -236,7 +236,7 @@ struct amd_svm_iommu_ir {
static const struct svm_direct_access_msrs {
u32 index; /* Index of the MSR */
- bool always; /* True if intercept is always on */
+ bool always; /* True if intercept is always off */
} direct_access_msrs[] = {
{ .index = MSR_STAR, .always = true },
{ .index = MSR_IA32_SYSENTER_CS, .always = true },
--
1.8.3.1
Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is important
for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore guest
IBRS on VM entry and set it to 0 on VM exit (because Linux does not use
it yet).
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/vmx.c | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 669f5f74857d..ef603692aa98 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -120,6 +120,8 @@
module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#endif
+static bool __read_mostly have_spec_ctrl;
+
#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
@@ -609,6 +611,8 @@ struct vcpu_vmx {
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
#endif
+ u64 spec_ctrl;
+
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
u32 secondary_exec_control;
@@ -3361,6 +3365,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
msr_info->data = guest_read_tsc(vcpu);
break;
+ case MSR_IA32_SPEC_CTRL:
+ msr_info->data = to_vmx(vcpu)->spec_ctrl;
+ break;
case MSR_IA32_SYSENTER_CS:
msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
break;
@@ -3500,6 +3507,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
+ case MSR_IA32_SPEC_CTRL:
+ to_vmx(vcpu)->spec_ctrl = data;
+ break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -7062,6 +7072,17 @@ static __init int hardware_setup(void)
goto out;
}
+ /*
+ * FIXME: this is only needed until SPEC_CTRL is supported
+ * by upstream Linux in cpufeatures, then it can be replaced
+ * with static_cpu_has.
+ */
+ have_spec_ctrl = cpu_has_spec_ctrl();
+ if (have_spec_ctrl)
+ pr_info("kvm: SPEC_CTRL available\n");
+ else
+ pr_info("kvm: SPEC_CTRL not available\n");
+
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
@@ -7131,6 +7152,8 @@ static __init int hardware_setup(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
+ vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
vmx_msr_bitmap_legacy, PAGE_SIZE);
@@ -9601,6 +9624,13 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_arm_hv_timer(vcpu);
+ /*
+ * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
+ * before vmentry.
+ */
+ if (have_spec_ctrl && vmx->spec_ctrl != 0)
+ wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+
vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
@@ -9707,6 +9737,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ if (have_spec_ctrl) {
+ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ if (vmx->spec_ctrl != 0)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ }
+ /*
+ * Speculative execution past the above wrmsrl might encounter
+ * an indirect branch and use guest-controlled contents of the
+ * indirect branch predictor; block it.
+ */
+ asm("lfence");
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (vmx->host_debugctlmsr)
update_debugctlmsr(vmx->host_debugctlmsr);
--
1.8.3.1
As an interim measure until SPEC_CTRL is supported by upstream
Linux in cpufeatures, add a function that lets vmx.c and svm.c
know whether to save/restore MSR_IA32_SPEC_CTRL.
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
---
arch/x86/kvm/cpuid.c | 3 ---
arch/x86/kvm/cpuid.h | 22 ++++++++++++++++++++++
2 files changed, 22 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8e9a07c557f1..767af697c20c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -67,9 +67,6 @@ u64 kvm_supported_xcr0(void)
#define F(x) bit(X86_FEATURE_##x)
-/* These are scattered features in cpufeatures.h. */
-#define KVM_CPUID_BIT_AVX512_4VNNIW 2
-#define KVM_CPUID_BIT_AVX512_4FMAPS 3
#define KF(x) bit(KVM_CPUID_BIT_##x)
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c2cea6651279..8d04ccf177ce 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -155,6 +155,28 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
return x86_stepping(best->eax);
}
+/* These are scattered features in cpufeatures.h. */
+#define KVM_CPUID_BIT_AVX512_4VNNIW 2
+#define KVM_CPUID_BIT_AVX512_4FMAPS 3
+#define KVM_CPUID_BIT_SPEC_CTRL 26
+#define KVM_CPUID_BIT_STIBP 27
+
+/* CPUID[eax=0x80000008].ebx */
+#define KVM_CPUID_BIT_IBPB_SUPPORT 12
+
+static inline bool cpu_has_spec_ctrl(void)
+{
+ u32 eax, ebx, ecx, edx;
+ cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
+
+ return edx & bit(KVM_CPUID_BIT_SPEC_CTRL);
+}
+
+static inline bool cpu_has_ibpb_support(void)
+{
+ return cpuid_ebx(0x80000008) & bit(KVM_CPUID_BIT_IBPB_SUPPORT);
+}
+
static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
{
return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
--
1.8.3.1
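A minimal usage sketch of these accessors, mirroring the svm_hardware_setup()
and hardware_setup() hunks elsewhere in the series (the function name here is
made up for illustration):

        static bool __read_mostly have_spec_ctrl;
        static bool __read_mostly have_ibpb_support;

        static __init void probe_speculation_controls(void)
        {
                /* Interim probe until X86_FEATURE_SPEC_CTRL exists in cpufeatures. */
                have_spec_ctrl = cpu_has_spec_ctrl();

                /* SPEC_CTRL implies IBPB; AMD may expose only IBPB_SUPPORT. */
                have_ibpb_support = have_spec_ctrl || cpu_has_ibpb_support();
        }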
----- [email protected] wrote:
> As an interim measure until SPEC_CTRL is supported by upstream
> Linux in cpufeatures, add a function that lets vmx.c and svm.c
> know whether to save/restore MSR_IA32_SPEC_CTRL.
>
> Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/cpuid.c | 3 ---
> arch/x86/kvm/cpuid.h | 22 ++++++++++++++++++++++
> 2 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 8e9a07c557f1..767af697c20c 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -67,9 +67,6 @@ u64 kvm_supported_xcr0(void)
>
> #define F(x) bit(X86_FEATURE_##x)
>
> -/* These are scattered features in cpufeatures.h. */
> -#define KVM_CPUID_BIT_AVX512_4VNNIW 2
> -#define KVM_CPUID_BIT_AVX512_4FMAPS 3
> #define KF(x) bit(KVM_CPUID_BIT_##x)
>
> int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index c2cea6651279..8d04ccf177ce 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -155,6 +155,28 @@ static inline int guest_cpuid_stepping(struct
> kvm_vcpu *vcpu)
> return x86_stepping(best->eax);
> }
>
> +/* These are scattered features in cpufeatures.h. */
> +#define KVM_CPUID_BIT_AVX512_4VNNIW 2
> +#define KVM_CPUID_BIT_AVX512_4FMAPS 3
> +#define KVM_CPUID_BIT_SPEC_CTRL 26
> +#define KVM_CPUID_BIT_STIBP 27
> +
> +/* CPUID[eax=0x80000008].ebx */
> +#define KVM_CPUID_BIT_IBPB_SUPPORT 12
> +
> +static inline bool cpu_has_spec_ctrl(void)
> +{
> + u32 eax, ebx, ecx, edx;
> + cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
> +
> + return edx & bit(KVM_CPUID_BIT_SPEC_CTRL);
> +}
> +
> +static inline bool cpu_has_ibpb_support(void)
> +{
> + return cpuid_ebx(0x80000008) & bit(KVM_CPUID_BIT_IBPB_SUPPORT);
> +}
> +
> static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
> {
> return vcpu->arch.msr_platform_info &
> MSR_PLATFORM_INFO_CPUID_FAULT;
> --
> 1.8.3.1
Reviewed-by: Liran Alon <[email protected]>
----- [email protected] wrote:
> If always==true, then read/write bits are cleared from
> the MSR permission bitmap, thus passing-through the MSR.
> Fix the comment to match reality.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 55dde3896898..31ace8d7774a 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -236,7 +236,7 @@ struct amd_svm_iommu_ir {
>
> static const struct svm_direct_access_msrs {
> u32 index; /* Index of the MSR */
> - bool always; /* True if intercept is always on */
> + bool always; /* True if intercept is always off */
> } direct_access_msrs[] = {
> { .index = MSR_STAR, .always = true },
> { .index = MSR_IA32_SYSENTER_CS, .always = true },
> --
> 1.8.3.1
Reviewed-by: Liran Alon <[email protected]>
On Tue, Jan 09, 2018 at 01:03:08PM +0100, Paolo Bonzini wrote:
> Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is important
> for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore guest
> IBRS on VM entry and set it to 0 on VM exit (because Linux does not use
> it yet).
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 31ace8d7774a..934a21e02e03 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -183,6 +183,8 @@ struct vcpu_svm {
> u64 gs_base;
> } host;
>
> + u64 spec_ctrl;
> +
> u32 *msrpm;
>
> ulong nmi_iret_rip;
> @@ -248,6 +250,8 @@ struct amd_svm_iommu_ir {
> { .index = MSR_CSTAR, .always = true },
> { .index = MSR_SYSCALL_MASK, .always = true },
> #endif
> + { .index = MSR_IA32_SPEC_CTRL, .always = true },
> + { .index = MSR_IA32_PRED_CMD, .always = true },
> { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
> { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
> { .index = MSR_IA32_LASTINTFROMIP, .always = false },
> @@ -283,6 +287,8 @@ struct amd_svm_iommu_ir {
> /* enable/disable Virtual GIF */
> static int vgif = true;
> module_param(vgif, int, 0444);
> +
> +static bool __read_mostly have_spec_ctrl;
>
> static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
> static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
> @@ -1135,6 +1141,17 @@ static __init int svm_hardware_setup(void)
> pr_info("Virtual GIF supported\n");
> }
>
> + /*
> + * FIXME: this is only needed until SPEC_CTRL is supported
> + * by upstream Linux in cpufeatures, then it can be replaced
> + * with static_cpu_has.
> + */
> + have_spec_ctrl = cpu_has_spec_ctrl();
> + if (have_spec_ctrl)
> + pr_info("kvm: SPEC_CTRL available\n");
> + else
> + pr_info("kvm: SPEC_CTRL not available\n");
Perhaps just
pr_info("kvm: SPEC_CTRL %s available\n", have_spec_ctrl ? "" : "not");
> +
> return 0;
>
> err:
> @@ -3599,6 +3616,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> case MSR_VM_CR:
> msr_info->data = svm->nested.vm_cr_msr;
> break;
> + case MSR_IA32_SPEC_CTRL:
> + msr_info->data = svm->spec_ctrl;
> + break;
> case MSR_IA32_UCODE_REV:
> msr_info->data = 0x01000065;
> break;
> @@ -3754,6 +3774,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> case MSR_VM_IGNNE:
> vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
> break;
> + case MSR_IA32_SPEC_CTRL:
> + svm->spec_ctrl = data;
> + break;
> case MSR_IA32_APICBASE:
> if (kvm_vcpu_apicv_active(vcpu))
> avic_update_vapic_bar(to_svm(vcpu), data);
> @@ -4942,6 +4965,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
>
> local_irq_enable();
>
> + /*
> + * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
> + * before vmentry.
> + */
> + if (have_spec_ctrl && svm->spec_ctrl != 0)
> + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
> +
> asm volatile (
> "push %%" _ASM_BP "; \n\t"
> "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
> @@ -5015,6 +5045,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> #endif
> );
>
> + if (have_spec_ctrl) {
> + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
> + if (svm->spec_ctrl != 0)
Perhaps just :
if (svm->spec_ctrl) ?
And above too?
> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> + }
> + /*
> + * Speculative execution past the above wrmsrl might encounter
> + * an indirect branch and use guest-controlled contents of the
> + * indirect branch predictor; block it.
> + */
> + asm("lfence");
Don't you want this to be part of the if () .. else part?
Meaning:
        if (have_spec_ctrl && svm->spec_ctrl)
                wrmsrl(MSR_IA32_SPEC_CTRL, 0);
        else
                asm("lfence");
But .. I am missing something - AMD doesn't expose 0x48. They expose only 0x49.
That is, only IBPB is needed on AMD? (I haven't actually seen any official
docs from AMD).
> +
> #ifdef CONFIG_X86_64
> wrmsrl(MSR_GS_BASE, svm->host.gs_base);
> #else
> --
> 1.8.3.1
>
>
On Tue, Jan 09, 2018 at 01:03:09PM +0100, Paolo Bonzini wrote:
> From: Tom Lendacky <[email protected]>
>
> Set IBPB (Indirect Branch Prediction Barrier) when the current CPU is
> going to run a VCPU different from what was previously run. Nested
> virtualization uses the same VMCB for the second level guest, but the
> L1 hypervisor should be using IBRS to protect itself.
>
> Signed-off-by: Tom Lendacky <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm.c | 31 +++++++++++++++++++++++++++++++
> 1 file changed, 31 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 934a21e02e03..97126c2bd663 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -289,6 +289,7 @@ struct amd_svm_iommu_ir {
> module_param(vgif, int, 0444);
>
> static bool __read_mostly have_spec_ctrl;
> +static bool __read_mostly have_ibpb_support;
>
> static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
> static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
> @@ -540,6 +541,7 @@ struct svm_cpu_data {
> struct kvm_ldttss_desc *tss_desc;
>
> struct page *save_area;
> + struct vmcb *current_vmcb;
> };
>
> static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
> @@ -1151,6 +1153,11 @@ static __init int svm_hardware_setup(void)
> pr_info("kvm: SPEC_CTRL available\n");
> else
> pr_info("kvm: SPEC_CTRL not available\n");
> + have_ibpb_support = have_spec_ctrl || cpu_has_ibpb_support();
> + if (have_ibpb_support)
> + pr_info("kvm: IBPB_SUPPORT available\n");
> + else
> + pr_info("kvm: IBPB_SUPPORT not available\n");
pr_info("kvm: IBPB_SUPPORT %s available" .. you know :-)
----- [email protected] wrote:
> MSR_IA32_SPEC_CTRL is not available unless CPU[7,0].EDX[26] is 1.
> Check that against the host CPUID for host-initiated accesses and
> against the guest CPUID for guest-initiated accesses.
>
> Suggested-by: Jim Mattson <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> This is for after X86_FEATURE_SPEC_CTRL is added to Linux, but
> I still wanted to ack Jim's improvement.
>
> arch/x86/kvm/svm.c | 8 ++++++++
> arch/x86/kvm/vmx.c | 8 ++++++++
> 2 files changed, 16 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 97126c2bd663..3a646580d7c5 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
> msr_info->data = svm->nested.vm_cr_msr;
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> msr_info->data = svm->spec_ctrl;
> break;
> case MSR_IA32_UCODE_REV:
> @@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr)
> vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx,
> data);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> svm->spec_ctrl = data;
> break;
> case MSR_IA32_APICBASE:
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 49b4a2d61603..42bc7ee293e4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3368,6 +3368,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
> msr_info->data = guest_read_tsc(vcpu);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> msr_info->data = to_vmx(vcpu)->spec_ctrl;
> break;
> case MSR_IA32_SYSENTER_CS:
> @@ -3510,6 +3514,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
> kvm_write_tsc(vcpu, msr_info);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> to_vmx(vcpu)->spec_ctrl = data;
> break;
> case MSR_IA32_CR_PAT:
> --
> 1.8.3.1
Reviewed-by: Liran Alon <[email protected]>
The only thing I slightly dislike is that these MSRs are currently always passed through to the guest, and therefore
there is no case where vmx_set_msr() is called with !msr_info->host_initiated.
Don't you think we should BUG_ON(!msr_info->host_initiated)?
-Liran
On 09/01/2018 15:22, Konrad Rzeszutek Wilk wrote:
>> + have_spec_ctrl = cpu_has_spec_ctrl();
>> + if (have_spec_ctrl)
>> + pr_info("kvm: SPEC_CTRL available\n");
>> + else
>> + pr_info("kvm: SPEC_CTRL not available\n");
> Perhaps just
>
> pr_info("kvm: SPEC_CTRL %s available\n", have_spec_ctrl ? "" : "not");
>
I don't expect any of these FIXMEs to be ever committed. :)
Paolo
----- [email protected] wrote:
> Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is
> important
> for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore
> guest
> IBRS on VM entry and set it to 0 on VM exit (because Linux does not
> use
> it yet).
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 669f5f74857d..ef603692aa98 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -120,6 +120,8 @@
> module_param_named(preemption_timer, enable_preemption_timer, bool,
> S_IRUGO);
> #endif
>
> +static bool __read_mostly have_spec_ctrl;
> +
> #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
> #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP |
> X86_CR0_NE)
> #define KVM_VM_CR0_ALWAYS_ON \
> @@ -609,6 +611,8 @@ struct vcpu_vmx {
> u64 msr_host_kernel_gs_base;
> u64 msr_guest_kernel_gs_base;
> #endif
> + u64 spec_ctrl;
> +
> u32 vm_entry_controls_shadow;
> u32 vm_exit_controls_shadow;
> u32 secondary_exec_control;
> @@ -3361,6 +3365,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
> case MSR_IA32_TSC:
> msr_info->data = guest_read_tsc(vcpu);
> break;
> + case MSR_IA32_SPEC_CTRL:
> + msr_info->data = to_vmx(vcpu)->spec_ctrl;
> + break;
> case MSR_IA32_SYSENTER_CS:
> msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
> break;
> @@ -3500,6 +3507,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
> case MSR_IA32_TSC:
> kvm_write_tsc(vcpu, msr_info);
> break;
> + case MSR_IA32_SPEC_CTRL:
> + to_vmx(vcpu)->spec_ctrl = data;
> + break;
> case MSR_IA32_CR_PAT:
> if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
> if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> @@ -7062,6 +7072,17 @@ static __init int hardware_setup(void)
> goto out;
> }
>
> + /*
> + * FIXME: this is only needed until SPEC_CTRL is supported
> + * by upstream Linux in cpufeatures, then it can be replaced
> + * with static_cpu_has.
> + */
> + have_spec_ctrl = cpu_has_spec_ctrl();
> + if (have_spec_ctrl)
> + pr_info("kvm: SPEC_CTRL available\n");
> + else
> + pr_info("kvm: SPEC_CTRL not available\n");
> +
> if (boot_cpu_has(X86_FEATURE_NX))
> kvm_enable_efer_bits(EFER_NX);
>
> @@ -7131,6 +7152,8 @@ static __init int hardware_setup(void)
> vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
>
> memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
> vmx_msr_bitmap_legacy, PAGE_SIZE);
> @@ -9601,6 +9624,13 @@ static void __noclone vmx_vcpu_run(struct
> kvm_vcpu *vcpu)
>
> vmx_arm_hv_timer(vcpu);
>
> + /*
> + * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
> + * before vmentry.
> + */
> + if (have_spec_ctrl && vmx->spec_ctrl != 0)
> + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> +
> vmx->__launched = vmx->loaded_vmcs->launched;
> asm(
> /* Store host registers */
> @@ -9707,6 +9737,18 @@ static void __noclone vmx_vcpu_run(struct
> kvm_vcpu *vcpu)
> #endif
> );
>
> + if (have_spec_ctrl) {
> + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> + if (vmx->spec_ctrl != 0)
> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> + }
> + /*
> + * Speculative execution past the above wrmsrl might encounter
> + * an indirect branch and use guest-controlled contents of the
> + * indirect branch predictor; block it.
> + */
> + asm("lfence");
> +
> /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed
> */
> if (vmx->host_debugctlmsr)
> update_debugctlmsr(vmx->host_debugctlmsr);
> --
> 1.8.3.1
Reviewed-by: Liran Alon <[email protected]>
Oops, I missed these.
On 09/01/2018 15:22, Konrad Rzeszutek Wilk wrote:
>> + if (have_spec_ctrl) {
>> + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
>> + if (svm->spec_ctrl != 0)
> Perhaps just :
>
> if (svm->spec_ctrl) ?
>
> And above too?
These will become != SPEC_CTRL_ENABLE_IBRS or something like that.
>> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>> + }
>> + /*
>> + * Speculative execution past the above wrmsrl might encounter
>> + * an indirect branch and use guest-controlled contents of the
>> + * indirect branch predictor; block it.
>> + */
>> + asm("lfence");
> Don't you want this to be part of the if () .. else part?
Not right now, because the processor could speculate that have_spec_ctrl
== 0 and skip the wrmsrl. After it becomes a static_cpu_has, it could
move inside, but only if the kernel is compiled with static keys enabled.
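A minimal sketch of that future form, assuming X86_FEATURE_SPEC_CTRL becomes
a real cpufeature and the kernel is built with static keys (only a sketch, not
a patch):

        if (static_cpu_has(X86_FEATURE_SPEC_CTRL)) {
                rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
                if (vmx->spec_ctrl != 0)
                        wrmsrl(MSR_IA32_SPEC_CTRL, 0);
                /*
                 * The check is patched at boot into an unconditional jump or
                 * nop, so there is no not-taken path to speculate down and
                 * the barrier can live inside the block.
                 */
                asm("lfence");
        }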
> Meaning:
>
> if (have_spec_ctrl && svm->spec_ctrl)
> wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> else
> asm("lfence");
>
> But .. I am missing something - AMD doesn't expose 0x48. They expose only 0x49.
>
> That is, only IBPB is needed on AMD? (I haven't actually seen any official
> docs from AMD).
AMD is not exposing 0x48 yet, but they plan to based on my information
from a few weeks ago.
Paolo
----- [email protected] wrote:
> Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is
> important
> for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore
> guest
> IBRS on VM entry and set it to 0 on VM exit (because Linux does not
> use
> it yet).
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 31ace8d7774a..934a21e02e03 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -183,6 +183,8 @@ struct vcpu_svm {
> u64 gs_base;
> } host;
>
> + u64 spec_ctrl;
> +
> u32 *msrpm;
>
> ulong nmi_iret_rip;
> @@ -248,6 +250,8 @@ struct amd_svm_iommu_ir {
> { .index = MSR_CSTAR, .always = true },
> { .index = MSR_SYSCALL_MASK, .always = true },
> #endif
> + { .index = MSR_IA32_SPEC_CTRL, .always = true },
> + { .index = MSR_IA32_PRED_CMD, .always = true },
> { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
> { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
> { .index = MSR_IA32_LASTINTFROMIP, .always = false },
> @@ -283,6 +287,8 @@ struct amd_svm_iommu_ir {
> /* enable/disable Virtual GIF */
> static int vgif = true;
> module_param(vgif, int, 0444);
> +
> +static bool __read_mostly have_spec_ctrl;
>
> static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
> static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool
> invalidate_gpa);
> @@ -1135,6 +1141,17 @@ static __init int svm_hardware_setup(void)
> pr_info("Virtual GIF supported\n");
> }
>
> + /*
> + * FIXME: this is only needed until SPEC_CTRL is supported
> + * by upstream Linux in cpufeatures, then it can be replaced
> + * with static_cpu_has.
> + */
> + have_spec_ctrl = cpu_has_spec_ctrl();
> + if (have_spec_ctrl)
> + pr_info("kvm: SPEC_CTRL available\n");
> + else
> + pr_info("kvm: SPEC_CTRL not available\n");
> +
> return 0;
>
> err:
> @@ -3599,6 +3616,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
> case MSR_VM_CR:
> msr_info->data = svm->nested.vm_cr_msr;
> break;
> + case MSR_IA32_SPEC_CTRL:
> + msr_info->data = svm->spec_ctrl;
> + break;
> case MSR_IA32_UCODE_REV:
> msr_info->data = 0x01000065;
> break;
> @@ -3754,6 +3774,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu,
> struct msr_data *msr)
> case MSR_VM_IGNNE:
> vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx,
> data);
> break;
> + case MSR_IA32_SPEC_CTRL:
> + svm->spec_ctrl = data;
> + break;
> case MSR_IA32_APICBASE:
> if (kvm_vcpu_apicv_active(vcpu))
> avic_update_vapic_bar(to_svm(vcpu), data);
> @@ -4942,6 +4965,13 @@ static void svm_vcpu_run(struct kvm_vcpu
> *vcpu)
>
> local_irq_enable();
>
> + /*
> + * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
> + * before vmentry.
> + */
> + if (have_spec_ctrl && svm->spec_ctrl != 0)
> + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
> +
> asm volatile (
> "push %%" _ASM_BP "; \n\t"
> "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
> @@ -5015,6 +5045,18 @@ static void svm_vcpu_run(struct kvm_vcpu
> *vcpu)
> #endif
> );
>
> + if (have_spec_ctrl) {
> + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
> + if (svm->spec_ctrl != 0)
> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> + }
> + /*
> + * Speculative execution past the above wrmsrl might encounter
> + * an indirect branch and use guest-controlled contents of the
> + * indirect branch predictor; block it.
> + */
> + asm("lfence");
> +
> #ifdef CONFIG_X86_64
> wrmsrl(MSR_GS_BASE, svm->host.gs_base);
> #else
> --
> 1.8.3.1
Reviewed-by: Liran Alon <[email protected]>
On 09/01/2018 16:58, Liran Alon wrote:
>
> The only thing I slightly dislike is that these MSRs are currently always passed through to the guest, and therefore
> there is no case where vmx_set_msr() is called with !msr_info->host_initiated.
> Don't you think we should BUG_ON(!msr_info->host_initiated)?
All this is in flux. We'll probably get per-VCPU MSR bitmaps anyway
before this patch (which is not part of the initial, minimal fix) is
committed.
Paolo
----- [email protected] wrote:
> ----- [email protected] wrote:
>
> > Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is
> > important
> > for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore
> > guest
> > IBRS on VM entry and set it to 0 on VM exit (because Linux does not
> > use
> > it yet).
> >
> > Signed-off-by: Paolo Bonzini <[email protected]>
> > ---
> > arch/x86/kvm/svm.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 42 insertions(+)
> >
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 31ace8d7774a..934a21e02e03 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -183,6 +183,8 @@ struct vcpu_svm {
> > u64 gs_base;
> > } host;
> >
> > + u64 spec_ctrl;
> > +
> > u32 *msrpm;
> >
> > ulong nmi_iret_rip;
> > @@ -248,6 +250,8 @@ struct amd_svm_iommu_ir {
> > { .index = MSR_CSTAR, .always = true },
> > { .index = MSR_SYSCALL_MASK, .always = true },
> > #endif
> > + { .index = MSR_IA32_SPEC_CTRL, .always = true },
> > + { .index = MSR_IA32_PRED_CMD, .always = true },
> > { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false },
> > { .index = MSR_IA32_LASTBRANCHTOIP, .always = false },
> > { .index = MSR_IA32_LASTINTFROMIP, .always = false },
> > @@ -283,6 +287,8 @@ struct amd_svm_iommu_ir {
> > /* enable/disable Virtual GIF */
> > static int vgif = true;
> > module_param(vgif, int, 0444);
> > +
> > +static bool __read_mostly have_spec_ctrl;
> >
> > static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
> > static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool
> > invalidate_gpa);
> > @@ -1135,6 +1141,17 @@ static __init int svm_hardware_setup(void)
> > pr_info("Virtual GIF supported\n");
> > }
> >
> > + /*
> > + * FIXME: this is only needed until SPEC_CTRL is supported
> > + * by upstream Linux in cpufeatures, then it can be replaced
> > + * with static_cpu_has.
> > + */
> > + have_spec_ctrl = cpu_has_spec_ctrl();
> > + if (have_spec_ctrl)
> > + pr_info("kvm: SPEC_CTRL available\n");
> > + else
> > + pr_info("kvm: SPEC_CTRL not available\n");
> > +
> > return 0;
> >
> > err:
> > @@ -3599,6 +3616,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu,
> > struct msr_data *msr_info)
> > case MSR_VM_CR:
> > msr_info->data = svm->nested.vm_cr_msr;
> > break;
> > + case MSR_IA32_SPEC_CTRL:
> > + msr_info->data = svm->spec_ctrl;
> > + break;
> > case MSR_IA32_UCODE_REV:
> > msr_info->data = 0x01000065;
> > break;
> > @@ -3754,6 +3774,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu,
> > struct msr_data *msr)
> > case MSR_VM_IGNNE:
> > vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n",
> ecx,
> > data);
> > break;
> > + case MSR_IA32_SPEC_CTRL:
> > + svm->spec_ctrl = data;
> > + break;
> > case MSR_IA32_APICBASE:
> > if (kvm_vcpu_apicv_active(vcpu))
> > avic_update_vapic_bar(to_svm(vcpu), data);
> > @@ -4942,6 +4965,13 @@ static void svm_vcpu_run(struct kvm_vcpu
> > *vcpu)
> >
> > local_irq_enable();
> >
> > + /*
> > + * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
> > + * before vmentry.
> > + */
> > + if (have_spec_ctrl && svm->spec_ctrl != 0)
> > + wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
> > +
> > asm volatile (
> > "push %%" _ASM_BP "; \n\t"
> > "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
> > @@ -5015,6 +5045,18 @@ static void svm_vcpu_run(struct kvm_vcpu
> > *vcpu)
> > #endif
> > );
> >
> > + if (have_spec_ctrl) {
> > + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
> > + if (svm->spec_ctrl != 0)
On second thought, I think this condition is a bug.
Intel explicitly specified that after a #VMExit the IBRS bit should be set even if it was already set.
Therefore, you should remove the "if (svm->spec_ctrl != 0)" condition here.
Otherwise, the guest's BTB/BHB could be used by the host.
> > + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> > + }
> > + /*
> > + * Speculative execution past the above wrmsrl might encounter
> > + * an indirect branch and use guest-controlled contents of the
> > + * indirect branch predictor; block it.
> > + */
> > + asm("lfence");
> > +
> > #ifdef CONFIG_X86_64
> > wrmsrl(MSR_GS_BASE, svm->host.gs_base);
> > #else
> > --
> > 1.8.3.1
>
> Reviewed-by: Liran Alon <[email protected]>
I'm going to try to get per-vCPU MSR bitmaps out today or tomorrow.
On Tue, Jan 9, 2018 at 8:11 AM, Paolo Bonzini <[email protected]> wrote:
> On 09/01/2018 16:58, Liran Alon wrote:
>>
>> The only thing I slightly dislike is that these MSRs are currently always passed through to the guest, and therefore
>> there is no case where vmx_set_msr() is called with !msr_info->host_initiated.
>> Don't you think we should BUG_ON(!msr_info->host_initiated)?
>
> All this is in flux. We'll probably get per-VCPU MSR bitmaps anyway
> before this patch (which is not part of the initial, minimal fix) is
> committed.
>
> Paolo
>
----- [email protected] wrote:
> ----- [email protected] wrote:
>
> > Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is
> > important
> > for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore
> > guest
> > IBRS on VM entry and set it to 0 on VM exit (because Linux does not
> > use
> > it yet).
> >
> > Signed-off-by: Paolo Bonzini <[email protected]>
> > ---
> > arch/x86/kvm/vmx.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 42 insertions(+)
> >
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 669f5f74857d..ef603692aa98 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -120,6 +120,8 @@
> > module_param_named(preemption_timer, enable_preemption_timer,
> bool,
> > S_IRUGO);
> > #endif
> >
> > +static bool __read_mostly have_spec_ctrl;
> > +
> > #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
> > #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP |
> > X86_CR0_NE)
> > #define KVM_VM_CR0_ALWAYS_ON \
> > @@ -609,6 +611,8 @@ struct vcpu_vmx {
> > u64 msr_host_kernel_gs_base;
> > u64 msr_guest_kernel_gs_base;
> > #endif
> > + u64 spec_ctrl;
> > +
> > u32 vm_entry_controls_shadow;
> > u32 vm_exit_controls_shadow;
> > u32 secondary_exec_control;
> > @@ -3361,6 +3365,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu,
> > struct msr_data *msr_info)
> > case MSR_IA32_TSC:
> > msr_info->data = guest_read_tsc(vcpu);
> > break;
> > + case MSR_IA32_SPEC_CTRL:
> > + msr_info->data = to_vmx(vcpu)->spec_ctrl;
> > + break;
> > case MSR_IA32_SYSENTER_CS:
> > msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
> > break;
> > @@ -3500,6 +3507,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu,
> > struct msr_data *msr_info)
> > case MSR_IA32_TSC:
> > kvm_write_tsc(vcpu, msr_info);
> > break;
> > + case MSR_IA32_SPEC_CTRL:
> > + to_vmx(vcpu)->spec_ctrl = data;
> > + break;
> > case MSR_IA32_CR_PAT:
> > if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
> > if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> > @@ -7062,6 +7072,17 @@ static __init int hardware_setup(void)
> > goto out;
> > }
> >
> > + /*
> > + * FIXME: this is only needed until SPEC_CTRL is supported
> > + * by upstream Linux in cpufeatures, then it can be replaced
> > + * with static_cpu_has.
> > + */
> > + have_spec_ctrl = cpu_has_spec_ctrl();
> > + if (have_spec_ctrl)
> > + pr_info("kvm: SPEC_CTRL available\n");
> > + else
> > + pr_info("kvm: SPEC_CTRL not available\n");
> > +
> > if (boot_cpu_has(X86_FEATURE_NX))
> > kvm_enable_efer_bits(EFER_NX);
> >
> > @@ -7131,6 +7152,8 @@ static __init int hardware_setup(void)
> > vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> > vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> > vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> > + vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
> > + vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
> >
> > memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
> > vmx_msr_bitmap_legacy, PAGE_SIZE);
> > @@ -9601,6 +9624,13 @@ static void __noclone vmx_vcpu_run(struct
> > kvm_vcpu *vcpu)
> >
> > vmx_arm_hv_timer(vcpu);
> >
> > + /*
> > + * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
> > + * before vmentry.
> > + */
> > + if (have_spec_ctrl && vmx->spec_ctrl != 0)
> > + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> > +
> > vmx->__launched = vmx->loaded_vmcs->launched;
> > asm(
> > /* Store host registers */
> > @@ -9707,6 +9737,18 @@ static void __noclone vmx_vcpu_run(struct
> > kvm_vcpu *vcpu)
> > #endif
> > );
> >
> > + if (have_spec_ctrl) {
> > + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> > + if (vmx->spec_ctrl != 0)
As I said also on the AMD patch, I think this is a bug.
Intel specifies that we should set the IBRS bit on every #VMExit, even if it was already set.
-Liran
> > + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> > + }
> > + /*
> > + * Speculative execution past the above wrmsrl might encounter
> > + * an indirect branch and use guest-controlled contents of the
> > + * indirect branch predictor; block it.
> > + */
> > + asm("lfence");
> > +
> > /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed
> > */
> > if (vmx->host_debugctlmsr)
> > update_debugctlmsr(vmx->host_debugctlmsr);
> > --
> > 1.8.3.1
>
> Reviewed-by: Liran Alon <[email protected]>
On 09/01/2018 17:48, Liran Alon wrote:
>>>
>>> + if (have_spec_ctrl) {
>>> + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>>> + if (vmx->spec_ctrl != 0)
>>> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>
> As I said also on the AMD patch, I think this is a bug.
> Intel specify that we should set IBRS bit even if it was already set on every #VMExit.
That's correct (though I'd like to understand _why_---I'm not inclined
to blindly trust a spec), but for now it's saving a wrmsr of 0. That is
quite obviously okay, and will be also okay after the bare-metal IBRS
patches.
Of course the code will become something like
if (using_ibrs || vmx->spec_ctrl != 0)
wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs);
optimizing the case where the host is using retpolines.
Paolo
----- [email protected] wrote:
> On 09/01/2018 17:48, Liran Alon wrote:
> >>>
> >>> + if (have_spec_ctrl) {
> >>> + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> >>> + if (vmx->spec_ctrl != 0)
> >>> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> >
> > As I said also on the AMD patch, I think this is a bug.
> > Intel specify that we should set IBRS bit even if it was already set
> on every #VMExit.
>
> That's correct (though I'd like to understand _why_---I'm not
> inclined
> to blindly trust a spec), but for now it's saving a wrmsr of 0. That
> is
> quite obviously okay, and will be also okay after the bare-metal IBRS
> patches.
>
> Of course the code will become something like
>
> if (using_ibrs || vmx->spec_ctrl != 0)
> wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs);
>
> optimizing the case where the host is using retpolines.
>
> Paolo
I agree with all the above.
-Liran
Paolo Bonzini <[email protected]> wrote:
> On 09/01/2018 17:48, Liran Alon wrote:
>>>> + if (have_spec_ctrl) {
>>>> + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>>>> + if (vmx->spec_ctrl != 0)
>>>> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>>
>> As I said also on the AMD patch, I think this is a bug.
>> Intel specify that we should set IBRS bit even if it was already set on every #VMExit.
>
> That's correct (though I'd like to understand _why_---I'm not inclined
> to blindly trust a spec), but for now it's saving a wrmsr of 0. That is
> quite obviously okay, and will be also okay after the bare-metal IBRS
> patches.
>
> Of course the code will become something like
>
> if (using_ibrs || vmx->spec_ctrl != 0)
> wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs);
>
> optimizing the case where the host is using retpolines.
Excuse my ignorance: Can you point me to the specifications that mention “we
should set IBRS bit even if it was already set on every #VMExit” ?
Thanks,
Nadav
On 10/01/2018 06:03, Nadav Amit wrote:
>>
>> Of course the code will become something like
>>
>> if (using_ibrs || vmx->spec_ctrl != 0)
>> wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs);
>>
>> optimizing the case where the host is using retpolines.
> Excuse my ignorance: Can you point me to the specifications that mention “we
> should set IBRS bit even if it was already set on every #VMExit” ?
All I have is some PowerPoint slides from Intel. :( They say:
---
A near indirect jump/call/return may be affected by code in a less
privileged prediction mode that executed AFTER IBRS mode was last
written with a value of 1. There is no need to clear IBRS before writing
it with a value of 1. Unconditionally writing it with a value of 1 after
the prediction mode change is sufficient.
VMX non-root is considered a less privileged prediction mode than VM
root. CPL 3 is considered a less privileged prediction mode than CPL0,
1, 2.
Some processors may enhance IBRS such that it isolates prediction modes
effectively and at higher performance if left set instead of being set
when enter OS and VMM and cleared when entering applications. [This is]
enumerated by IA32_ARCH_CAPABILITIES[1].
---
(Yes, it literally says VM root, not VMX root).
But I think this is an awful specification. For two reasons:
* a simple specification that does "IBRS=1 blocks indirect branch
prediction altogether" would actually satisfy the specification just as
well, and it would be nice to know if that's what the processor actually
does.
* the future case with enhanced IBRS still requires the expensive IBPB
when switching between applications or between guests, where the
PCID/VPID (and PCID/VPID invalidation) could be used to remove that need.
Paolo
On 1/10/2018 5:20 AM, Paolo Bonzini wrote:
> * a simple specification that does "IBRS=1 blocks indirect branch
> prediction altogether" would actually satisfy the specification just as
> well, and it would be nice to know if that's what the processor actually
> does.
it doesn't exactly, not for all.
so you really do need to write ibrs again.
On 10/01/2018 15:06, Arjan van de Ven wrote:
> On 1/10/2018 5:20 AM, Paolo Bonzini wrote:
>> * a simple specification that does "IBRS=1 blocks indirect branch
>> prediction altogether" would actually satisfy the specification just as
>> well, and it would be nice to know if that's what the processor actually
>> does.
>
> it doesn't exactly, not for all.
>
> so you really do need to write ibrs again.
Okay, so "always set IBRS=1" does *not* protect against variant 2. Thanks,
Paolo
On Wed, Jan 10, 2018 at 03:28:43PM +0100, Paolo Bonzini wrote:
> On 10/01/2018 15:06, Arjan van de Ven wrote:
> > On 1/10/2018 5:20 AM, Paolo Bonzini wrote:
> >> * a simple specification that does "IBRS=1 blocks indirect branch
> >> prediction altogether" would actually satisfy the specification just as
> >> well, and it would be nice to know if that's what the processor actually
> >> does.
> >
> > it doesn't exactly, not for all.
> >
> > so you really do need to write ibrs again.
>
> Okay, so "always set IBRS=1" does *not* protect against variant 2. Thanks,
And what is the point of this "always set IBRS=1" then? Are there some other
things lurking in the shadows?
>
> Paolo
On 10/01/2018 16:41, Konrad Rzeszutek Wilk wrote:
> On Wed, Jan 10, 2018 at 03:28:43PM +0100, Paolo Bonzini wrote:
>> On 10/01/2018 15:06, Arjan van de Ven wrote:
>>> On 1/10/2018 5:20 AM, Paolo Bonzini wrote:
>>>> * a simple specification that does "IBRS=1 blocks indirect branch
>>>> prediction altogether" would actually satisfy the specification just as
>>>> well, and it would be nice to know if that's what the processor actually
>>>> does.
>>>
>>> it doesn't exactly, not for all.
>>>
>>> so you really do need to write ibrs again.
>>
>> Okay, so "always set IBRS=1" does *not* protect against variant 2. Thanks,
>
> And what is the point of this "always set IBRS=1" then? Are there some other
> things lurking in the shadows?
The idea was that:
1) for workloads that are very kernel-heavy "always set IBRS=1" would be
faster than flipping it on kernel entry/exit,
2) skipping the IBRS=1 write on vmexit would be a nice optimization
because most vmexits come from guest ring 0.
But apparently it's a non-starter. :(
Paolo
On Wed, 2018-01-10 at 10:41 -0500, Konrad Rzeszutek Wilk wrote:
> On Wed, Jan 10, 2018 at 03:28:43PM +0100, Paolo Bonzini wrote:
> > On 10/01/2018 15:06, Arjan van de Ven wrote:
> > > On 1/10/2018 5:20 AM, Paolo Bonzini wrote:
> > >> * a simple specification that does "IBRS=1 blocks indirect
> branch
> > >> prediction altogether" would actually satisfy the specification
> just as
> > >> well, and it would be nice to know if that's what the processor
> actually
> > >> does.
> > >
> > > it doesn't exactly, not for all.
> > >
> > > so you really do need to write ibrs again.
> >
> > Okay, so "always set IBRS=1" does *not* protect against variant 2.
> Thanks,
>
> And what is the point of this "always set IBRS=1" then? Are there
> some other things lurking in the shadows?
Yes. *FUTURE* CPUs will have a mode where you can just set IBRS and
leave it set for ever and not worry about any of this, and the
performance won't even suck.
Quite why it's still an option you have to set in an MSR, and not just
a feature bit that they advertise and do it unconditionally, I have no
idea. But apparently that's the plan.
But no current hardware will do this; they've done the best they can do
with microcode already.
On 10/01/2018 16:48, Woodhouse, David wrote:
>>
>> And what is the point of this "always set IBRS=1" then? Are there
>> some other things lurking in the shadows?
> Yes. *FUTURE* CPUs will have a mode where you can just set IBRS and
> leave it set for ever and not worry about any of this, and the
> performance won't even suck.
>
> Quite why it's still an option you have to set in an MSR, and not just
> a feature bit that they advertise and do it unconditionally, I have no
> idea. But apparently that's the plan.
And again---why you still need IBPBs. That also escapes me. I wouldn't
be surprised if that's just a trick to sneak it in a generation earlier...
Paolo
On Wed, 2018-01-10 at 16:56 +0100, Paolo Bonzini wrote:
> On 10/01/2018 16:48, Woodhouse, David wrote:
> >>
> >> And what is the point of this "always set IBRS=1" then? Are there
> >> some other things lurking in the shadows?
> > Yes. *FUTURE* CPUs will have a mode where you can just set IBRS and
> > leave it set for ever and not worry about any of this, and the
> > performance won't even suck.
> >
> > Quite why it's still an option you have to set in an MSR, and not just
> > a feature bit that they advertise and do it unconditionally, I have no
> > idea. But apparently that's the plan.
>
> And again---why you still need IBPBs. That also escapes me. I wouldn't
> be surprised if that's just a trick to sneak it in a generation earlier...
There was some suggestion in early development that guests/processes
with a different CR3 or different VMCS would be considered "mutually
less privileged". It got dropped, and I'm sure we'll have clarification
before IBRS_ATT actually gets released.
I think the current docs do suggest that you need IBPB on context
switch (and vmptrld) still. But I expect that *might* go away if we're
lucky. And really, we have other things to work on for now before we
support hypothetical future hardware :)
----- [email protected] wrote:
> On Wed, 2018-01-10 at 10:41 -0500, Konrad Rzeszutek Wilk wrote:
> > On Wed, Jan 10, 2018 at 03:28:43PM +0100, Paolo Bonzini wrote:
> > > On 10/01/2018 15:06, Arjan van de Ven wrote:
> > > > On 1/10/2018 5:20 AM, Paolo Bonzini wrote:
> > > >> * a simple specification that does "IBRS=1 blocks indirect
> > branch
> > > >> prediction altogether" would actually satisfy the
> specification
> > just as
> > > >> well, and it would be nice to know if that's what the
> processor
> > actually
> > > >> does.
> > > >
> > > > it doesn't exactly, not for all.
> > > >
> > > > so you really do need to write ibrs again.
> > >
> > > Okay, so "always set IBRS=1" does *not* protect against variant
> 2.
> > Thanks,
> >
> > And what is the point of this "always set IBRS=1" then? Are there
> > some other things lurking in the shadows?
>
> Yes. *FUTURE* CPUs will have a mode where you can just set IBRS and
> leave it set for ever and not worry about any of this, and the
> performance won't even suck.
>
> Quite why it's still an option you have to set in an MSR, and not
> just
> a feature bit that they advertise and do it unconditionally, I have
> no
> idea. But apparently that's the plan.
>
> But no current hardware will do this; they've done the best they can
> do
> with microcode already.
I'm still a bit confused here (maybe I'm not the only one), but I have the following pending questions:
(I really hope someone from Intel will clarify these)
(1) On VMEntry, Intel recommends to just restore SPEC_CTRL to the guest value (using WRMSR or the MSR save/load list) and that's it. As I previously said to Jim, I am missing here a mechanism responsible for hiding the host's BHB & RSB from the guest. Therefore, the guest still has the possibility to info-leak the host's kernel module addresses (kvm-intel.ko / kvm.ko / vmlinux).
* a) As I currently understand things, the only available solution to this is to issue an IBPB and stuff the RSB on every VMEntry before resuming the guest. The IBPB is done to clear the host's BTB/BHB and the RSB stuffing is done to overwrite all of the host's RSB entries.
* b) Does "IBRS ALL THE TIME" usage change this in some aspect? What is Intel's recommendation for handling this info-leak in that usage?
(2) On VMExit, Intel recommends to always save the guest SPEC_CTRL value, set IBRS to 1 (even if it is already set by the guest) and stuff the RSB. What exactly does this write of 1 to IBRS do?
* a) Does it keep all currently existing BTB/BHB entries created in the less-privileged prediction mode and mark them as created in a less-privileged prediction mode, such that they won't be used in the current prediction mode?
* b) Or does it, as Paolo has mentioned multiple times, disable the branch predictor so that it never consults the BTB/BHB at all as long as IBRS=1?
If (b) is true, why is setting IBRS=1 better than just issuing an IBPB that clears all entries? At least in that case the host kernel could still benefit from the branch prediction performance boost.
If (a) is true, is "IBRS ALL THE TIME" usage basically a CPU change that tags all BTB/BHB entries with the prediction mode at creation time, with that tag compared to the current prediction mode when the CPU attempts to use the BTB/BHB?
Regards,
-Liran
I can answer (2) only.
On 10/01/2018 17:19, Liran Alon wrote:
> (2) On VMExit, Intel recommends to always save guest SPEC_CTRL value,
> set IBRS to 1 (even if it is already set by guest) and stuff RSB. What
> exactly does this write of 1 to IBRS do?
> * a) Does it keep all currently existing BTB/BHB entries created by
> less-privileged prediction-mode and marks them as were created in
> less-privileged prediction-mode such that they won't be used in current
> prediction-mode?
> * b) Or does it, as Paolo has mentioned multiple times, disables the
> branch predictor to never consult the BTB/BHB at all as long as IBRS=1?
>> If (b) is true, why is setting IBRS=1 better than just issuing an IBPB that clears all entries? At least in that case the host kernel could still benefit from the branch prediction performance boost.
Arjan said (b) is not true on all processor generations. But even if it
were true, setting IBRS=1 is much, much faster than IBPB.
> If (a) is true, does "IBRS ALL THE TIME" usage is basically a CPU
> change to just create all BTB/BHB entries to be tagged with
> prediction-mode at creation-time and that tag to be compared to current
> prediction-mode when CPU attempts to use BTB/BHB?
I hope so, and I hope said prediction mode includes PCID/VPID too.
While I agree with David that "we have other things to work on for now
before we support hypothetical future hardware", I'd like to make sure
that Intel gets it right...
Paolo
On Wed, 2018-01-10 at 08:19 -0800, Liran Alon wrote:
>
> (1) On VMEntry, Intel recommends to just restore SPEC_CTRL to guest
> value (using WRMSR or MSR save/load list) and that's it. As I
> previously said to Jim, I am missing here a mechanism which should be
> responsible for hiding host's BHB & RSB from guest. Therefore, guest
> still have the possibility to info-leak host's kernel module
> addresses (kvm-intel.ko / kvm.ko / vmlinux).
How so?
The host has the capability to attack the guest... but that's not an
interesting observation.
I'm not sure why you consider it an information leak to have host
addresses in the BTB/RSB when the guest is running; it's not like they
can be *read* from there. Perhaps you could mount a really contrived
attack where you might attempt to provide your own spec-leak code at
various candidate addresses that you think might be host BTB targets,
and validate your assumptions... but I suspect basic cache-based
observations were easier than that anyway.
I don't think this is a consideration.
----- [email protected] wrote:
> On Wed, 2018-01-10 at 08:19 -0800, Liran Alon wrote:
> >
> > (1) On VMEntry, Intel recommends to just restore SPEC_CTRL to guest
> > value (using WRMSR or MSR save/load list) and that's it. As I
> > previously said to Jim, I am missing here a mechanism which should
> be
> > responsible for hiding host's BHB & RSB from guest. Therefore,
> guest
> > still have the possibility to info-leak host's kernel module
> > addresses (kvm-intel.ko / kvm.ko / vmlinux).
>
> How so?
>
> The host has the capability to attack the guest... but that's not an
> interesting observation.
That's not the issue I am talking about here.
I'm talking about the guest's ability to info-leak addresses in the host.
>
> I'm not sure why you consider it an information leak to have host
> addresses in the BTB/RSB when the guest is running; it's not like
> they
> can be *read* from there. Perhaps you could mount a really contrived
> attack where you might attempt to provide your own spec-leak code at
> various candidate addresses that you think might be host BTB targets,
> and validate your assumptions... but I suspect basic cache-based
> observations were easier than that anyway.
>
> I don't think this is a consideration.
Hmm... This is exactly how Google Project-Zero PoC leaks kvm-intel.ko, kvm.ko & vmlinux...
See section "Locating the host kernel" here:
https://googleprojectzero.blogspot.co.il/2018/01/reading-privileged-memory-with-side.html
This was an important primitive for them to actually launch the attack of reading the host's memory pages, as they needed the hypervisor addresses in order to later poison the BTB/BHB with gadgets residing at known host addresses.
-Liran
On Wed, 2018-01-10 at 08:51 -0800, Liran Alon wrote:
>
> Hmm... This is exactly how Google Project-Zero PoC leaks kvm-
> intel.ko, kvm.ko & vmlinux...
> See section "Locating the host kernel" here:
> https://googleprojectzero.blogspot.co.il/2018/01/reading-privileged-memory-with-side.html
>
> This was an important primitive in order for them to actually launch
> the attack of reading host's memory pages. As they needed the
> hypervisor addresses such that they will be able to later poison the
> BTB/BHB to gadgets residing in known host addresses.
Ah, joy. I'm not sure that leak is being plugged. Even setting IBRS=1
when entering the guest isn't guaranteed to plug it, as it's only
defined to prevent predictions from affecting a *more* privileged
prediction mode than they were 'learned' in.
Maybe IBPB would suffice? I'm not sure.
On Wed, Jan 10, 2018 at 8:27 AM, Paolo Bonzini <[email protected]> wrote:
> I can answer (2) only.
>
> On 10/01/2018 17:19, Liran Alon wrote:
>> (2) On VMExit, Intel recommends to always save guest SPEC_CTRL value,
>> set IBRS to 1 (even if it is already set by guest) and stuff RSB. What
>> exactly does this write of 1 to IBRS do?
>> * a) Does it keep all currently existing BTB/BHB entries created by
>> less-privileged prediction-mode and marks them as were created in
>> less-privileged prediction-mode such that they won't be used in current
>> prediction-mode?
>> * b) Or does it, as Paolo has mentioned multiple times, disables the
>> branch predictor to never consult the BTB/BHB at all as long as IBRS=1?
>> If (b) is true, why is setting IBRS=1 better than just issuing an IBPB that clears all entries? At least in that case the host kernel could still benefit from the branch prediction performance boost.
>
> Arjan said (b) is not true on all processor generations. But even if it
> were true, setting IBRS=1 is much, much faster than IBPB.
>
>> If (a) is true, does "IBRS ALL THE TIME" usage is basically a CPU
>> change to just create all BTB/BHB entries to be tagged with
>> prediction-mode at creation-time and that tag to be compared to current
>> prediction-mode when CPU attempts to use BTB/BHB?
>
> I hope so, and I hope said prediction mode includes PCID/VPID too.
Branch prediction entries should probably be tagged with PCID, VPID,
EP4TA, and thread ID...the same things used to tag TLB contexts.
> While I agree with David that "we have other things to work on for now
> before we support hypothetical future hardware", I'd like to make sure
> that Intel gets it right...
>
> Paolo
On 10/01/2018 18:14, Jim Mattson wrote:
>>> If (a) is true, does "IBRS ALL THE TIME" usage is basically a CPU
>>> change to just create all BTB/BHB entries to be tagged with
>>> prediction-mode at creation-time and that tag to be compared to current
>>> prediction-mode when CPU attempts to use BTB/BHB?
>>
>> I hope so, and I hope said prediction mode includes PCID/VPID too.
>
> Branch prediction entries should probably be tagged with PCID, VPID,
> EP4TA, and thread ID...the same things used to tag TLB contexts.
But if so, I don't see the need for IBPB.
Paolo
Paolo Bonzini <[email protected]> wrote:
> On 10/01/2018 18:14, Jim Mattson wrote:
>>>> If (a) is true, does "IBRS ALL THE TIME" usage is basically a CPU
>>>> change to just create all BTB/BHB entries to be tagged with
>>>> prediction-mode at creation-time and that tag to be compared to current
>>>> prediction-mode when CPU attempts to use BTB/BHB?
>>>
>>> I hope so, and I hope said prediction mode includes PCID/VPID too.
>>
>> Branch prediction entries should probably be tagged with PCID, VPID,
>> EP4TA, and thread ID...the same things used to tag TLB contexts.
>
> But if so, I don't see the need for IBPB.
It is highly improbable that a microcode patch can change how prediction
entries are tagged. IIRC, microcode may change the behavior of instructions
and “assists" (e.g., TLB miss). Not much more than that.
Right. For future CPUs with a well-engineered fix, no extra work
should be necessary on VM-entry. However, for current CPUs, we have to
ensure that host kernel addresses can't be deduced by the guest.
IBPB may be sufficient, but Intel's slide deck doesn't make that
clear.
On Wed, Jan 10, 2018 at 9:23 AM, Nadav Amit <[email protected]> wrote:
> Paolo Bonzini <[email protected]> wrote:
>
>> On 10/01/2018 18:14, Jim Mattson wrote:
>>>>> If (a) is true, does "IBRS ALL THE TIME" usage is basically a CPU
>>>>> change to just create all BTB/BHB entries to be tagged with
>>>>> prediction-mode at creation-time and that tag to be compared to current
>>>>> prediction-mode when CPU attempts to use BTB/BHB?
>>>>
>>>> I hope so, and I hope said prediction mode includes PCID/VPID too.
>>>
>>> Branch prediction entries should probably be tagged with PCID, VPID,
>>> EP4TA, and thread ID...the same things used to tag TLB contexts.
>>
>> But if so, I don't see the need for IBPB.
>
> It is highly improbable that a microcode patch can change how prediction
> entries are tagged. IIRC, microcode may change the behavior of instructions
> and “assists" (e.g., TLB miss). Not much more than that.
>
On 1/9/2018 6:03 AM, Paolo Bonzini wrote:
> Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is important
> for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore guest
> IBRS on VM entry and set it to 0 on VM exit (because Linux does not use
> it yet).
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 31ace8d7774a..934a21e02e03 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
...
> @@ -5015,6 +5045,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
> #endif
> );
>
> + if (have_spec_ctrl) {
> + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
> + if (svm->spec_ctrl != 0)
> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> + }
> + /*
> + * Speculative execution past the above wrmsrl might encounter
> + * an indirect branch and use guest-controlled contents of the
> + * indirect branch predictor; block it.
> + */
> + asm("lfence");
This will end up needing to be an alternative macro based on the
LFENCE_RDTSC or MFENCE_RDTSC features [1]. You'll probably just want to
use the speculation barrier macro that ends up being defined to control
the speculation here.
Thanks,
Tom
[1] https://marc.info/?l=linux-kernel&m=151545930207815&w=2
> +
> #ifdef CONFIG_X86_64
> wrmsrl(MSR_GS_BASE, svm->host.gs_base);
> #else
>
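For illustration, such a barrier could be patterned on rdtsc_ordered(), roughly
like the sketch below. This is only a sketch: it assumes the LFENCE_RDTSC /
MFENCE_RDTSC feature bits from the series linked above, and the helper name is
made up.

    #include <asm/alternative.h>
    #include <asm/cpufeatures.h>

    /* Sketch of an alternatives-based speculation barrier: MFENCE by
     * default, relaxed to LFENCE on CPUs where LFENCE is serializing. */
    static inline void spec_barrier(void)
    {
            alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
                              "lfence", X86_FEATURE_LFENCE_RDTSC);
    }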
On 10/01/2018 21:13, Tom Lendacky wrote:
> On 1/9/2018 6:03 AM, Paolo Bonzini wrote:
>> Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is important
>> for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore guest
>> IBRS on VM entry and set it to 0 on VM exit (because Linux does not use
>> it yet).
>>
>> Signed-off-by: Paolo Bonzini <[email protected]>
>> ---
>> arch/x86/kvm/svm.c | 42 ++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 42 insertions(+)
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 31ace8d7774a..934a21e02e03 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>
> ...
>
>> @@ -5015,6 +5045,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
>> #endif
>> );
>>
>> + if (have_spec_ctrl) {
>> + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
>> + if (svm->spec_ctrl != 0)
>> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>> + }
>> + /*
>> + * Speculative execution past the above wrmsrl might encounter
>> + * an indirect branch and use guest-controlled contents of the
>> + * indirect branch predictor; block it.
>> + */
>> + asm("lfence");
>
> This will end up needing to be an alternative macro based on the
> LFENCE_RDTSC or MFENCE_RDTSC features [1]. You'll probably just want to
> use the speculation barrier macro that ends up being defined to control
> the speculation here.
Yes. Though, is there any processor that has spec_ctrl (which no AMD
processor does) but needs a full fence?
Paolo
> Thanks,
> Tom
>
> [1] https://marc.info/?l=linux-kernel&m=151545930207815&w=2
>
>> +
>> #ifdef CONFIG_X86_64
>> wrmsrl(MSR_GS_BASE, svm->host.gs_base);
>> #else
>>
2018-01-10 0:08 GMT+08:00 Paolo Bonzini <[email protected]>:
> Oops, I missed these.
>
> On 09/01/2018 15:22, Konrad Rzeszutek Wilk wrote:
>>> + if (have_spec_ctrl) {
>>> + rdmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
>>> + if (svm->spec_ctrl != 0)
>> Perhaps just :
>>
>> if (svm->spec_ctrl) ?
>>
>> And above too?
>
> These will become != SPEC_CTRL_ENABLE_IBRS or something like that.
>
>>> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>>> + }
>>> + /*
>>> + * Speculative execution past the above wrmsrl might encounter
>>> + * an indirect branch and use guest-controlled contents of the
>>> + * indirect branch predictor; block it.
>>> + */
>>> + asm("lfence");
>> Don't you want this to be part of the if () .. else part?
>
> Not right now, because the processor could speculate that have_spec_ctrl
> == 0 and skip the wrmsrl. After it becomes a static_cpu_has, it could
> move inside, but only if the kernel is compiled with static keys enabled.
>
>> Meaning:
>>
>> if (have_spec_ctrl && svm->spec_ctrl)
>> wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>> else
>> asm("lfence");
>>
>> But .. I am missing something - AMD don't expose 0x48. They expose only 0x49.
>>
>> That is only the IPBP is needed on AMD? (I haven't actually seen any official
>> docs from AMD).
>
> AMD is not exposing 0x48 yet, but they plan to based on my information
> from a few weeks ago.
Haha, interesting, they announce officially there is no issue for
variant 2. http://www.amd.com/en/corporate/speculative-execution
Regards,
Wanpeng Li
2018-01-09 20:03 GMT+08:00 Paolo Bonzini <[email protected]>:
>
> if (!already_loaded) {
> @@ -4029,6 +4031,13 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
> free_vmcs(loaded_vmcs->vmcs);
> loaded_vmcs->vmcs = NULL;
> WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
> +
> + /*
> + * The VMCS could be recycled, causing a false negative in
> + * vmx_vcpu_load; block speculative execution.
> + */
> + if (have_spec_ctrl)
> + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
> }
Intel guys told us the recycle is about the address of vmcs, not the
content. Could you explain more why it matters?
Regards,
Wanpeng Li
The point behind the IBPB in vmx_vcpu_load is to prevent one VCPU from
steering the speculative execution of the next. If the VMCS address is
recycled, vmx_vcpu_load doesn't realize that the VCPUs are different,
and so it won't issue the IBPB.
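For illustration, the check in question looks roughly like this (a simplified
sketch, not the verbatim kernel code):

    /* In vmx_vcpu_load(): only reload the VMCS (and, with this series,
     * issue the IBPB) when the per-cpu VMCS pointer changes. A recycled
     * VMCS address makes this compare a false negative, hence the IBPB
     * added to free_loaded_vmcs() in the patch quoted below. */
    if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
            per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
            vmcs_load(vmx->loaded_vmcs->vmcs);
    }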
On Thu, Jan 11, 2018 at 5:49 PM, Wanpeng Li <[email protected]> wrote:
> 2018-01-09 20:03 GMT+08:00 Paolo Bonzini <[email protected]>:
>>
>> if (!already_loaded) {
>> @@ -4029,6 +4031,13 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
>> free_vmcs(loaded_vmcs->vmcs);
>> loaded_vmcs->vmcs = NULL;
>> WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
>> +
>> + /*
>> + * The VMCS could be recycled, causing a false negative in
>> + * vmx_vcpu_load; block speculative execution.
>> + */
>> + if (have_spec_ctrl)
>> + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
>> }
>
> Intel guys told us the recycle is about the address of vmcs, not the
> content. Could you explain more why it matters?
>
> Regards,
> Wanpeng Li
Nadav,
See section 2.5.1.2 (paragraph 3) in
https://software.intel.com/sites/default/files/managed/c5/63/336996-Speculative-Execution-Side-Channel-Mitigations.pdf.
On Tue, Jan 9, 2018 at 9:03 PM, Nadav Amit <[email protected]> wrote:
> Paolo Bonzini <[email protected]> wrote:
>
>> On 09/01/2018 17:48, Liran Alon wrote:
>>>>> + if (have_spec_ctrl) {
>>>>> + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>>>>> + if (vmx->spec_ctrl != 0)
>>>>> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>>>
>>> As I said also on the AMD patch, I think this is a bug.
>>> Intel specify that we should set IBRS bit even if it was already set on every #VMExit.
>>
>> That's correct (though I'd like to understand _why_---I'm not inclined
>> to blindly trust a spec), but for now it's saving a wrmsr of 0. That is
>> quite obviously okay, and will be also okay after the bare-metal IBRS
>> patches.
>>
>> Of course the code will become something like
>>
>> if (using_ibrs || vmx->spec_ctrl != 0)
>> wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs);
>>
>> optimizing the case where the host is using retpolines.
>
> Excuse my ignorance: Can you point me to the specifications that mention “we
> should set IBRS bit even if it was already set on every #VMExit” ?
>
> Thanks,
> Nadav
Thanks, Jim. Highly appreciated.
Jim Mattson <[email protected]> wrote:
> Nadav,
>
> See section 2.5.1.2 (paragraph 3) in
> https://software.intel.com/sites/default/files/managed/c5/63/336996-Speculative-Execution-Side-Channel-Mitigations.pdf.
>
> On Tue, Jan 9, 2018 at 9:03 PM, Nadav Amit <[email protected]> wrote:
>> Paolo Bonzini <[email protected]> wrote:
>>
>>> On 09/01/2018 17:48, Liran Alon wrote:
>>>>>> + if (have_spec_ctrl) {
>>>>>> + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
>>>>>> + if (vmx->spec_ctrl != 0)
>>>>>> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
>>>>
>>>> As I said also on the AMD patch, I think this is a bug.
>>>> Intel specify that we should set IBRS bit even if it was already set on every #VMExit.
>>>
>>> That's correct (though I'd like to understand _why_---I'm not inclined
>>> to blindly trust a spec), but for now it's saving a wrmsr of 0. That is
>>> quite obviously okay, and will be also okay after the bare-metal IBRS
>>> patches.
>>>
>>> Of course the code will become something like
>>>
>>> if (using_ibrs || vmx->spec_ctrl != 0)
>>> wrmsrl(MSR_IA32_SPEC_CTRL, host_ibrs);
>>>
>>> optimizing the case where the host is using retpolines.
>>
>> Excuse my ignorance: Can you point me to the specifications that mention “we
>> should set IBRS bit even if it was already set on every #VMExit” ?
>>
>> Thanks,
>> Nadav
On Tue, 9 Jan 2018, Paolo Bonzini wrote:
> Expose them to userspace, now that guests can use them.
> I am not adding cpufeatures here to avoid having a kernel
> that shows spec_ctrl in /proc/cpuinfo and actually has no
> support whatsoever for IBRS/IBPB. Keep the ugly special-casing
> for now, and clean it up once the generic arch/x86/ code
> learns about them.
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index daa1918031df..4abb37d9f4d8 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1032,6 +1032,7 @@ unsigned int kvm_get_pt_addr_cnt(void)
> MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
> MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
> MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> + MSR_IA32_SPEC_CTRL,
> };
Hi Paolo,
Thank you for posting this!
I am trying to merge this into 4.14 which does not have
kvm_get_pt_addr_cnt. The rest of the patch commits, but this gets
rejected. Is this a necessary part of the commit?
patching file arch/x86/kvm/cpuid.c
Hunk #1 succeeded at 389 (offset -8 lines).
Hunk #2 succeeded at 479 (offset -9 lines).
Hunk #3 succeeded at 636 (offset -27 lines).
patching file arch/x86/kvm/x86.c
Hunk #1 FAILED at 1032.
1 out of 1 hunk FAILED -- saving rejects to file arch/x86/kvm/x86.c.rej
]# cat arch/x86/kvm/x86.c.rej
--- arch/x86/kvm/x86.c
+++ arch/x86/kvm/x86.c
@@ -1032,6 +1032,7 @@
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+ MSR_IA32_SPEC_CTRL,
};
static unsigned num_msrs_to_save;
Thank you for your help!
--
Eric Wheeler
>
> static unsigned num_msrs_to_save;
> --
> 1.8.3.1
>
>
>
On 2018/1/9 20:03, Paolo Bonzini wrote:
> Direct access to MSR_IA32_SPEC_CTRL and MSR_IA32_PRED_CMD is important
> for performance. Allow load/store of MSR_IA32_SPEC_CTRL, restore guest
> IBRS on VM entry and set it to 0 on VM exit (because Linux does not use
> it yet).
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 42 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 669f5f74857d..ef603692aa98 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -120,6 +120,8 @@
> module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
> #endif
>
> +static bool __read_mostly have_spec_ctrl;
> +
> #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
> #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
> #define KVM_VM_CR0_ALWAYS_ON \
> @@ -609,6 +611,8 @@ struct vcpu_vmx {
> u64 msr_host_kernel_gs_base;
> u64 msr_guest_kernel_gs_base;
> #endif
> + u64 spec_ctrl;
> +
> u32 vm_entry_controls_shadow;
> u32 vm_exit_controls_shadow;
> u32 secondary_exec_control;
> @@ -3361,6 +3365,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> case MSR_IA32_TSC:
> msr_info->data = guest_read_tsc(vcpu);
> break;
> + case MSR_IA32_SPEC_CTRL:
> + msr_info->data = to_vmx(vcpu)->spec_ctrl;
> + break;
> case MSR_IA32_SYSENTER_CS:
> msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
> break;
> @@ -3500,6 +3507,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> case MSR_IA32_TSC:
> kvm_write_tsc(vcpu, msr_info);
> break;
> + case MSR_IA32_SPEC_CTRL:
> + to_vmx(vcpu)->spec_ctrl = data;
> + break;
> case MSR_IA32_CR_PAT:
> if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
> if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
> @@ -7062,6 +7072,17 @@ static __init int hardware_setup(void)
> goto out;
> }
>
> + /*
> + * FIXME: this is only needed until SPEC_CTRL is supported
> + * by upstream Linux in cpufeatures, then it can be replaced
> + * with static_cpu_has.
> + */
> + have_spec_ctrl = cpu_has_spec_ctrl();
> + if (have_spec_ctrl)
> + pr_info("kvm: SPEC_CTRL available\n");
> + else
> + pr_info("kvm: SPEC_CTRL not available\n");
> +
In this approach, we must reload these modules if we update the microcode later ?
> if (boot_cpu_has(X86_FEATURE_NX))
> kvm_enable_efer_bits(EFER_NX);
>
> @@ -7131,6 +7152,8 @@ static __init int hardware_setup(void)
> vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
> vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
> vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_SPEC_CTRL, false);
> + vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
>
> memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
> vmx_msr_bitmap_legacy, PAGE_SIZE);
> @@ -9601,6 +9624,13 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
>
> vmx_arm_hv_timer(vcpu);
>
> + /*
> + * MSR_IA32_SPEC_CTRL is restored after the last indirect branch
> + * before vmentry.
> + */
> + if (have_spec_ctrl && vmx->spec_ctrl != 0)
> + wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> +
> vmx->__launched = vmx->loaded_vmcs->launched;
> asm(
> /* Store host registers */
> @@ -9707,6 +9737,18 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> #endif
> );
>
> + if (have_spec_ctrl) {
> + rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
> + if (vmx->spec_ctrl != 0)
> + wrmsrl(MSR_IA32_SPEC_CTRL, 0);
> + }
> + /*
> + * Speculative execution past the above wrmsrl might encounter
> + * an indirect branch and use guest-controlled contents of the
> + * indirect branch predictor; block it.
> + */
> + asm("lfence");
> +
> /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
> if (vmx->host_debugctlmsr)
> update_debugctlmsr(vmx->host_debugctlmsr);
--
Regards,
Longpeng(Mike)
On Fri, 2018-01-12 at 09:03 -0800, Jim Mattson wrote:
> The point behind the IBPB in vmx_vcpu_load is to prevent one VCPU from
> steering the speculative execution of the next. If the VMCS address is
> recycled, vmx_vcpu_load doesn't realize that the VCPUs are different,
> and so it won't issue the IBPB.
I don't understand the sequence of events that could lead to this.
If the VMCS is freed, surely per_cpu(current_vmcs, cpu) has to be
cleared? If the VMCS is freed while it's still *active* on a CPU,
that's a bug, surely? And if that CPU is later offlined and clears the
VMCS, it's going to scribble on freed (and potentially re-used) memory.
So vmx_vcpu_load() *will* realise that it's different, won't it?
>> + if (have_spec_ctrl)
>> + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
Also, I think the same condition applies to the conditional branches
over the IBPB-frobbing, as it does to setting IBRS. You can eschew the
'else lfence' only if you put in a comment showing that you've proved
it's safe. Many of the other bits like this are being done with
alternatives, which avoids that concern completely.
But really, I don't like this series much. Don't say "let's do this
until upstream supports...". Just fix it up properly, and add the
generic X86_FEATURE_IBPB bit and use it. We have *too* many separate
tiny patch sets, and we need to be getting our act together and putting
it all in one.
Just add the new MSR at the end of the array.
Paolo
----- Eric Wheeler <[email protected]> ha scritto:
> On Tue, 9 Jan 2018, Paolo Bonzini wrote:
>
> > Expose them to userspace, now that guests can use them.
> > I am not adding cpufeatures here to avoid having a kernel
> > that shows spec_ctrl in /proc/cpuinfo and actually has no
> > support whatsoever for IBRS/IBPB. Keep the ugly special-casing
> > for now, and clean it up once the generic arch/x86/ code
> > learns about them.
> >
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index daa1918031df..4abb37d9f4d8 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -1032,6 +1032,7 @@ unsigned int kvm_get_pt_addr_cnt(void)
> > MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
> > MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
> > MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> > + MSR_IA32_SPEC_CTRL,
> > };
>
> Hi Paolo,
>
> Thank you for posting this!
>
> I am trying to merge this into 4.14 which does not have
> kvm_get_pt_addr_cnt. The rest of the patch commits, but this gets
> rejected. Is this a necessary part of the commit?
>
> patching file arch/x86/kvm/cpuid.c
> Hunk #1 succeeded at 389 (offset -8 lines).
> Hunk #2 succeeded at 479 (offset -9 lines).
> Hunk #3 succeeded at 636 (offset -27 lines).
> patching file arch/x86/kvm/x86.c
> Hunk #1 FAILED at 1032.
> 1 out of 1 hunk FAILED -- saving rejects to file arch/x86/kvm/x86.c.rej
>
> ]# cat arch/x86/kvm/x86.c.rej
> --- arch/x86/kvm/x86.c
> +++ arch/x86/kvm/x86.c
> @@ -1032,6 +1032,7 @@
> MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
> MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
> MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> + MSR_IA32_SPEC_CTRL,
> };
>
> static unsigned num_msrs_to_save;
>
>
> Thank you for your help!
>
> --
> Eric Wheeler
>
>
> >
> > static unsigned num_msrs_to_save;
> > --
> > 1.8.3.1
> >
> >
> >
On 13/01/2018 10:29, Woodhouse, David wrote:
> On Fri, 2018-01-12 at 09:03 -0800, Jim Mattson wrote:
>> The point behind the IBPB in vmx_vcpu_load is to prevent one VCPU from
>> steering the speculative execution of the next. If the VMCS address is
>> recycled, vmx_vcpu_load doesn't realize that the VCPUs are different,
>> and so it won't issue the IBPB.
>
> I don't understand the sequence of events that could lead to this.
>
> If the VMCS is freed, surely per_cpu(current_vmcs, cpu) has to be
> cleared? If the VMCS is freed while it's still *active* on a CPU,
> that's a bug, surely? And if that CPU is later offlined and clears the
> VMCS, it's going to scribble on freed (and potentially re-used) memory.
You're right, svm.c needs it but vmx.c does not (because AMD has no
vmclear equivalent).
>>> + if (have_spec_ctrl)
>>> + wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
>
> Also, I think the same condition applies to the conditional branches
> over the IBPB-frobbing, as it does to setting IBRS. You can eschew the
> 'else lfence' only if you put in a comment showing that you've proved
> it's safe. Many of the other bits like this are being done with
> alternatives, which avoids that concern completely.
>
> But really, I don't like this series much. Don't say "let's do this
> until upstream supports...". Just fix it up properly, and add the
> generic X86_FEATURE_IBPB bit and use it. We have *too* many separate
> tiny patch sets, and we need to be getting our act together and putting
> it all in one.
I agree, this series is not meant to be committed. I only posted it to get
early reviews (and it was very effective for that purpose).
Paolo
On 13/01/2018 11:16, Longpeng (Mike) wrote:
>> + /*
>> + * FIXME: this is only needed until SPEC_CTRL is supported
>> + * by upstream Linux in cpufeatures, then it can be replaced
>> + * with static_cpu_has.
>> + */
>> + have_spec_ctrl = cpu_has_spec_ctrl();
>> + if (have_spec_ctrl)
>> + pr_info("kvm: SPEC_CTRL available\n");
>> + else
>> + pr_info("kvm: SPEC_CTRL not available\n");
>> +
>
> In this approach, we must reload these modules if we update the microcode later ?
I strongly suggest using early microcode update anyway.
Paolo
On Mon, 15 Jan 2018, Paolo Bonzini wrote:
> On 13/01/2018 11:16, Longpeng (Mike) wrote:
> >> + /*
> >> + * FIXME: this is only needed until SPEC_CTRL is supported
> >> + * by upstream Linux in cpufeatures, then it can be replaced
> >> + * with static_cpu_has.
> >> + */
> >> + have_spec_ctrl = cpu_has_spec_ctrl();
> >> + if (have_spec_ctrl)
> >> + pr_info("kvm: SPEC_CTRL available\n");
> >> + else
> >> + pr_info("kvm: SPEC_CTRL not available\n");
> >> +
> >
> > In this approach, we must reload these modules if we update the microcode later ?
>
> I strongly suggest using early microcode update anyway.
We had the discussion already and we are not going to support late microcode
loading. It's just not worth the trouble.
Also please do not commit any of this before we have sorted out the bare
metal IBRS/IBPB support. We really don't want to have two variants of that
in tree.
Thanks,
tglx
On 09.01.2018 13:03, Paolo Bonzini wrote:
> As an interim measure until SPEC_CTRL is supported by upstream
> Linux in cpufeatures, add a function that lets vmx.c and svm.c
> know whether to save/restore MSR_IA32_SPEC_CTRL.
>
> Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/cpuid.c | 3 ---
> arch/x86/kvm/cpuid.h | 22 ++++++++++++++++++++++
> 2 files changed, 22 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 8e9a07c557f1..767af697c20c 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -67,9 +67,6 @@ u64 kvm_supported_xcr0(void)
>
> #define F(x) bit(X86_FEATURE_##x)
>
> -/* These are scattered features in cpufeatures.h. */
> -#define KVM_CPUID_BIT_AVX512_4VNNIW 2
> -#define KVM_CPUID_BIT_AVX512_4FMAPS 3
> #define KF(x) bit(KVM_CPUID_BIT_##x)
>
> int kvm_update_cpuid(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index c2cea6651279..8d04ccf177ce 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -155,6 +155,28 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
> return x86_stepping(best->eax);
> }
>
> +/* These are scattered features in cpufeatures.h. */
> +#define KVM_CPUID_BIT_AVX512_4VNNIW 2
> +#define KVM_CPUID_BIT_AVX512_4FMAPS 3
> +#define KVM_CPUID_BIT_SPEC_CTRL 26
> +#define KVM_CPUID_BIT_STIBP 27
> +
> +/* CPUID[eax=0x80000008].ebx */
> +#define KVM_CPUID_BIT_IBPB_SUPPORT 12
> +
> +static inline bool cpu_has_spec_ctrl(void)
> +{
> + u32 eax, ebx, ecx, edx;
> + cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
> +
> + return edx & bit(KVM_CPUID_BIT_SPEC_CTRL);
> +}
> +
> +static inline bool cpu_has_ibpb_support(void)
> +{
> + return cpuid_ebx(0x80000008) & bit(KVM_CPUID_BIT_IBPB_SUPPORT);
> +}
> +
> static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
> {
> return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
>
Reviewed-by: David Hildenbrand <[email protected]>
--
Thanks,
David / dhildenb
On 09.01.2018 13:03, Paolo Bonzini wrote:
> If always==true, then read/write bits are cleared from
> the MSR permission bitmap, thus passing-through the MSR.
> Fix the comment to match reality.
>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> arch/x86/kvm/svm.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 55dde3896898..31ace8d7774a 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -236,7 +236,7 @@ struct amd_svm_iommu_ir {
>
> static const struct svm_direct_access_msrs {
> u32 index; /* Index of the MSR */
> - bool always; /* True if intercept is always on */
> + bool always; /* True if intercept is always off */
> } direct_access_msrs[] = {
> { .index = MSR_STAR, .always = true },
> { .index = MSR_IA32_SYSENTER_CS, .always = true },
>
The function set_msr_interception() is really named confusingly.
If we pass in a "1", we clear the bit, resulting in no interception.
So 1==no intercept, 0==intercept. This function would be better named
"disable_msr_interception" or sth. like that.
Reviewed-by: David Hildenbrand <[email protected]>
--
Thanks,
David / dhildenb
On Sat, 13 Jan 2018, Paolo Bonzini wrote:
> Just add the new MSR at the end of the array.
I'm assuming you meant emulated_msrs[], correct?
--
Eric Wheeler
>
> Paolo
>
> ----- Eric Wheeler <[email protected]> ha scritto:
> > On Tue, 9 Jan 2018, Paolo Bonzini wrote:
> >
> > > Expose them to userspace, now that guests can use them.
> > > I am not adding cpufeatures here to avoid having a kernel
> > > that shows spec_ctrl in /proc/cpuinfo and actually has no
> > > support whatsoever for IBRS/IBPB. Keep the ugly special-casing
> > > for now, and clean it up once the generic arch/x86/ code
> > > learns about them.
> > >
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index daa1918031df..4abb37d9f4d8 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -1032,6 +1032,7 @@ unsigned int kvm_get_pt_addr_cnt(void)
> > > MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
> > > MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
> > > MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> > > + MSR_IA32_SPEC_CTRL,
> > > };
> >
> > Hi Paolo,
> >
> > Thank you for posting this!
> >
> > I am trying to merge this into 4.14 which does not have
> > kvm_get_pt_addr_cnt. The rest of the patch commits, but this gets
> > rejected. Is this a necessary part of the commit?
> >
> > patching file arch/x86/kvm/cpuid.c
> > Hunk #1 succeeded at 389 (offset -8 lines).
> > Hunk #2 succeeded at 479 (offset -9 lines).
> > Hunk #3 succeeded at 636 (offset -27 lines).
> > patching file arch/x86/kvm/x86.c
> > Hunk #1 FAILED at 1032.
> > 1 out of 1 hunk FAILED -- saving rejects to file arch/x86/kvm/x86.c.rej
> >
> > ]# cat arch/x86/kvm/x86.c.rej
> > --- arch/x86/kvm/x86.c
> > +++ arch/x86/kvm/x86.c
> > @@ -1032,6 +1032,7 @@
> > MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
> > MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
> > MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> > + MSR_IA32_SPEC_CTRL,
> > };
> >
> > static unsigned num_msrs_to_save;
> >
> >
> > Thank you for your help!
> >
> > --
> > Eric Wheeler
> >
> >
> > >
> > > static unsigned num_msrs_to_save;
> > > --
> > > 1.8.3.1
> > >
> > >
> > >
>
>
On Tue, 9 Jan 2018, Paolo Bonzini wrote:
> MSR_IA32_SPEC_CTRL is not available unless CPU[7,0].EDX[26] is 1.
> Check that against host CPUID or guest CPUID, respectively for
> host-initiated and guest-initiated accesses.
Hi Radim, Paolo:
In porting this patch series to v4.14, I'm getting this BUILD_BUG_ON:
In file included from arch/x86/kvm/vmx.c:21:0:
In function 'x86_feature_cpuid',
inlined from 'guest_cpuid_get_register' at arch/x86/kvm/cpuid.h:72:25,
inlined from 'vmx_get_msr' at arch/x86/kvm/cpuid.h:101:6:
arch/x86/kvm/cpuid.h:64:232: error: call to '__compiletime_assert_64'
declared with attribute error: BUILD_BUG_ON failed:
reverse_cpuid[x86_leaf].function == 0
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
^
In function 'x86_feature_cpuid',
inlined from 'guest_cpuid_get_register' at arch/x86/kvm/cpuid.h:72:25,
inlined from 'vmx_set_msr' at arch/x86/kvm/cpuid.h:101:6:
arch/x86/kvm/cpuid.h:64:232: error: call to '__compiletime_assert_64'
declared with attribute error: BUILD_BUG_ON failed:
reverse_cpuid[x86_leaf].function == 0
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
I think this is caused by the following call stack for
X86_FEATURE_SPEC_CTRL, but if not please correct me here:
arch/x86/kvm/vmx.c:
vmx_get_msr/vmx_set_msr()
guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)
guest_cpuid_get_register(vcpu, x86_feature);
x86_feature_cpuid(x86_feature);
x86_feature_cpuid(unsigned x86_feature)
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
It looks like I need to add something to reverse_cpuid[] but I'm not sure
what.
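My best guess is an entry for the CPUID.(7,0):EDX leaf, roughly along these
lines (a sketch only, assuming a CPUID_7_EDX feature word were defined in
cpufeatures.h, which plain v4.14 does not have):

    /* hypothetical addition to reverse_cpuid[] in arch/x86/kvm/cpuid.h */
    [CPUID_7_EDX] = { 7, 0, CPUID_EDX },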
Do you know what needs to be added here?
-Eric
--
Eric Wheeler
>
> Suggested-by: Jim Mattson <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> This is for after X86_FEATURE_SPEC_CTRL is added to Linux, but
> I still wanted to ack Jim's improvement.
>
> arch/x86/kvm/svm.c | 8 ++++++++
> arch/x86/kvm/vmx.c | 8 ++++++++
> 2 files changed, 16 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 97126c2bd663..3a646580d7c5 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = svm->nested.vm_cr_msr;
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> msr_info->data = svm->spec_ctrl;
> break;
> case MSR_IA32_UCODE_REV:
> @@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> svm->spec_ctrl = data;
> break;
> case MSR_IA32_APICBASE:
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 49b4a2d61603..42bc7ee293e4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3368,6 +3368,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = guest_read_tsc(vcpu);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> msr_info->data = to_vmx(vcpu)->spec_ctrl;
> break;
> case MSR_IA32_SYSENTER_CS:
> @@ -3510,6 +3514,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> kvm_write_tsc(vcpu, msr_info);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> to_vmx(vcpu)->spec_ctrl = data;
> break;
> case MSR_IA32_CR_PAT:
> --
> 1.8.3.1
>
>
----- Eric Wheeler <[email protected]> ha scritto:
> On Sat, 13 Jan 2018, Paolo Bonzini wrote:
>
> > Just add the new MSR at the end of the array.
>
> I'm assuming you meant emulated_msrs[], correct?
No, msrs_to_save. It's just above emulated_msrs.
Paolo
>
>
> --
> Eric Wheeler
>
>
>
> >
> > Paolo
> >
> > ----- Eric Wheeler <[email protected]> ha scritto:
> > > On Tue, 9 Jan 2018, Paolo Bonzini wrote:
> > >
> > > > Expose them to userspace, now that guests can use them.
> > > > I am not adding cpufeatures here to avoid having a kernel
> > > > that shows spec_ctrl in /proc/cpuinfo and actually has no
> > > > support whatsoever for IBRS/IBPB. Keep the ugly special-casing
> > > > for now, and clean it up once the generic arch/x86/ code
> > > > learns about them.
> > > >
> > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > index daa1918031df..4abb37d9f4d8 100644
> > > > --- a/arch/x86/kvm/x86.c
> > > > +++ b/arch/x86/kvm/x86.c
> > > > @@ -1032,6 +1032,7 @@ unsigned int kvm_get_pt_addr_cnt(void)
> > > > MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
> > > > MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
> > > > MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> > > > + MSR_IA32_SPEC_CTRL,
> > > > };
> > >
> > > Hi Paolo,
> > >
> > > Thank you for posting this!
> > >
> > > I am trying to merge this into 4.14 which does not have
> > > kvm_get_pt_addr_cnt. The rest of the patch commits, but this gets
> > > rejected. Is this a necessary part of the commit?
> > >
> > > patching file arch/x86/kvm/cpuid.c
> > > Hunk #1 succeeded at 389 (offset -8 lines).
> > > Hunk #2 succeeded at 479 (offset -9 lines).
> > > Hunk #3 succeeded at 636 (offset -27 lines).
> > > patching file arch/x86/kvm/x86.c
> > > Hunk #1 FAILED at 1032.
> > > 1 out of 1 hunk FAILED -- saving rejects to file arch/x86/kvm/x86.c.rej
> > >
> > > ]# cat arch/x86/kvm/x86.c.rej
> > > --- arch/x86/kvm/x86.c
> > > +++ arch/x86/kvm/x86.c
> > > @@ -1032,6 +1032,7 @@
> > > MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
> > > MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
> > > MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
> > > + MSR_IA32_SPEC_CTRL,
> > > };
> > >
> > > static unsigned num_msrs_to_save;
> > >
> > >
> > > Thank you for your help!
> > >
> > > --
> > > Eric Wheeler
> > >
> > >
> > > >
> > > > static unsigned num_msrs_to_save;
> > > > --
> > > > 1.8.3.1
> > > >
> > > >
> > > >
> >
> >
On 16/01/2018 01:55, Eric Wheeler wrote:
> On Tue, 9 Jan 2018, Paolo Bonzini wrote:
>> MSR_IA32_SPEC_CTRL is not available unless CPU[7,0].EDX[26] is 1.
>> Check that against host CPUID or guest CPUID, respectively for
>> host-initiated and guest-initiated accesses.
>
> Hi Radim, Paolo:
>
> In porting this patch series to v4.14
Just don't apply this patch.
Paolo
> In file included from arch/x86/kvm/vmx.c:21:0:
> In function 'x86_feature_cpuid',
> inlined from 'guest_cpuid_get_register' at arch/x86/kvm/cpuid.h:72:25,
> inlined from 'vmx_get_msr' at arch/x86/kvm/cpuid.h:101:6:
> arch/x86/kvm/cpuid.h:64:232: error: call to '__compiletime_assert_64'
> declared with attribute error: BUILD_BUG_ON failed:
> reverse_cpuid[x86_leaf].function == 0
> BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
>
> ^
> In function 'x86_feature_cpuid',
> inlined from 'guest_cpuid_get_register' at arch/x86/kvm/cpuid.h:72:25,
> inlined from 'vmx_set_msr' at arch/x86/kvm/cpuid.h:101:6:
> arch/x86/kvm/cpuid.h:64:232: error: call to '__compiletime_assert_64'
> declared with attribute error: BUILD_BUG_ON failed:
> reverse_cpuid[x86_leaf].function == 0
> BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
>
>
> I think this is caused by the following call stack for
> X86_FEATURE_SPEC_CTRL, but if not please correct me here:
>
> arch/x86/kvm/vmx.c:
> vmx_get_msr/vmx_set_msr()
> guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)
> guest_cpuid_get_register(vcpu, x86_feature);
> x86_feature_cpuid(x86_feature);
> x86_feature_cpuid(unsigned x86_feature)
> BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
>
>
> It looks like I need to add something to reverse_cpuid[] but I'm not sure
> what.
>
> Do you know what needs to be added here?
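For what it's worth, the BUILD_BUG_ON fires because guest_cpuid_has() needs a reverse_cpuid[] entry for the feature word that contains X86_FEATURE_SPEC_CTRL, i.e. CPUID leaf 7 (ECX=0), register EDX. Upstream later resolved this by introducing a CPUID_7_EDX feature word; a rough sketch of the matching arch/x86/kvm/cpuid.h entries, only meaningful once such a word exists in cpufeatures (otherwise simply drop this patch, as suggested above):

	[CPUID_7_0_EBX] = {         7, 0, CPUID_EBX},	/* existing entry */
	[CPUID_7_EDX]   = {         7, 0, CPUID_EDX},	/* word holding the SPEC_CTRL/STIBP bits */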
>
> -Eric
>
> --
> Eric Wheeler
>
>
>
>>
>> Suggested-by: Jim Mattson <[email protected]>
>> Signed-off-by: Paolo Bonzini <[email protected]>
>> ---
>> This is for after X86_FEATURE_SPEC_CTRL is added to Linux, but
>> I still wanted to ack Jim's improvement.
>>
>> arch/x86/kvm/svm.c | 8 ++++++++
>> arch/x86/kvm/vmx.c | 8 ++++++++
>> 2 files changed, 16 insertions(+)
>>
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 97126c2bd663..3a646580d7c5 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> msr_info->data = svm->nested.vm_cr_msr;
>> break;
>> case MSR_IA32_SPEC_CTRL:
>> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
>> + (!msr_info->host_initiated &&
>> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
>> + return 1;
>> msr_info->data = svm->spec_ctrl;
>> break;
>> case MSR_IA32_UCODE_REV:
>> @@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
>> vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
>> break;
>> case MSR_IA32_SPEC_CTRL:
>> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
>> + (!msr_info->host_initiated &&
>> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
>> + return 1;
>> svm->spec_ctrl = data;
>> break;
>> case MSR_IA32_APICBASE:
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 49b4a2d61603..42bc7ee293e4 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -3368,6 +3368,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> msr_info->data = guest_read_tsc(vcpu);
>> break;
>> case MSR_IA32_SPEC_CTRL:
>> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
>> + (!msr_info->host_initiated &&
>> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
>> + return 1;
>> msr_info->data = to_vmx(vcpu)->spec_ctrl;
>> break;
>> case MSR_IA32_SYSENTER_CS:
>> @@ -3510,6 +3514,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> kvm_write_tsc(vcpu, msr_info);
>> break;
>> case MSR_IA32_SPEC_CTRL:
>> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
>> + (!msr_info->host_initiated &&
>> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
>> + return 1;
>> to_vmx(vcpu)->spec_ctrl = data;
>> break;
>> case MSR_IA32_CR_PAT:
>> --
>> 1.8.3.1
>>
>>
Hello Paolo,
I've backported this patch to 4.1, after adding the per-vCPU MSR
bitmap. I also enabled the MSR_IA32_SPEC_CTRL intercept when qemu so instructs [1].
Reviewed-by: Mihai Carabas <[email protected]>
[1]
+++ b/arch/x86/kvm/vmx.c
@@ -8391,6 +8391,16 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD,
 				      MSR_TYPE_R | MSR_TYPE_W);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_BNDCFGS,
 				      MSR_TYPE_R | MSR_TYPE_W);
+	/*
+	 * If the physical CPU or the vCPU of this VM doesn't
+	 * support SPEC_CTRL feature, catch each access to it.
+	 */
+	if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+	    !guest_cpuid_has_spec_ctrl(&vmx->vcpu))
+		vmx_enable_intercept_for_msr(
+				msr_bitmap,
+				MSR_IA32_SPEC_CTRL,
+				MSR_TYPE_R | MSR_TYPE_W);
 	/*
 	 * If PML is turned on, failure on enabling PML just results in failure
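The guest_cpuid_has_spec_ctrl() helper used above presumably comes from the SPEC_CTRL/IBPB_SUPPORT accessor patch at the start of this series (on kernels without it, it has to be open-coded); in essence it just checks CPUID.(EAX=7,ECX=0):EDX[26] in the guest's CPUID, roughly:

static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best;

	/* CPUID.(EAX=7,ECX=0):EDX[26] advertises IBRS/IBPB ("spec_ctrl"). */
	best = kvm_find_cpuid_entry(vcpu, 7, 0);
	return best && (best->edx & (1u << 26));
}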
On 09.01.2018 14:03, Paolo Bonzini wrote:
> MSR_IA32_SPEC_CTRL is not available unless CPU[7,0].EDX[26] is 1.
> Check that against host CPUID or guest CPUID, respectively for
> host-initiated and guest-initiated accesses.
>
> Suggested-by: Jim Mattson <[email protected]>
> Signed-off-by: Paolo Bonzini <[email protected]>
> ---
> This is for after X86_FEATURE_SPEC_CTRL is added to Linux, but
> I still wanted to ack Jim's improvement.
>
> arch/x86/kvm/svm.c | 8 ++++++++
> arch/x86/kvm/vmx.c | 8 ++++++++
> 2 files changed, 16 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 97126c2bd663..3a646580d7c5 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3648,6 +3648,10 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = svm->nested.vm_cr_msr;
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> msr_info->data = svm->spec_ctrl;
> break;
> case MSR_IA32_UCODE_REV:
> @@ -3806,6 +3810,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> svm->spec_ctrl = data;
> break;
> case MSR_IA32_APICBASE:
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 49b4a2d61603..42bc7ee293e4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3368,6 +3368,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = guest_read_tsc(vcpu);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> msr_info->data = to_vmx(vcpu)->spec_ctrl;
> break;
> case MSR_IA32_SYSENTER_CS:
> @@ -3510,6 +3514,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> kvm_write_tsc(vcpu, msr_info);
> break;
> case MSR_IA32_SPEC_CTRL:
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + (!msr_info->host_initiated &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)))
> + return 1;
> to_vmx(vcpu)->spec_ctrl = data;
> break;
> case MSR_IA32_CR_PAT:
>
All MSR intercepts are enabled by default, so I don't think this patch
does anything at all, unless I'm missing some context.
On Tue, Jan 30, 2018 at 5:21 AM, Mihai Carabas <[email protected]> wrote:
> Hello Paolo,
>
> I've back ported this patch on 4.1, after adding the per-vcpu MSR bitmap.
> Also enabled the SPEC_CTRL_MSR intercept if qemu instructed so [1].
>
> Reviewed-by: Mihai Carabas <[email protected]>
>
> [1]
> +++ b/arch/x86/kvm/vmx.c
> @@ -8391,6 +8391,16 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm
> *kvm, unsigned int id)
> vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD,
> MSR_TYPE_R | MSR_TYPE_W);
> vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_BNDCFGS,
> MSR_TYPE_R | MSR_TYPE_W);
>
> + /*
> + * If the physical CPU or the vCPU of this VM doesn't
> + * support SPEC_CTRL feature, catch each access to it.
> + */
> + if (!static_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + !guest_cpuid_has_spec_ctrl(&vmx->vcpu))
> + vmx_enable_intercept_for_msr(
> + msr_bitmap,
> + MSR_IA32_SPEC_CTRL,
> + MSR_TYPE_R | MSR_TYPE_W);
>
> /*
> * If PML is turned on, failure on enabling PML just results in
> failure
>
>
>
On 30.01.2018 18:33, Jim Mattson wrote:
> All MSR intercepts are enabled by default, so I don't think this patch
> does anything at all, unless I'm missing some context.
>
Currently upstream, interception is already disabled for some MSRs:
https://github.com/torvalds/linux/blob/master/arch/x86/kvm/vmx.c#L6838
In particular, for this patch, the MSR_IA32_SPEC_CTRL intercept is
disabled in 3/8: https://patchwork.kernel.org/patch/10151889/
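That distinction matters because vmx_get_msr()/vmx_set_msr() only run when the access is intercepted: once the bitmap bit for MSR_IA32_SPEC_CTRL is cleared, guest RDMSR/WRMSR go straight to the hardware MSR and never reach those handlers, so the CPUID checks above are then only exercised for host-initiated (ioctl) accesses or for guests whose accesses are still intercepted. The pattern patch 3/8 uses is roughly the following (a sketch; the exact call site depends on whether the global or the per-vCPU MSR bitmap variant is used):

	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SPEC_CTRL,
				      MSR_TYPE_R | MSR_TYPE_W);
	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_PRED_CMD,
				      MSR_TYPE_R | MSR_TYPE_W);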
It's really hard to tell which patches are being proposed for which
repositories, but assuming that everything else is correct, I don't
think your condition is adequate. What if the physical CPU and the
virtual CPU both have CPUID.(EAX=7H,ECX=0):EDX[26], but only the
physical CPU has CPUID.(EAX=7H,ECX=0):EDX[27]? If the guest has write
access to MSR_IA32_SPEC_CTRL, it can set MSR_IA32_SPEC_CTRL[1]
(STIBP), even though setting that bit in the guest should raise #GP.
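For reference, the two enumeration bits in question sit in CPUID.(EAX=7,ECX=0):EDX: bit 26 advertises IBRS/IBPB (the "spec_ctrl" feature) and bit 27 advertises STIBP. A small standalone program to check them on the host (illustrative only):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (__get_cpuid_max(0, NULL) < 7) {
		puts("CPUID leaf 7 not supported");
		return 1;
	}
	__cpuid_count(7, 0, eax, ebx, ecx, edx);
	printf("IBRS/IBPB (EDX bit 26): %u\n", (edx >> 26) & 1);
	printf("STIBP     (EDX bit 27): %u\n", (edx >> 27) & 1);
	return 0;
}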
On Tue, 2018-01-30 at 08:57 -0800, Jim Mattson wrote:
> It's really hard to tell which patches are being proposed for which
> repositories, but assuming that everything else is correct, I don't
> think your condition is adequate. What if the physical CPU and the
> virtual CPU both have CPUID.(EAX=7H,ECX=0):EDX[26], but only the
> physical CPU has CPUID.(EAX=7H,ECX=0):EDX[27]? If the guest has write
> access to MSR_IA32_SPEC_CTRL, it can set MSR_IA32_SPEC_CTRL[1]
> (STIBP), even though setting that bit in the guest should raise #GP.
Everything we're talking about here is for tip/x86/pti. Which I note
has just updated to be 4.15-based, although I thought it was going to
stay on 4.14 for now. So I've updated my tree at
http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
accordingly.
You can always write to the STIBP bit without a #GP even when it's not
advertised/available.
There's a possibility that we'll want to always trap and *prevent*
that, instead of passing through — because doing so will also have an
effect on the HT siblings. But as discussed, I wanted to get the basics
working before exploring the complex IBRS/STIBP interactions. This much
should be OK to start with.
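A hypothetical sketch of the trap-and-prevent idea (this is not what the posted series does, and SPEC_CTRL_STIBP / guest_has_stibp are made-up names for illustration): keep MSR_IA32_SPEC_CTRL intercepted and strip STIBP before it can reach the hardware MSR, so a guest cannot influence its HT sibling.

#define SPEC_CTRL_STIBP	(1ULL << 1)	/* assumed definition, bit 1 of MSR_IA32_SPEC_CTRL */

static u64 spec_ctrl_filter_stibp(u64 data, bool guest_has_stibp)
{
	if (!guest_has_stibp)
		data &= ~SPEC_CTRL_STIBP;
	return data;
}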
On Tue, Jan 30, 2018 at 9:14 AM, David Woodhouse <[email protected]> wrote:
> On Tue, 2018-01-30 at 08:57 -0800, Jim Mattson wrote:
>> It's really hard to tell which patches are being proposed for which
>> repositories, but assuming that everything else is correct, I don't
>> think your condition is adequate. What if the physical CPU and the
>> virtual CPU both have CPUID.(EAX=7H,ECX=0):EDX[26], but only the
>> physical CPU has CPUID.(EAX=7H,ECX=0):EDX[27]? If the guest has write
>> access to MSR_IA32_SPEC_CTRL, it can set MSR_IA32_SPEC_CTRL[1]
>> (STIBP), even though setting that bit in the guest should raise #GP.
>
> Everything we're talking about here is for tip/x86/pti. Which I note
> has just updated to be 4.15-based, although I thought it was going to
> stay on 4.14 for now. So I've updated my tree at
> http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
> accordingly.
>
> You can always write to the STIBP bit without a #GP even when it's not
> advertised/available.
Oops. Yes, you're right. It's writing the IBRS bit when only STIBP is
available that results in a #GP.
> There's a possibility that we'll want to always trap and *prevent*
> that, instead of passing through — because doing so will also have an
> effect on the HT siblings. But as discussed, I wanted to get the basics
> working before exploring the complex IBRS/STIBP interactions. This much
> should be OK to start with.
On Tue, 30 Jan 2018, David Woodhouse wrote:
> On Tue, 2018-01-30 at 08:57 -0800, Jim Mattson wrote:
> > It's really hard to tell which patches are being proposed for which
> > repositories, but assuming that everything else is correct, I don't
> > think your condition is adequate. What if the physical CPU and the
> > virtual CPU both have CPUID.(EAX=7H,ECX=0):EDX[26], but only the
> > physical CPU has CPUID.(EAX=7H,ECX=0):EDX[27]? If the guest has write
> > access to MSR_IA32_SPEC_CTRL, it can set MSR_IA32_SPEC_CTRL[1]
> > (STIBP), even though setting that bit in the guest should raise #GP.
>
> Everything we're talking about here is for tip/x86/pti. Which I note
> has just updated to be 4.15-based, although I thought it was going to
> stay on 4.14 for now. So I've updated my tree at
> http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
> accordingly.
Yes, we tried to stay on a 4.14 base, but this started to create nasty merge
conflicts for no value. Merging in v4.15 turned out to resolve those issues
while still serving as the feed branch for Greg's stable work. For the time
being we try to make stable backporting, at least for 4.14/15, as painless as
possible.
Thanks,
tglx
On Tue, 2018-01-30 at 18:11 -0500, Paolo Bonzini wrote:
> Great, then the "per-VCPU MSR bitmaps" branch
> (git://git.kernel.org/pub/scm/virt/kvm/kvm.git refs/heads/msr-bitmaps)
> that I created last weekend can be pulled directly in tip/x86/pti.
>
> It cherry-picks directly to both 4.14 so it will be fine for Greg too.
Right, I've switched to pulling that branch in to my tree at
http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
I seem to recall there was some heckling at it, and I expected it to
change? :)
On 30/01/2018 18:47, David Woodhouse wrote:
> On Tue, 2018-01-30 at 18:11 -0500, Paolo Bonzini wrote:
>
>> Great, then the "per-VCPU MSR bitmaps" branch
>> (git://git.kernel.org/pub/scm/virt/kvm/kvm.git refs/heads/msr-bitmaps)
>> that I created last weekend can be pulled directly in tip/x86/pti.
>>
>> It cherry-picks directly to both 4.14 so it will be fine for Greg too.
>
> Right, I've switched to pulling that branch in to my tree at
> http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
>
> I seem to recall there was some heckling at it, and I expected it to
> change? :)
No, apart from adding a "!" in v1->v2 it's all fixed. SHA1 ids won't
change.
Paolo
On 30/01/2018 12:45, Thomas Gleixner wrote:
> On Tue, 30 Jan 2018, David Woodhouse wrote:
>
>> On Tue, 2018-01-30 at 08:57 -0800, Jim Mattson wrote:
>>> It's really hard to tell which patches are being proposed for which
>>> repositories, but assuming that everything else is correct, I don't
>>> think your condition is adequate. What if the physical CPU and the
>>> virtual CPU both have CPUID.(EAX=7H,ECX=0):EDX[26], but only the
>>> physical CPU has CPUID.(EAX=7H,ECX=0):EDX[27]? If the guest has write
>>> access to MSR_IA32_SPEC_CTRL, it can set MSR_IA32_SPEC_CTRL[1]
>>> (STIBP), even though setting that bit in the guest should raise #GP.
>>
>> Everything we're talking about here is for tip/x86/pti. Which I note
>> has just updated to be 4.15-based, although I thought it was going to
>> stay on 4.14 for now. So I've updated my tree at
>> http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
>> accordingly.
>
> Yes, we tried to stay on a 4.14 base, but this started to create nasty merge
> conflicts for no value. Merging in v4.15 turned out to resolve those issues
> while still serving as the feed branch for Greg's stable work. For the time
> being we try to make stable backporting, at least for 4.14/15, as painless as
> possible.
Great, then the "per-VCPU MSR bitmaps" branch
(git://git.kernel.org/pub/scm/virt/kvm/kvm.git refs/heads/msr-bitmaps)
that I created last weekend can be pulled directly in tip/x86/pti.
It cherry-picks directly to both 4.14 so it will be fine for Greg too.
Paolo
* Paolo Bonzini <[email protected]> wrote:
> On 30/01/2018 12:45, Thomas Gleixner wrote:
> > On Tue, 30 Jan 2018, David Woodhouse wrote:
> >
> >> On Tue, 2018-01-30 at 08:57 -0800, Jim Mattson wrote:
> >>> It's really hard to tell which patches are being proposed for which
> >>> repositories, but assuming that everything else is correct, I don't
> >>> think your condition is adequate. What if the physical CPU and the
> >>> virtual CPU both have CPUID.(EAX=7H,ECX=0):EDX[26], but only the
> >>> physical CPU has CPUID.(EAX=7H,ECX=0):EDX[27]? If the guest has write
> >>> access to MSR_IA32_SPEC_CTRL, it can set MSR_IA32_SPEC_CTRL[1]
> >>> (STIBP), even though setting that bit in the guest should raise #GP.
> >>
> >> Everything we're talking about here is for tip/x86/pti. Which I note
> >> has just updated to be 4.15-based, although I thought it was going to
> >> stay on 4.14 for now. So I've updated my tree at
> >> http://git.infradead.org/linux-retpoline.git/shortlog/refs/heads/ibpb
> >> accordingly.
> >
> > Yes, we tried to stay on a 4.14 base, but this started to create nasty merge
> > conflicts for no value. Merging in v4.15 turned out to resolve those issues
> > while still serving as the feed branch for Greg's stable work. For the time
> > being we try to make stable backporting, at least for 4.14/15, as painless as
> > possible.
>
> Great, then the "per-VCPU MSR bitmaps" branch
> (git://git.kernel.org/pub/scm/virt/kvm/kvm.git refs/heads/msr-bitmaps)
> that I created last weekend can be pulled directly in tip/x86/pti.
Can this branch still be rebased, to fix the SoB chain of:
de3a0021a606 ("KVM: nVMX: Eliminate vmcs02 pool")
?
I'm not sure what workflow resulted in this commit, but it is missing your SoB:
commit de3a0021a60635de96aa92713c1a31a96747d72c
Author: Jim Mattson <[email protected]>
AuthorDate: Mon Nov 27 17:22:25 2017 -0600
Commit: Paolo Bonzini <[email protected]>
CommitDate: Sat Jan 27 09:43:03 2018 +0100
KVM: nVMX: Eliminate vmcs02 pool
The potential performance advantages of a vmcs02 pool have never been
realized. To simplify the code, eliminate the pool. Instead, a single
vmcs02 is allocated per VCPU when the VCPU enters VMX operation.
Cc: [email protected] # prereq for Spectre mitigation
Signed-off-by: Jim Mattson <[email protected]>
Signed-off-by: Mark Kanda <[email protected]>
Reviewed-by: Ameya More <[email protected]>
Reviewed-by: David Hildenbrand <[email protected]>
Reviewed-by: Paolo Bonzini <[email protected]>
Signed-off-by: Radim Krčmář <[email protected]>
You probably rebased Radim's tree?
If this tree can still be rebased I'd like to re-pull it into tip:x86/pti, as
those bits are not yet upstream.
Thanks,
Ingo
On Mon, 2018-02-05 at 12:10 +0100, Ingo Molnar wrote:
>
> Can this branch still be rebased, to fix the SoB chain of:
>
> de3a0021a606 ("KVM: nVMX: Eliminate vmcs02 pool")
>
> ?
It can't. Linus pulled it already.
* David Woodhouse <[email protected]> wrote:
> On Mon, 2018-02-05 at 12:10 +0100, Ingo Molnar wrote:
> >
> > Can this branch still be rebased, to fix the SoB chain of:
> >
> > ? de3a0021a606 ("KVM: nVMX: Eliminate vmcs02 pool")
> >
> > ?
>
> It can't. Linus pulled it already.
Ah, indeed, freshly so in 35277995e179 - never mind then!
Thanks,
Ingo