This combines my previous patch set, which purely added the feature and MSR
definitions, with the first four patches that Karim sent out, which were
purely about enabling IBPB. Together they give us a full retpoline-based
mitigation for Spectre variant 2; the IBRS option can come later.
I expect further discussion of the final patch, which tweaks precisely when
we use IBPB on context switch.
---
v2: Fix STIPB/STIBP typo
Fix error in AMD CPUID bit definition (0x8000_0008 EBX[12])
Ashok Raj (1):
x86/kvm: Add IBPB support
David Woodhouse (4):
x86/cpufeatures: Add Intel feature bits for Speculation Control
x86/cpufeatures: Add AMD feature bits for Prediction Command
x86/msr: Add definitions for new speculation control MSRs
x86/pti: Do not enable PTI on fixed Intel processors
Thomas Gleixner (2):
x86/speculation: Add basic support for IBPB
x86/speculation: Use Indirect Branch Prediction Barrier in context
switch
Tim Chen (1):
x86/mm: Only flush indirect branches when switching into non dumpable
process
arch/x86/include/asm/cpufeature.h | 7 +++++--
arch/x86/include/asm/cpufeatures.h | 14 +++++++++++---
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/msr-index.h | 11 +++++++++++
arch/x86/include/asm/nospec-branch.h | 16 ++++++++++++++++
arch/x86/include/asm/required-features.h | 3 ++-
arch/x86/kernel/cpu/bugs.c | 7 +++++++
arch/x86/kernel/cpu/common.c | 10 ++++++++--
arch/x86/kernel/cpu/scattered.c | 3 +--
arch/x86/kvm/svm.c | 14 ++++++++++++++
arch/x86/kvm/vmx.c | 11 +++++++++++
arch/x86/mm/tlb.c | 21 ++++++++++++++++++++-
12 files changed, 108 insertions(+), 12 deletions(-)
--
2.7.4
Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES.
See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/include/asm/msr-index.h | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index fa11fb1..3e50463 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -42,6 +42,13 @@
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f
+#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
+#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+
+#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
+#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
+
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
@@ -60,6 +67,10 @@
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
+#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
+#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
--
2.7.4
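For reference, a minimal sketch of how these definitions are meant to be
consumed, assuming the usual <asm/msr.h> accessors and the
X86_FEATURE_ARCH_CAPABILITIES bit added later in the series (illustrative
only, not part of the patch):

	u64 ia32_cap = 0;

	/* Issue an IBPB: IA32_PRED_CMD is write-only and carries no state. */
	wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);

	/* Only read the enumeration MSR where CPUID advertises it. */
	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
	if (ia32_cap & ARCH_CAP_RDCL_NO)
		pr_info("CPU reports it is not susceptible to Meltdown\n");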
From: Ashok Raj <[email protected]>
Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
barriers when switching between VMs to avoid inter-VM Spectre-v2 attacks.
[peterz: rebase and changelog rewrite]
[karahmed: - vmx: expose PRED_CMD whenever it is available
- svm: only pass through IBPB if it is available]
[dwmw2: - vmx: allow X86_FEATURE_AMD_PRED_CMD too]
Cc: Asit Mallick <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Jun Nakajima <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Greg KH <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: David Woodhouse <[email protected]>
Signed-off-by: KarimAllah Ahmed <[email protected]>
---
arch/x86/kvm/svm.c | 14 ++++++++++++++
arch/x86/kvm/vmx.c | 11 +++++++++++
2 files changed, 25 insertions(+)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2744b973..cfdb9ab 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -529,6 +529,7 @@ struct svm_cpu_data {
struct kvm_ldttss_desc *tss_desc;
struct page *save_area;
+ struct vmcb *current_vmcb;
};
static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
}
+
+ if (boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
+ set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
}
static void add_msr_offset(u32 offset)
@@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, svm);
+ /*
+ * The vmcb page can be recycled, causing a false negative in
+ * svm_vcpu_load(). So do a full IBPB now.
+ */
+ indirect_branch_prediction_barrier();
}
static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
int i;
if (unlikely(cpu != vcpu->cpu)) {
@@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (static_cpu_has(X86_FEATURE_RDTSCP))
wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
+ if (sd->current_vmcb != svm->vmcb) {
+ sd->current_vmcb = svm->vmcb;
+ indirect_branch_prediction_barrier();
+ }
avic_vcpu_load(vcpu, cpu);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d1e25db..1e45bb3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2279,6 +2279,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
vmcs_load(vmx->loaded_vmcs->vmcs);
+ indirect_branch_prediction_barrier();
}
if (!already_loaded) {
@@ -6791,6 +6792,16 @@ static __init int hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 48;
}
+ /*
+ * The AMD_PRED_CMD bit might be exposed by hypervisors on Intel
+ * chips which only want to expose PRED_CMD to guests and not
+ * SPEC_CTRL. Because PRED_CMD is one-shot write-only, while
+ * SPEC_CTRL requires storage, live migration support, etc.
+ */
+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
+ vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
+
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
--
2.7.4
From: Andi Kleen <[email protected]>
Flush indirect branches when switching into a process that has marked
itself non-dumpable. This protects high-value processes like gpg
better, without incurring too much performance overhead.
Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
Signed-off-by: KarimAllah Ahmed <[email protected]>
---
arch/x86/mm/tlb.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 304de7d..f64e80c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -225,8 +225,19 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* Avoid user/user BTB poisoning by flushing the branch predictor
* when switching between processes. This stops one process from
* doing Spectre-v2 attacks on another.
+ *
+ * As an optimization: Flush indirect branches only when
+ * switching into processes that disable dumping.
+ *
+ * This will not flush when switching into kernel threads.
+ * But it would flush when switching into idle and back
+ *
+ * It might be useful to have a one-off cache here
+ * to also not flush the idle case, but we would need some
+ * kind of stable sequence number to remember the previous mm.
*/
- indirect_branch_prediction_barrier();
+ if (tsk && tsk->mm && get_dumpable(tsk->mm) != SUID_DUMP_USER)
+ indirect_branch_prediction_barrier();
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
--
2.7.4
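For context, a sketch of how a process opts in to this protection from
userspace via the standard prctl() interface (illustrative only; processes
like gpg may also become non-dumpable by other means, e.g. privilege
changes):

	#include <stdio.h>
	#include <sys/prctl.h>

	int main(void)
	{
		/* Mark this process non-dumpable; with the patch above,
		 * switching into it will flush the indirect branch predictor. */
		if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0)
			perror("prctl(PR_SET_DUMPABLE)");
		return 0;
	}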
From: Thomas Gleixner <[email protected]>
Expose indirect_branch_prediction_barrier() for use in subsequent patches.
[karahmed: remove the special-casing of skylake for using IBPB (wtf?),
switch to using ALTERNATIVES instead of static_cpu_has]
[dwmw2: set up ax/cx/dx in the asm too so it gets NOP'd out]
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: KarimAllah Ahmed <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/nospec-branch.h | 16 ++++++++++++++++
arch/x86/kernel/cpu/bugs.c | 7 +++++++
3 files changed, 24 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8c9e5c0..cf28399 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -207,6 +207,7 @@
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+#define X86_FEATURE_IBPB ( 7*32+16) /* Using Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_PRED_CMD ( 7*32+17) /* Prediction Command MSR (AMD) */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 4ad4108..c333c95 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -218,5 +218,21 @@ static inline void vmexit_fill_RSB(void)
#endif
}
+static inline void indirect_branch_prediction_barrier(void)
+{
+ unsigned long ax, cx, dx;
+
+ asm volatile(ALTERNATIVE("",
+ "movl %[msr], %%ecx\n\t"
+ "movl %[val], %%eax\n\t"
+ "movl $0, %%edx\n\t"
+ "wrmsr",
+ X86_FEATURE_IBPB)
+ : "=a" (ax), "=c" (cx), "=d" (dx)
+ : [msr] "i" (MSR_IA32_PRED_CMD),
+ [val] "i" (PRED_CMD_IBPB)
+ : "memory");
+}
+
#endif /* __ASSEMBLY__ */
#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 390b3dc..96548ff 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -249,6 +249,13 @@ static void __init spectre_v2_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Filling RSB on context switch\n");
}
+
+ /* Initialize Indirect Branch Prediction Barrier if supported */
+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
+ boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
+ setup_force_cpu_cap(X86_FEATURE_IBPB);
+ pr_info("Enabling Indirect Branch Prediction Barrier\n");
+ }
}
#undef pr_fmt
--
2.7.4
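Conceptually the ALTERNATIVE above behaves like the sketch below, except
that the decision is made once by boot-time patching (the wrmsr sequence is
NOPed out when X86_FEATURE_IBPB is clear) rather than by a runtime branch:

	if (boot_cpu_has(X86_FEATURE_IBPB))
		wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);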
Add three feature bits exposed by new microcode on Intel CPUs for
speculation control. We would now be up to five bits in CPUID(7).RDX
so take them out of the 'scattered' features and make a proper word
for them instead as that leaf is a pure feature bits leaf.
[bp: heckle commitlog]
Signed-off-by: David Woodhouse <[email protected]>
Reviewed-by: Borislav Petkov <[email protected]>
---
arch/x86/include/asm/cpufeature.h | 7 +++++--
arch/x86/include/asm/cpufeatures.h | 12 +++++++++---
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/required-features.h | 3 ++-
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kernel/cpu/scattered.c | 2 --
6 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ea9a7dd..70eddb3 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -29,6 +29,7 @@ enum cpuid_leafs
CPUID_8000_000A_EDX,
CPUID_7_ECX,
CPUID_8000_0007_EBX,
+ CPUID_7_EDX,
};
#ifdef CONFIG_X86_FEATURE_NAMES
@@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 18))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 19))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 25b9375..2efb8d4 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 18 /* N 32-bit words worth of info */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -206,8 +206,6 @@
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
@@ -319,6 +317,14 @@
#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index e428e16..c6a3af1 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -71,6 +71,7 @@
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57)
#define DISABLED_MASK17 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index d91ba04..fb3a6de 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -106,6 +106,7 @@
#define REQUIRED_MASK15 0
#define REQUIRED_MASK16 (NEED_LA57)
#define REQUIRED_MASK17 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 372ba3f..e5d66e9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -745,6 +745,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_7_0_EBX] = ebx;
c->x86_capability[CPUID_7_ECX] = ecx;
+ c->x86_capability[CPUID_7_EDX] = edx;
}
/* Extended state features: level 0x0000000d */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index d0e6976..df11f5d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -21,8 +21,6 @@ struct cpuid_bit {
static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
- { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 },
- { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 },
{ X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 },
{ X86_FEATURE_CAT_L2, CPUID_EBX, 2, 0x00000010, 0 },
{ X86_FEATURE_CDP_L3, CPUID_ECX, 2, 0x00000010, 1 },
--
2.7.4
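With CPUID(7):0 EDX stored as capability word 18, the new bits are then
queried like any other feature flag; a trivial sketch:

	if (boot_cpu_has(X86_FEATURE_SPEC_CTRL))
		pr_info("CPU advertises IBRS/IBPB via IA32_SPEC_CTRL/IA32_PRED_CMD\n");
	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
		pr_info("IA32_ARCH_CAPABILITIES MSR is available\n");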
From: Thomas Gleixner <[email protected]>
[peterz: comment]
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/mm/tlb.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a156195..304de7d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,13 +6,14 @@
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
-#include <linux/debugfs.h>
/*
* TLB flushing, formerly SMP-only
@@ -220,6 +221,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
u16 new_asid;
bool need_flush;
+ /*
+ * Avoid user/user BTB poisoning by flushing the branch predictor
+ * when switching between processes. This stops one process from
+ * doing Spectre-v2 attacks on another.
+ */
+ indirect_branch_prediction_barrier();
+
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
* If our current stack is in vmalloc space and isn't
--
2.7.4
When CPUs advertise the IA32_ARCH_CAPABILITIES MSR and it has the RDCL_NO
bit set, they are not susceptible to Meltdown and don't need KPTI either.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/kernel/cpu/common.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e5d66e9..80572ae 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -900,8 +900,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- if (c->x86_vendor != X86_VENDOR_AMD)
- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ if (c->x86_vendor != X86_VENDOR_AMD) {
+ unsigned long ia32_cap = 0;
+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ if (!(ia32_cap & ARCH_CAP_RDCL_NO))
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ }
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
--
2.7.4
AMD doesn't implement the Speculation Control MSR that Intel does, but
the Prediction Control MSR does exist and is advertised by a separate
CPUID bit. Add support for that.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/kernel/cpu/scattered.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 2efb8d4..8c9e5c0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -207,6 +207,7 @@
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+#define X86_FEATURE_AMD_PRED_CMD ( 7*32+17) /* Prediction Command MSR (AMD) */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index df11f5d..4eb90b2 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
{ X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
+ { X86_FEATURE_AMD_PRED_CMD, CPUID_EBX, 12, 0x80000008, 0 },
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 }
};
--
2.7.4
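At boot the scattered.c entry above resolves to roughly the following (a
sketch of the mechanism only, not the actual scattered-feature init code;
'c' stands for the struct cpuinfo_x86 being filled in):

	unsigned int eax, ebx, ecx, edx;

	cpuid_count(0x80000008, 0, &eax, &ebx, &ecx, &edx);
	if (ebx & BIT(12))	/* AMD IBPB support bit */
		set_cpu_cap(c, X86_FEATURE_AMD_PRED_CMD);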
* David Woodhouse <[email protected]> wrote:
> Add three feature bits exposed by new microcode on Intel CPUs for
> speculation control. We would now be up to five bits in CPUID(7).RDX
> so take them out of the 'scattered' features and make a proper word
> for them instead as that leaf is a pure feature bits leaf.
>
> [bp: heckle commitlog]
> Signed-off-by: David Woodhouse <[email protected]>
> Reviewed-by: Borislav Petkov <[email protected]>
> ---
> arch/x86/include/asm/cpufeature.h | 7 +++++--
> arch/x86/include/asm/cpufeatures.h | 12 +++++++++---
> arch/x86/include/asm/disabled-features.h | 3 ++-
> arch/x86/include/asm/required-features.h | 3 ++-
> arch/x86/kernel/cpu/common.c | 1 +
> arch/x86/kernel/cpu/scattered.c | 2 --
> 6 files changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
> index ea9a7dd..70eddb3 100644
> --- a/arch/x86/include/asm/cpufeature.h
> +++ b/arch/x86/include/asm/cpufeature.h
> @@ -29,6 +29,7 @@ enum cpuid_leafs
> CPUID_8000_000A_EDX,
> CPUID_7_ECX,
> CPUID_8000_0007_EBX,
> + CPUID_7_EDX,
> };
>
> #ifdef CONFIG_X86_FEATURE_NAMES
> @@ -79,8 +80,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
> CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
> CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
> CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
> + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
> REQUIRED_MASK_CHECK || \
> - BUILD_BUG_ON_ZERO(NCAPINTS != 18))
> + BUILD_BUG_ON_ZERO(NCAPINTS != 19))
>
> #define DISABLED_MASK_BIT_SET(feature_bit) \
> ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
> @@ -101,8 +103,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
> CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
> CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
> CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
> + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
> DISABLED_MASK_CHECK || \
> - BUILD_BUG_ON_ZERO(NCAPINTS != 18))
> + BUILD_BUG_ON_ZERO(NCAPINTS != 19))
>
> #define cpu_has(c, bit) \
> (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 25b9375..2efb8d4 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -13,7 +13,7 @@
> /*
> * Defines x86 CPU feature bits
> */
> -#define NCAPINTS 18 /* N 32-bit words worth of info */
> +#define NCAPINTS 19 /* N 32-bit words worth of info */
> #define NBUGINTS 1 /* N 32-bit bug flags */
>
> /*
> @@ -206,8 +206,6 @@
> #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
> #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
> #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
> -#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
> -#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
>
> #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
> #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
> @@ -319,6 +317,14 @@
> #define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
> #define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
>
> +/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
> +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
> +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
> +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
> +#define X86_FEATURE_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
> +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
Ok, this patch should be split into at least two patches:
- one extending the feature words from 18 to 19 and moving the two vector
computing feature bits
- a separate one adding the speculation control bits
Thanks,
Ingo
* David Woodhouse <[email protected]> wrote:
> From: Thomas Gleixner <[email protected]>
>
> Expose indirect_branch_prediction_barrier() for use in subsequent patches.
>
> [karahmed: remove the special-casing of skylake for using IBPB (wtf?),
> switch to using ALTERNATIVES instead of static_cpu_has]
> [dwmw2: set up ax/cx/dx in the asm too so it gets NOP'd out]
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Signed-off-by: KarimAllah Ahmed <[email protected]>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> arch/x86/include/asm/cpufeatures.h | 1 +
> arch/x86/include/asm/nospec-branch.h | 16 ++++++++++++++++
> arch/x86/kernel/cpu/bugs.c | 7 +++++++
> 3 files changed, 24 insertions(+)
>
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 8c9e5c0..cf28399 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -207,6 +207,7 @@
> #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
> #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
>
> +#define X86_FEATURE_IBPB ( 7*32+16) /* Using Indirect Branch Prediction Barrier */
> #define X86_FEATURE_AMD_PRED_CMD ( 7*32+17) /* Prediction Command MSR (AMD) */
> #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
> #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 4ad4108..c333c95 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -218,5 +218,21 @@ static inline void vmexit_fill_RSB(void)
> #endif
> }
>
> +static inline void indirect_branch_prediction_barrier(void)
> +{
> + unsigned long ax, cx, dx;
> +
> + asm volatile(ALTERNATIVE("",
> + "movl %[msr], %%ecx\n\t"
> + "movl %[val], %%eax\n\t"
> + "movl $0, %%edx\n\t"
> + "wrmsr",
> + X86_FEATURE_IBPB)
> + : "=a" (ax), "=c" (cx), "=d" (dx)
> + : [msr] "i" (MSR_IA32_PRED_CMD),
> + [val] "i" (PRED_CMD_IBPB)
> + : "memory");
> +}
> +
> #endif /* __ASSEMBLY__ */
> #endif /* __NOSPEC_BRANCH_H__ */
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index 390b3dc..96548ff 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -249,6 +249,13 @@ static void __init spectre_v2_select_mitigation(void)
> setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> pr_info("Filling RSB on context switch\n");
> }
> +
> + /* Initialize Indirect Branch Prediction Barrier if supported */
> + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
> + setup_force_cpu_cap(X86_FEATURE_IBPB);
> + pr_info("Enabling Indirect Branch Prediction Barrier\n");
> + }
> }
I'd suggest writing out the common 'IBPB' acronym in the messages as well:
pr_info("Enabling Indirect Branch Prediction Barrier (IBPB)\n");
Also, the kernel's barrier*() namespace as it exists today is:
barrier()
barrier_data()
I think the better name to introduce would be:
barrier_indirect_branch_prediction()
to maintain barrier_ as a prefix.
Thanks,
Ingo
* David Woodhouse <[email protected]> wrote:
> From: Andi Kleen <[email protected]>
>
> Flush indirect branches when switching into a process that has marked
> itself non-dumpable. This protects high-value processes like gpg
> better, without incurring too much performance overhead.
>
> Signed-off-by: Andi Kleen <[email protected]>
> Signed-off-by: David Woodhouse <[email protected]>
> Signed-off-by: KarimAllah Ahmed <[email protected]>
> ---
> arch/x86/mm/tlb.c | 13 ++++++++++++-
> 1 file changed, 12 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 304de7d..f64e80c 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -225,8 +225,19 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> * Avoid user/user BTB poisoning by flushing the branch predictor
> * when switching between processes. This stops one process from
> * doing Spectre-v2 attacks on another.
> + *
> + * As an optimization: Flush indirect branches only when
> + * switching into processes that disable dumping.
> + *
> + * This will not flush when switching into kernel threads.
> + * But it would flush when switching into idle and back
> + *
> + * It might be useful to have a one-off cache here
> + * to also not flush the idle case, but we would need some
> + * kind of stable sequence number to remember the previous mm.
Punctuation and grammar are pretty inconsistent; please change it to something more
readable, like:
* As an optimization flush indirect branches only when
* switching into processes that disable dumping.
*
* This will not flush branches when switching into kernel
* threads, but it would flush them when switching to the
* idle thread and back.
*
* It might be useful to have a one-off cache here
* to also not flush the idle case, but we would need some
* kind of stable sequence number to remember the previous mm.
Thanks,
Ingo
On 01/21/2018, 10:49 AM, David Woodhouse wrote:
> Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and ARCH_CAPABILITIES.
>
> See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf
>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> arch/x86/include/asm/msr-index.h | 11 +++++++++++
> 1 file changed, 11 insertions(+)
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index fa11fb1..3e50463 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -42,6 +42,13 @@
> #define MSR_PPIN_CTL 0x0000004e
> #define MSR_PPIN 0x0000004f
>
> +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
> +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
> +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
> +
> +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
> +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
> +
> #define MSR_IA32_PERFCTR0 0x000000c1
> #define MSR_IA32_PERFCTR1 0x000000c2
> #define MSR_FSB_FREQ 0x000000cd
> @@ -60,6 +67,10 @@
> #define MSR_IA32_BBL_CR_CTL 0x00000119
> #define MSR_IA32_BBL_CR_CTL3 0x0000011e
>
> +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
> +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
> +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
Is there any reason why all 3 are not properly sorted?
0x04e > 0x048
0x119 > 0x10a
> #define MSR_IA32_SYSENTER_CS 0x00000174
> #define MSR_IA32_SYSENTER_ESP 0x00000175
> #define MSR_IA32_SYSENTER_EIP 0x00000176
>
thanks,
--
js
suse labs
> On 01/21/2018, 10:49 AM, David Woodhouse wrote:
>> Add MSR and bit definitions for SPEC_CTRL, PRED_CMD and
>> ARCH_CAPABILITIES.
>>
>> See Intel's 336996-Speculative-Execution-Side-Channel-Mitigations.pdf
>>
>> Signed-off-by: David Woodhouse <[email protected]>
>> ---
>> arch/x86/include/asm/msr-index.h | 11 +++++++++++
>> 1 file changed, 11 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/msr-index.h
>> b/arch/x86/include/asm/msr-index.h
>> index fa11fb1..3e50463 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -42,6 +42,13 @@
>> #define MSR_PPIN_CTL 0x0000004e
>> #define MSR_PPIN 0x0000004f
>>
>> +#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
>> +#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted
>> Speculation */
>> +#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch
>> Predictors */
>> +
>> +#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
>> +#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction
>> Barrier */
>> +
>> #define MSR_IA32_PERFCTR0 0x000000c1
>> #define MSR_IA32_PERFCTR1 0x000000c2
>> #define MSR_FSB_FREQ 0x000000cd
>> @@ -60,6 +67,10 @@
>> #define MSR_IA32_BBL_CR_CTL 0x00000119
>> #define MSR_IA32_BBL_CR_CTL3 0x0000011e
>>
>> +#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
>> +#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
>> +#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
>
> Is there any reason why all 3 are not properly sorted?
> 0x04e > 0x048
> 0x119 > 0x10a
Er... no good reason. I was definitely *trying* to put them in the right
place. I plead incompetence; will fix...
--
dwmw2
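The sorted layout would place the new definitions numerically, roughly like
this (sketch):

	#define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
	#define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
	#define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */

	#define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
	#define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */

	#define MSR_PPIN_CTL			0x0000004e
	#define MSR_PPIN			0x0000004f

with MSR_IA32_ARCH_CAPABILITIES (0x0000010a) likewise moved ahead of
MSR_IA32_BBL_CR_CTL (0x00000119).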
On Sun, Jan 21, 2018 at 09:49:05AM +0000, David Woodhouse wrote:
> When CPUs advertise the IA32_ARCH_CAPABILITIES MSR and it has the RDCL_NO
> bit set, they are not susceptible to Meltdown and don't need KPTI either.
>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> arch/x86/kernel/cpu/common.c | 9 +++++++--
> 1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index e5d66e9..80572ae 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -900,8 +900,13 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
>
> setup_force_cpu_cap(X86_FEATURE_ALWAYS);
>
> - if (c->x86_vendor != X86_VENDOR_AMD)
> - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
> + if (c->x86_vendor != X86_VENDOR_AMD) {
> + unsigned long ia32_cap = 0;
WARNING: Missing a blank line after declarations
#36: FILE: arch/x86/kernel/cpu/common.c:905:
+ unsigned long ia32_cap = 0;
+ if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
Yap, that thing seldom is right but this time it makes some sense.
Also,
unsigned long long ia32_cap = 0;
for 32-bit.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
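Folding both remarks in, the hunk would look roughly like this (a sketch;
u64 is equivalent to unsigned long long here and keeps the rdmsrl() correct
on 32-bit):

	if (c->x86_vendor != X86_VENDOR_AMD) {
		u64 ia32_cap = 0;

		if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
			rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
		if (!(ia32_cap & ARCH_CAP_RDCL_NO))
			setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
	}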
On 1/21/2018 3:49 AM, David Woodhouse wrote:
> AMD doesn't implement the Speculation Control MSR that Intel does, but
> the Prediction Control MSR does exist and is advertised by a separate
> CPUID bit. Add support for that.
>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> arch/x86/include/asm/cpufeatures.h | 1 +
> arch/x86/kernel/cpu/scattered.c | 1 +
> 2 files changed, 2 insertions(+)
>
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 2efb8d4..8c9e5c0 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -207,6 +207,7 @@
> #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
> #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
>
> +#define X86_FEATURE_AMD_PRED_CMD ( 7*32+17) /* Prediction Command MSR (AMD) */
> #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
> #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
>
> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
> index df11f5d..4eb90b2 100644
> --- a/arch/x86/kernel/cpu/scattered.c
> +++ b/arch/x86/kernel/cpu/scattered.c
> @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
> { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
> { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
> { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
> + { X86_FEATURE_AMD_PRED_CMD, CPUID_EBX, 12, 0x80000008, 0 },
I replied to the previous version, but I'll add it here, too.
This should be moved to the existing 0x80000008/EBX entry rather than have
it in scattered.
Also, there will be a total of three bits:
IBPB: 0x80000008 EBX[12]
IBRS: 0x80000008 EBX[14]
STIBP: 0x80000008 EBX[15]
Since IBRS and STIBP share the same MSR, if a processor only supports
STIBP (MSR bit 1), for ease of software implementation the processor
does not GP fault attempts to write bit 0. In a similar manner, if a
processor only supports IBRS (MSR bit 0), the processor does not GP
fault attempts to write bit 1.
Thanks,
Tom
> { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
> { 0, 0, 0, 0, 0 }
> };
>
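A sketch of what that could look like as proper CPUID 0x80000008/EBX
(word 13) feature bits, using hypothetical names that follow the bit
positions listed above:

	#define X86_FEATURE_AMD_IBPB		(13*32+12) /* Indirect Branch Prediction Barrier */
	#define X86_FEATURE_AMD_IBRS		(13*32+14) /* Indirect Branch Restricted Speculation */
	#define X86_FEATURE_AMD_STIBP		(13*32+15) /* Single Thread Indirect Branch Predictors */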
On 21/01/18 17:50, Tom Lendacky wrote:
> On 1/21/2018 3:49 AM, David Woodhouse wrote:
>> AMD doesn't implement the Speculation Control MSR that Intel does, but
>> the Prediction Control MSR does exist and is advertised by a separate
>> CPUID bit. Add support for that.
>>
>> Signed-off-by: David Woodhouse <[email protected]>
>> ---
>> arch/x86/include/asm/cpufeatures.h | 1 +
>> arch/x86/kernel/cpu/scattered.c | 1 +
>> 2 files changed, 2 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
>> index 2efb8d4..8c9e5c0 100644
>> --- a/arch/x86/include/asm/cpufeatures.h
>> +++ b/arch/x86/include/asm/cpufeatures.h
>> @@ -207,6 +207,7 @@
>> #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
>> #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
>>
>> +#define X86_FEATURE_AMD_PRED_CMD ( 7*32+17) /* Prediction Command MSR (AMD) */
>> #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
>> #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
>>
>> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
>> index df11f5d..4eb90b2 100644
>> --- a/arch/x86/kernel/cpu/scattered.c
>> +++ b/arch/x86/kernel/cpu/scattered.c
>> @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
>> { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
>> { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
>> { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
>> + { X86_FEATURE_AMD_PRED_CMD, CPUID_EBX, 12, 0x80000008, 0 },
> I replied to the previous version, but I'll add it here, too.
>
> This should be moved to the existing 0x80000008/EBX entry rather than have
> it in scattered.
>
> Also, there will be a total of three bits:
> IBPB: 0x80000008 EBX[12]
> IBRS: 0x80000008 EBX[14]
> STIBP: 0x80000008 EBX[15]
>
> Since IBRS and STIBP share the same MSR, if a processor only supports
> STIBP (MSR bit 1), for ease of software implementation the processor
> does not GP fault attempts to write bit 0. In a similar manner, if a
> processor only supports IBRS (MSR bit 0), the processor does not GP
> fault attempts to write bit 1.
Are you able to comment on the read behaviour after a write which is
ignored?
If the behaviour is "read as written" then virt cases are fine. If the
"ignore" causes a zero to be read back, then we're still going to need
to intercept and emulate all VM accesses.
Thanks,
~Andrew
On Sun, Jan 21, 2018 at 09:49:06AM +0000, David Woodhouse wrote:
> From: Thomas Gleixner <[email protected]>
>
> Expose indirect_branch_prediction_barrier() for use in subsequent patches.
>
> [karahmed: remove the special-casing of skylake for using IBPB (wtf?),
> switch to using ALTERNATIVES instead of static_cpu_has]
> [dwmw2: set up ax/cx/dx in the asm too so it gets NOP'd out]
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Signed-off-by: KarimAllah Ahmed <[email protected]>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> arch/x86/include/asm/cpufeatures.h | 1 +
> arch/x86/include/asm/nospec-branch.h | 16 ++++++++++++++++
> arch/x86/kernel/cpu/bugs.c | 7 +++++++
> 3 files changed, 24 insertions(+)
>
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 8c9e5c0..cf28399 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -207,6 +207,7 @@
> #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
> #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
>
> +#define X86_FEATURE_IBPB ( 7*32+16) /* Using Indirect Branch Prediction Barrier */
Right, and as AMD has a separate bit for this in CPUID_80000008_EBX[12],
we probably don't really need the synthetic bit here but simply use the
one at (13*32+12) - word 13.
> #define X86_FEATURE_AMD_PRED_CMD ( 7*32+17) /* Prediction Command MSR (AMD) */
> #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
> #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 4ad4108..c333c95 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -218,5 +218,21 @@ static inline void vmexit_fill_RSB(void)
> #endif
> }
>
> +static inline void indirect_branch_prediction_barrier(void)
I like ibp_barrier() better.
> +{
> + unsigned long ax, cx, dx;
> +
> + asm volatile(ALTERNATIVE("",
> + "movl %[msr], %%ecx\n\t"
> + "movl %[val], %%eax\n\t"
> + "movl $0, %%edx\n\t"
> + "wrmsr",
> + X86_FEATURE_IBPB)
> + : "=a" (ax), "=c" (cx), "=d" (dx)
> + : [msr] "i" (MSR_IA32_PRED_CMD),
> + [val] "i" (PRED_CMD_IBPB)
> + : "memory");
> +}
Btw, we can simplify this a bit by dropping the inputs and marking the 3
GPRs as clobbered:
alternative_input("",
"mov $0x49, %%ecx\n\t"
"mov $1, %%eax\n\t"
"xor %%edx, %%edx\n\t"
"wrmsr\n\t",
X86_FEATURE_IBPB,
ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
The "memory" clobber is probably not really needed but it wouldn't
hurt...
Also, above says:
> switch to using ALTERNATIVES instead of static_cpu_has]
Why?
if (static_cpu_has(X86_FEATURE_IBPB))
wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
It can't get any more readable than this. Why even f*ck with
alternatives?
> +
> #endif /* __ASSEMBLY__ */
> #endif /* __NOSPEC_BRANCH_H__ */
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index 390b3dc..96548ff 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -249,6 +249,13 @@ static void __init spectre_v2_select_mitigation(void)
> setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> pr_info("Filling RSB on context switch\n");
> }
> +
> + /* Initialize Indirect Branch Prediction Barrier if supported */
> + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
> + setup_force_cpu_cap(X86_FEATURE_IBPB);
> + pr_info("Enabling Indirect Branch Prediction Barrier\n");
We don't really need the pr_info as "ibpb" will appear in /proc/cpuinfo.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
On 1/21/2018 3:49 AM, David Woodhouse wrote:
> From: Ashok Raj <[email protected]>
>
> Add MSR passthrough for MSR_IA32_PRED_CMD and place branch predictor
> barriers when switching between VMs to avoid inter-VM Spectre-v2 attacks.
>
> [peterz: rebase and changelog rewrite]
> [karahmed: - vmx: expose PRED_CMD whenever it is available
> - svm: only pass through IBPB if it is available]
> [dwmw2: - vmx: allow X86_FEATURE_AMD_PRED_CMD too]
> Cc: Asit Mallick <[email protected]>
> Cc: Dave Hansen <[email protected]>
> Cc: Arjan Van De Ven <[email protected]>
> Cc: Tim Chen <[email protected]>
> Cc: Linus Torvalds <[email protected]>
> Cc: Andrea Arcangeli <[email protected]>
> Cc: Andi Kleen <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Dan Williams <[email protected]>
> Cc: Jun Nakajima <[email protected]>
> Cc: Andy Lutomirski <[email protected]>
> Cc: Greg KH <[email protected]>
> Cc: Paolo Bonzini <[email protected]>
> Signed-off-by: Ashok Raj <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Link: http://lkml.kernel.org/r/[email protected]
>
> Signed-off-by: David Woodhouse <[email protected]>
> Signed-off-by: KarimAllah Ahmed <[email protected]>
> ---
> arch/x86/kvm/svm.c | 14 ++++++++++++++
> arch/x86/kvm/vmx.c | 11 +++++++++++
> 2 files changed, 25 insertions(+)
>
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 2744b973..cfdb9ab 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -529,6 +529,7 @@ struct svm_cpu_data {
> struct kvm_ldttss_desc *tss_desc;
>
> struct page *save_area;
> + struct vmcb *current_vmcb;
> };
>
> static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
> @@ -918,6 +919,9 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
>
> set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
> }
> +
> + if (boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
> + set_msr_interception(msrpm, MSR_IA32_PRED_CMD, 1, 1);
I don't think there's any reason to make the "if" check. You can just
add this to the direct_access_msrs array instead, as:
{ .index = MSR_IA32_PRED_CMD, .always = true },
Thanks,
Tom
> }
>
> static void add_msr_offset(u32 offset)
> @@ -1706,11 +1710,17 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
> __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
> kvm_vcpu_uninit(vcpu);
> kmem_cache_free(kvm_vcpu_cache, svm);
> + /*
> + * The vmcb page can be recycled, causing a false negative in
> + * svm_vcpu_load(). So do a full IBPB now.
> + */
> + indirect_branch_prediction_barrier();
> }
>
> static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> + struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
> int i;
>
> if (unlikely(cpu != vcpu->cpu)) {
> @@ -1739,6 +1749,10 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> if (static_cpu_has(X86_FEATURE_RDTSCP))
> wrmsrl(MSR_TSC_AUX, svm->tsc_aux);
>
> + if (sd->current_vmcb != svm->vmcb) {
> + sd->current_vmcb = svm->vmcb;
> + indirect_branch_prediction_barrier();
> + }
> avic_vcpu_load(vcpu, cpu);
> }
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index d1e25db..1e45bb3 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2279,6 +2279,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) {
> per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs;
> vmcs_load(vmx->loaded_vmcs->vmcs);
> + indirect_branch_prediction_barrier();
> }
>
> if (!already_loaded) {
> @@ -6791,6 +6792,16 @@ static __init int hardware_setup(void)
> kvm_tsc_scaling_ratio_frac_bits = 48;
> }
>
> + /*
> + * The AMD_PRED_CMD bit might be exposed by hypervisors on Intel
> + * chips which only want to expose PRED_CMD to guests and not
> + * SPEC_CTRL. Because PRED_CMD is one-shot write-only, while
> + * SPEC_CTRL requires storage, live migration support, etc.
> + */
> + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
> + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD))
> + vmx_disable_intercept_for_msr(MSR_IA32_PRED_CMD, false);
> +
> vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
> vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
> vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
>
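For orientation, a sketch of where such an entry would sit in svm.c's
existing table (neighbouring entries shown only for illustration):

	static const struct svm_direct_access_msrs {
		u32 index;	/* Index of the MSR */
		bool always;	/* True if intercept is always on */
	} direct_access_msrs[] = {
		{ .index = MSR_STAR,			.always = true  },
		{ .index = MSR_IA32_SYSENTER_CS,	.always = true  },
		{ .index = MSR_IA32_PRED_CMD,		.always = true  },
		{ .index = MSR_IA32_LASTBRANCHFROMIP,	.always = false },
		{ MSR_INVALID, false },
	};

With .always = true the MSR is passed through from svm_vcpu_init_msrpm()
without any feature check, which is what makes the explicit boot_cpu_has()
test unnecessary.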
On 01/21/2018 07:06 PM, Borislav Petkov wrote:
> On Sun, Jan 21, 2018 at 09:49:06AM +0000, David Woodhouse wrote:
>> From: Thomas Gleixner <[email protected]>
>>
>> Expose indirect_branch_prediction_barrier() for use in subsequent patches.
>>
>> [karahmed: remove the special-casing of skylake for using IBPB (wtf?),
>> switch to using ALTERNATIVES instead of static_cpu_has]
>> [dwmw2: set up ax/cx/dx in the asm too so it gets NOP'd out]
>>
>> Signed-off-by: Thomas Gleixner <[email protected]>
>> Signed-off-by: KarimAllah Ahmed <[email protected]>
>> Signed-off-by: David Woodhouse <[email protected]>
>> ---
>> arch/x86/include/asm/cpufeatures.h | 1 +
>> arch/x86/include/asm/nospec-branch.h | 16 ++++++++++++++++
>> arch/x86/kernel/cpu/bugs.c | 7 +++++++
>> 3 files changed, 24 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
>> index 8c9e5c0..cf28399 100644
>> --- a/arch/x86/include/asm/cpufeatures.h
>> +++ b/arch/x86/include/asm/cpufeatures.h
>> @@ -207,6 +207,7 @@
>> #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
>> #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
>>
>> +#define X86_FEATURE_IBPB ( 7*32+16) /* Using Indirect Branch Prediction Barrier */
> Right, and as AMD has a separate bit for this in CPUID_80000008_EBX[12],
> we probably don't really need the synthetic bit here but simply use the
> one at (13*32+12) - word 13.
>
>> #define X86_FEATURE_AMD_PRED_CMD ( 7*32+17) /* Prediction Command MSR (AMD) */
>> #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
>> #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
>> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
>> index 4ad4108..c333c95 100644
>> --- a/arch/x86/include/asm/nospec-branch.h
>> +++ b/arch/x86/include/asm/nospec-branch.h
>> @@ -218,5 +218,21 @@ static inline void vmexit_fill_RSB(void)
>> #endif
>> }
>>
>> +static inline void indirect_branch_prediction_barrier(void)
> I like ibp_barrier() better.
>
>> +{
>> + unsigned long ax, cx, dx;
>> +
>> + asm volatile(ALTERNATIVE("",
>> + "movl %[msr], %%ecx\n\t"
>> + "movl %[val], %%eax\n\t"
>> + "movl $0, %%edx\n\t"
>> + "wrmsr",
>> + X86_FEATURE_IBPB)
>> + : "=a" (ax), "=c" (cx), "=d" (dx)
>> + : [msr] "i" (MSR_IA32_PRED_CMD),
>> + [val] "i" (PRED_CMD_IBPB)
>> + : "memory");
>> +}
> Btw, we can simplify this a bit by dropping the inputs and marking the 3
> GPRs as clobbered:
>
> alternative_input("",
> "mov $0x49, %%ecx\n\t"
> "mov $1, %%eax\n\t"
> "xor %%edx, %%edx\n\t"
> "wrmsr\n\t",
> X86_FEATURE_IBPB,
> ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
>
>
> The "memory" clobber is probably not really needed but it wouldn't
> hurt...
>
> Also, above says:
>
>> switch to using ALTERNATIVES instead of static_cpu_has]
> Why?
>
> if (static_cpu_has(X86_FEATURE_IBPB))
> wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
>
> It can't get any more readable than this. Why even f*ck with
> alternatives?
Because static_cpu_has is an indirect branch which will cause
speculation, and we have to avoid that.
David told me that Peter was working on a fix for static_cpu_has to
avoid the speculation, but I do not know what the status of this is.
>
>> +
>> #endif /* __ASSEMBLY__ */
>> #endif /* __NOSPEC_BRANCH_H__ */
>> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
>> index 390b3dc..96548ff 100644
>> --- a/arch/x86/kernel/cpu/bugs.c
>> +++ b/arch/x86/kernel/cpu/bugs.c
>> @@ -249,6 +249,13 @@ static void __init spectre_v2_select_mitigation(void)
>> setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
>> pr_info("Filling RSB on context switch\n");
>> }
>> +
>> + /* Initialize Indirect Branch Prediction Barrier if supported */
>> + if (boot_cpu_has(X86_FEATURE_SPEC_CTRL) ||
>> + boot_cpu_has(X86_FEATURE_AMD_PRED_CMD)) {
>> + setup_force_cpu_cap(X86_FEATURE_IBPB);
>> + pr_info("Enabling Indirect Branch Prediction Barrier\n");
> We don't really need the pr_info as "ibpb" will appear in /proc/cpuinfo.
>
On Sun, 2018-01-21 at 19:06 +0100, Borislav Petkov wrote:
>
> > switch to using ALTERNATIVES instead of static_cpu_has]
>
> Why?
>
> if (static_cpu_has(X86_FEATURE_IBPB))
> wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
>
> It can't get any more readable than this. Why even f*ck with
> alternatives?
Because we're backporting this to every stable kernel under the sun,
and they don't already require asm-goto. Peter does have a dozen or so
objtool patches to protect us against the missed GCC optimisation which
would make it vulnerable via a conditional branch, but we'll do that
*after* the basic backportable implementation using ALTERNATIVE goes
in.
On Sun, Jan 21, 2018 at 07:29:43PM +0100, KarimAllah Ahmed wrote:
> Because static_cpu_has is an indirect branch which will cause speculation
> and
> we have to avoid that.
How so?
The JMP_NOSPEC macro protects against JMP <reg> jumps, but the
static_cpu_has() macros all add JMPs with an immediate offset from the
next instruction, and I wouldn't call them indirect JMPs as there are no
registers to speculate on there.
IOW, before alternatives, the patch site of static_cpu_has() looks like this:
# 151 "./arch/x86/include/asm/cpufeature.h" 1
1: jmp 6f
and that 6f label is:
6:
testb $1,boot_cpu_data+50(%rip) #, MEM[(const char *)&boot_cpu_data + 50B]
jnz .L707 #
jmp .L706 #
i.e., we basically do if (boot_cpu_has(..)).
If the feature is not present, same patch site turns into:
4: jmp .L706 #
5:
after patching. Which is a label after the whole thing. That is not an
indrect jump through a register either.
If the feature is present, the patch site becomes:
NOP - added by the patching
# ./arch/x86/include/asm/msr.h:105: asm volatile("1: wrmsr\n"
.loc 18 105 0
movl $73, %ecx #, tmp138
movl $1, %eax #, tmp139
xorl %edx, %edx # tmp140
#APP
# 105 "./arch/x86/include/asm/msr.h" 1
1: wrmsr
2:
so execution runs directly into the MSR write and the JMP is gone.
So I don't see indirect branches anywhere...
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
On Sun, Jan 21, 2018 at 06:54:22PM +0000, David Woodhouse wrote:
> Because we're backporting this to every stable kernel under the sun,
> and they don't already require asm-goto.
Considering the cost of the barrier, I'd simplify the whole endeavor
*considerably* for backporting:
if (boot_cpu_has(X86_FEATURE_IBPB))
wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
Problem solved.
Upstream can then do static_cpu_has() if it wants to.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
On Sun, 2018-01-21 at 20:01 +0100, Borislav Petkov wrote:
>
> so execution runs directly into the MSR write and the JMP is gone.
>
> So I don't see indirect branches anywhere...
Wait until the wind changes.
Congratulations, you've just turned a potential GCC missed optimisation
into a kernel bug. We don't *care* that it's unlikely that GCC will
miss that optimisation. The point is that it doesn't *have* to do it,
and we don't *check*.
cf. https://lkml.org/lkml/2018/1/12/176
... after which Peter went off and implemented that check, which is all
fine and dandy but let's not rely on backporting that too.
On Sun, 2018-01-21 at 20:04 +0100, Borislav Petkov wrote:
> On Sun, Jan 21, 2018 at 06:54:22PM +0000, David Woodhouse wrote:
> > Because we're backporting this to every stable kernel under the
> sun,
> > and they don't already require asm-goto.
>
> Considering the cost of the barrier, I'd simplify the whole endeavor
> *considerably* for backporting:
>
> if (boot_cpu_has(X86_FEATURE_IBPB))
> wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
>
> Problem solved.
Nope. Plenty of patch sets *did* have the bug above though, until it
was spotted.
On 21/01/18 19:31, David Woodhouse wrote:
> On Sun, 2018-01-21 at 20:01 +0100, Borislav Petkov wrote:
>> so execution runs directly into the MSR write and the JMP is gone.
>>
>> So I don't see indirect branches anywhere...
> Wait until the wind changes.
>
> Congratulations, you've just turned a potential GCC missed optimisation
> into a kernel bug. We don't *care* that it's unlikely that GCC will
> miss that optimisation. The point is that it doesn't *have* to do it,
> and we don't *check*.
>
> cf. https://lkml.org/lkml/2018/1/12/176
>
>
> ... after which Peter went off and implemented that check, which is all
> fine and dandy but let's not rely on backporting that too.
It doesn't matter if an attacker can use SP1 to try and skip the IBPB.
Exits to userspace/guest are serialising (with some retroactive updates
to the architecture spec coming), so an attacker can't cause victim code
to be executed before speculation has caught up and noticed that the
IBPB did need to happen.
~Andrew
On Sun, Jan 21, 2018 at 07:31:00PM +0000, David Woodhouse wrote:
> Congratulations, you've just turned a potential GCC missed optimisation
> into a kernel bug. We don't *care* that it's unlikely that GCC will
> miss that optimisation. The point is that it doesn't *have* to do it,
> and we don't *check*.
Err, I guess I'm missing something:
gcc doesn't add any code here - we do. We turn the JMP into a NOP and
the three instructions of the wrmsr are from an asm volatile().
So please elaborate.
Thx.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
On Sun, Jan 21, 2018 at 07:31:39PM +0000, David Woodhouse wrote:
> > if (boot_cpu_has(X86_FEATURE_IBPB))
> > wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
> >
> > Problem solved.
>
> Nope. Plenty of patch sets *did* have the bug above though, until it
> was spotted.
And that bug is...?
On Sun, 2018-01-21 at 19:37 +0000, Andrew Cooper wrote:
>
> It doesn't matter if an attacker can use SP1 to try and skip the IBPB.
>
> Exits to userspace/guest are serialising (with some retroactive updates
> to the architecture spec coming), so an attacker can't cause victim code
> to be executed before speculation has caught up and noticed that the
> IBPB did need to happen.
For the specific case of IBPB, knowing what we do about non-
architectural behaviour, that's probably true.
In the early patch sets in both Xen and Linux, we did have a
conditional branch on {sys,hyper}call entry that blithely let the CPU
speculate all the way to the {sys,hyper}call table jump. No exit to
userspace/guest there.
Which is why I've been saying I want call sites to have an *explicit*
comment saying why they're safe to use conditional branches without
taking extra steps to be safe, like the 'else lfence'. And why I'd
really like the underlying primitives to *support* being fixed at
runtime.
ALTERNATIVE is fine for now, and can end up with basically the same
code as static_cpu_has() — either we do the wrmsr, or we jump/nop over
where it used to be. Let's worry about getting clever with it *later*.
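To make the "either we do the wrmsr, or we nop over it" point concrete, an ALTERNATIVE-based barrier might look roughly like this (a sketch assuming the feature bit and MSR definitions from this series are in scope; the helper in the eventual patches may well differ):

	/*
	 * Sketch: the wrmsr sequence is patched in at boot only if IBPB is
	 * present; otherwise the site is a run of NOPs, so there is no
	 * conditional branch left for the CPU to speculate over.
	 */
	static inline void indirect_branch_prediction_barrier(void)
	{
		asm volatile(ALTERNATIVE("",
					 "movl %[msr], %%ecx\n\t"
					 "movl %[val], %%eax\n\t"
					 "xorl %%edx, %%edx\n\t"
					 "wrmsr\n\t",
					 X86_FEATURE_IBPB)
			     : : [msr] "i" (MSR_IA32_PRED_CMD),
				 [val] "i" (PRED_CMD_IBPB)
			     : "eax", "ecx", "edx", "memory");
	}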
On Sun, 2018-01-21 at 20:54 +0100, Borislav Petkov wrote:
> On Sun, Jan 21, 2018 at 07:31:39PM +0000, David Woodhouse wrote:
> > > if (boot_cpu_has(X86_FEATURE_IBPB))
> > > wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
> > >
> > > Problem solved.
> >
> > Nope. Plenty of patch sets *did* have the bug above though, until it
> > was spotted.
>
> And that bug is...?
That bug is the *reason* we're arguing about static_cpu_has vs.
ALTERNATIVE.
A conditional branch that the CPU sees can be speculated over...
Now, Andrew is right that in a number of cases there will be another
serialising instruction before we ever hit a problematic indirect
branch. But as I just said elsewhere, I'd really like the *primitives*
to support unconditional operation.
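If a conditional branch is kept, the "else lfence" idea mentioned earlier would look roughly like this (a sketch; whether an lfence is a sufficient speculation fence on a given part is exactly the kind of thing the requested comment would have to justify):

	if (static_cpu_has(X86_FEATURE_IBPB)) {
		wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
	} else {
		/* Fence so a mispredicted "no IBPB" path cannot run ahead. */
		asm volatile("lfence" ::: "memory");
	}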
On Sun, Jan 21, 2018 at 08:07:06PM +0000, David Woodhouse wrote:
> That bug is the *reason* we're arguing about static_cpu_has vs.
> ALTERNATIVE.
>
> A conditional branch that the CPU sees can be speculated over...
Lemme see if I understand it correctly: you don't want to have any such
constructs:
testb $1,boot_cpu_data+50(%rip) #, MEM[(const char *)&boot_cpu_data + 50B]
jnz .L707 #
which could cause any speculation.
Instead, you want to have unconditional code which is slapped in by
alternatives.
Oh, and you don't want to have that at privilege crossing sites, like
VMEXIT or so.
Am I close?
> Now, Andrew is right that in a number of cases there will be another
> serialising instruction before we ever hit a problematic indirect
> branch. But as I just said elsewhere, I'd really like the *primitives*
> to support unconditional operation.
In this particular case, WRMSR is already serializing too. So it is hard
to exploit stuff here :-)
On 21/01/2018 20:04, David Woodhouse wrote:
> On Sun, 2018-01-21 at 19:37 +0000, Andrew Cooper wrote:
>> It doesn't matter if an attacker can use SP1 to try and skip the IBPB.
>>
>> Exits to userspace/guest are serialising (with some retroactive updates
>> to the architecture spec coming), so an attacker can't cause victim code
>> to be executed before speculation has caught up and noticed that the
>> IBPB did need to happen.
> For the specific case of IBPB, knowing what we do about non-
> architectural behaviour, that's probably true.
>
> In the early patch sets in both Xen and Linux, we did have a
> conditional branch on {sys,hyper}call entry that blithely let the CPU
> speculate all the way to the {sys,hyper}call table jump. No exit to
> userspace/guest there.
Right, but that is a different situation. That is an attacker trying to
attack the kernel/hypervisor directly using SP2, which is mitigated with
retpoline/lfence+jmp/IBRS (as appropriate).
This IBPB case is an attacker trying to attack a new piece of userspace
using SP2, and furthermore, trying to use SP1 to skip the IBPB.
It is an inherent property of all these issues that an attacker can't
cause the misdirected basic blocks to be retired, which means they can't
change the actual behaviour of execution in supervisor context.
As the exit to user/guest context is serialising, the only thing the
attacker can usefully do is tickle a speculatively-leaky block.
> Which is why I've been saying I want call sites to have an *explicit*
> comment saying why they're safe to use conditional branches without
> taking extra steps to be safe, like the 'else lfence'. And why I'd
> really like the underlying primitives to *support* being fixed at
> runtime.
I'm afraid that, by this logic, every conditional branch needs a
comment, and that is impractical. I don't see what is special about
this conditional branch vs every other conditional branch in the
codebase, and calling it out in isolation feels wrong.
~Andrew
On Sun, 2018-01-21 at 20:19 +0000, Andrew Cooper wrote:
> On 21/01/2018 20:04, David Woodhouse wrote:
> > For the specific case of IBPB, knowing what we do about non-
> > architectural behaviour, that's probably true.
>
> This IBPB case is an attacker trying to attack a new piece of userspace
> using SP2, and furthermore, trying to use SP1 to skip the IBPB.
> ...
> As the exit to user/guest context is serialising, the only thing the
> attacker can usefully do is tickle a speculatively-leaky block.
Right, I think we're saying the same things above. It's probably OK for
IBPB given that we know that vmlaunch is *really* serialising despite
not being architecturally so.
And sure, all of these attacks are *highly* improbable. The one on the
way into the syscall was the really easy one.
> > Which is why I've been saying I want call sites to have an *explicit*
> > comment saying why they're safe to use conditional branches without
> > taking extra steps to be safe, like the 'else lfence'. And why I'd
> > really like the underlying primitives to *support* being fixed at
> > runtime.
>
> I'm afraid that, by this logic, every conditional branch needs a
> comment, and that is impractical. I don't see what is special about
> this conditional branch vs every other conditional branch in the
> codebase, and calling it out in isolation feels wrong.
The code paths these are going into are generally fairly linear, and
they're inserted at the point where they can't be bypassed by any
condition *except* the corresponding boot_cpu_has(IBxx). Are there
other conditional branches that would take us right across the wrmsr
and into vulnerable code?
Maybe I am being overly paranoid and it really was just that *one* IBRS
write in the syscall path that was vulnerable, and all the rest would
be fine. I'd still rather start simple and use alternatives for now,
and then get clever later. It's not like there's a real *problem* with
using alternatives.
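For context, the context-switch call site in this series only issues the barrier when switching into a task that is not dumpable; roughly like the following (an approximation of the shape of the code, not the verbatim hunk, and the helper name is the one sketched earlier):

	/*
	 * In the switch_mm() path (approximate): only barrier when switching
	 * into a non-dumpable, i.e. protection-worthy, task.
	 */
	if (tsk && tsk->mm &&
	    get_dumpable(tsk->mm) != SUID_DUMP_USER)
		indirect_branch_prediction_barrier();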
On 1/21/2018 12:01 PM, Andrew Cooper wrote:
> On 21/01/18 17:50, Tom Lendacky wrote:
>> On 1/21/2018 3:49 AM, David Woodhouse wrote:
>>> AMD doesn't implement the Speculation Control MSR that Intel does, but
>>> the Prediction Command MSR does exist and is advertised by a separate
>>> CPUID bit. Add support for that.
>>>
>>> Signed-off-by: David Woodhouse <[email protected]>
>>> ---
>>> arch/x86/include/asm/cpufeatures.h | 1 +
>>> arch/x86/kernel/cpu/scattered.c | 1 +
>>> 2 files changed, 2 insertions(+)
>>>
>>> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
>>> index 2efb8d4..8c9e5c0 100644
>>> --- a/arch/x86/include/asm/cpufeatures.h
>>> +++ b/arch/x86/include/asm/cpufeatures.h
>>> @@ -207,6 +207,7 @@
>>> #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
>>> #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
>>>
>>> +#define X86_FEATURE_AMD_PRED_CMD ( 7*32+17) /* Prediction Command MSR (AMD) */
>>> #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
>>> #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
>>>
>>> diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
>>> index df11f5d..4eb90b2 100644
>>> --- a/arch/x86/kernel/cpu/scattered.c
>>> +++ b/arch/x86/kernel/cpu/scattered.c
>>> @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
>>> { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 },
>>> { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 },
>>> { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
>>> + { X86_FEATURE_AMD_PRED_CMD, CPUID_EBX, 12, 0x80000008, 0 },
>> I replied to the previous version, but I'll add it here, too.
>>
>> This should be moved to the existing 0x80000008/EBX entry rather than have
>> it in scattered.
>>
>> Also, there will be a total of three bits:
>> IBPB: 0x80000008 EBX[12]
>> IBRS: 0x80000008 EBX[14]
>> STIBP: 0x80000008 EBX[15]
>>
>> Since IBRS and STIBP share the same MSR, if a processor only supports
>> STIBP (MSR bit 1), for ease of software implementation the processor
>> does not GP fault attempts to write bit 0. In a similar manner, if a
>> processor only supports IBRS (MSR bit 0), the processor does not GP
>> fault attempts to write bit 1.
>
> Are you able to comment on the read behaviour after a write which is
> ignored?
>
> If the behaviour is "read as written" then virt cases are fine. If the
> "ignore" causes a zero to be read back, then we're still going to need
> to intercept and emulate all VM accesses.
The behavior is "read as written", so the bit will be updated even though
the support for the bit is not present.
Thanks,
Tom
>
> Thanks,
>
> ~Andrew
>
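If the bit does move out of scattered.c into a dedicated CPUID 0x80000008 EBX word as Tom suggests, the cpufeatures.h side might end up looking something like this (the word number and the feature names here are placeholders, not taken from the posted series):

	/* Hypothetical dedicated word for CPUID level 0x80000008 (EBX); word number illustrative */
	#define X86_FEATURE_AMD_IBPB	(13*32+12) /* Indirect Branch Prediction Barrier */
	#define X86_FEATURE_AMD_IBRS	(13*32+14) /* Indirect Branch Restricted Speculation */
	#define X86_FEATURE_AMD_STIBP	(13*32+15) /* Single Thread Indirect Branch Predictors */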
On 22/01/18 14:31, Tom Lendacky wrote:
> On 1/21/2018 12:01 PM, Andrew Cooper wrote:
>> Are you able to comment on the read behaviour after a write which is
>> ignored?
>>
>> If the behaviour is "read as written" then virt cases are fine. If the
>> "ignore" causes a zero to be read back, then we're still going to need
>> to intercept and emulate all VM accesses.
> The behavior is "read as written", so the bit will be updated even though
> the support for the bit is not present.
Fantastic! Thanks for confirming.
~Andrew