A revised version of the dropped IBRS-firmware patch which now just
ignores the horrid hpwdt driver completely, and explicitly disables
preemption while IBRS is set.
Also a revised version of the IBRS_ALL patch with a typo fixed, a revert
of another broken bikeshedding patch, and support for retpoline builds
with Clang now that Clang is fixed.
David Woodhouse (4):
x86/speculation: Use IBRS if available before calling into firmware
x86/speculation: Support "Enhanced IBRS" on future CPUs
Revert "x86/retpoline: Simplify vmexit_fill_RSB()"
x86/retpoline: Support retpoline build with Clang
arch/x86/Makefile | 5 +-
arch/x86/entry/entry_32.S | 3 +-
arch/x86/entry/entry_64.S | 3 +-
arch/x86/include/asm/apm.h | 6 ++
arch/x86/include/asm/asm-prototypes.h | 3 -
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/efi.h | 17 ++++-
arch/x86/include/asm/nospec-branch.h | 118 +++++++++++++++++++++++++++++-----
arch/x86/kernel/cpu/bugs.c | 26 +++++++-
arch/x86/kvm/vmx.c | 31 +++++----
arch/x86/lib/Makefile | 1 -
arch/x86/lib/retpoline.S | 56 ----------------
include/linux/compiler-clang.h | 5 ++
include/linux/compiler-gcc.h | 4 ++
include/linux/init.h | 8 +--
15 files changed, 185 insertions(+), 102 deletions(-)
--
2.7.4
Retpoline means the kernel is safe because it has no indirect branches.
But firmware isn't, so use IBRS for firmware calls if it's available.
Block preemption while IBRS is set, although in practice the call sites
already had to be doing that.
Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware
code, from an NMI handler. I don't want to touch that with a bargepole.
Signed-off-by: David Woodhouse <[email protected]>
---
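Illustrative only, not part of the patch: a rough sketch of how a
hypothetical runtime firmware call would be bracketed by the new helpers.
The start/end pair keeps preemption disabled so the IBRS write and the
matching clear happen on the same CPU; my_firmware_service() is a made-up
entry point, not a real interface.

        #include <asm/nospec-branch.h>

        static int example_firmware_call(u32 arg)       /* hypothetical wrapper */
        {
                int ret;

                firmware_restrict_branch_speculation_start();
                ret = my_firmware_service(arg);         /* hypothetical firmware entry */
                firmware_restrict_branch_speculation_end();

                return ret;
        }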
arch/x86/include/asm/apm.h | 6 ++++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/efi.h | 17 ++++++++++++++--
arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++---------
arch/x86/kernel/cpu/bugs.c | 12 ++++++++++-
5 files changed, 63 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015d..c356098 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
+#include <asm/nospec-branch.h>
+
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
}
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 73b5fff..66c1434 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb..a399c1e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,7 @@
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/tlb.h>
+#include <asm/nospec-branch.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +37,18 @@
extern asmlinkage unsigned long efi_call_phys(void *, ...);
-#define arch_efi_call_virt_setup() kernel_fpu_begin()
-#define arch_efi_call_virt_teardown() kernel_fpu_end()
+#define arch_efi_call_virt_setup() \
+({ \
+ kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
+})
+
+#define arch_efi_call_virt_teardown() \
+({ \
+ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+})
+
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
\
if (efi_scratch.use_pgd) { \
efi_scratch.prev_cr3 = __read_cr3(); \
@@ -91,6 +103,7 @@ struct efi_scratch {
__flush_tlb_all(); \
} \
\
+ firmware_restrict_branch_speculation_end(); \
__kernel_fpu_end(); \
preempt_enable(); \
})
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 76b0585..0995c6a 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -163,17 +163,38 @@ static inline void vmexit_fill_RSB(void)
#endif
}
+#define alternative_msr_write(_msr, _val, _feature) \
+ asm volatile(ALTERNATIVE("", \
+ "movl %[msr], %%ecx\n\t" \
+ "movl %[val], %%eax\n\t" \
+ "movl $0, %%edx\n\t" \
+ "wrmsr", \
+ _feature) \
+ : : [msr] "i" (_msr), [val] "i" (_val) \
+ : "eax", "ecx", "edx", "memory")
+
static inline void indirect_branch_prediction_barrier(void)
{
- asm volatile(ALTERNATIVE("",
- "movl %[msr], %%ecx\n\t"
- "movl %[val], %%eax\n\t"
- "movl $0, %%edx\n\t"
- "wrmsr",
- X86_FEATURE_USE_IBPB)
- : : [msr] "i" (MSR_IA32_PRED_CMD),
- [val] "i" (PRED_CMD_IBPB)
- : "eax", "ecx", "edx", "memory");
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+ X86_FEATURE_USE_IBPB);
+}
+
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ */
+static inline void firmware_restrict_branch_speculation_start(void)
+{
+ preempt_disable();
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,
+ X86_FEATURE_USE_IBRS_FW);
+}
+
+static inline void firmware_restrict_branch_speculation_end(void)
+{
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,
+ X86_FEATURE_USE_IBRS_FW);
+ preempt_enable();
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d71c8b5..bfca937 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ static void __init spectre_v2_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
+
+ /*
+ * Retpoline means the kernel is safe because it has no indirect
+ * branches. But firmware isn't, so use IBRS to protect that.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
+ }
}
#undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
spectre_v2_module_string());
}
#endif
--
2.7.4
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/Makefile | 5 ++++-
include/linux/compiler-clang.h | 5 +++++
include/linux/compiler-gcc.h | 4 ++++
include/linux/init.h | 8 ++++----
4 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad5516..dbc7d0e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,7 +232,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
- RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+
+ RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
ifneq ($(RETPOLINE_CFLAGS),)
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
endif
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 3b609ed..be3aef6 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -19,3 +19,8 @@
#define randomized_struct_fields_start struct {
#define randomized_struct_fields_end };
+
+/* Clang doesn't have a way to turn it off per-function, yet. */
+#ifdef __noretpoline
+#undef __noretpoline
+#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 631354a..2e8a287 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -93,6 +93,10 @@
#define __weak __attribute__((weak))
#define __alias(symbol) __attribute__((alias(#symbol)))
+#ifdef RETPOLINE
+#define __noretpoline __attribute__((indirect_branch("keep")))
+#endif
+
/*
* it doesn't make sense on ARM (currently the only user of __naked)
* to trace naked functions because then mcount is called without
diff --git a/include/linux/init.h b/include/linux/init.h
index 506a981..bc27cf0 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -6,10 +6,10 @@
#include <linux/types.h>
/* Built-in __init functions needn't be compiled with retpoline */
-#if defined(RETPOLINE) && !defined(MODULE)
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#if defined(__noretpoline) && !defined(MODULE)
+#define __noinitretpoline __noretpoline
#else
-#define __noretpoline
+#define __noinitretpoline
#endif
/* These macros are used to mark some functions or
@@ -47,7 +47,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(.init.text) __cold __latent_entropy __noretpoline
+#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline
#define __initdata __section(.init.data)
#define __initconst __section(.init.rodata)
#define __exitdata __section(.exit.data)
--
2.7.4
The original IBRS hack in microcode is horribly slow. For the next
generation of CPUs, as a stopgap until we get a proper fix, Intel
promise an "Enhanced IBRS" which will be fast.
The assumption is that predictions in the BTB/RSB will be tagged with
the VMX mode and ring that they were learned in, and thus the CPU will
avoid consuming unsafe predictions without a performance penalty.
Intel's documentation says that it is still required to set the IBRS bit
in the SPEC_CTRL MSR and ensure that it remains set.
Cope with this by trapping and emulating *all* access to SPEC_CTRL from
KVM guests when the IBRS_ALL feature is present, so it can never be
turned off. Guests who see IBRS_ALL should never do anything except
turn it on at boot anyway. And if they didn't know about IBRS_ALL and
they keep frobbing IBRS on every kernel entry/exit... well the vmexit
for a no-op is probably going to be faster than they were expecting
anyway, so they'll live.
Signed-off-by: David Woodhouse <[email protected]>
Acked-by: Arjan van de Ven <[email protected]>
---
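For illustration only, not part of the patch: roughly what a guest that
understands IBRS_ALL is expected to do, mirroring the bugs.c change below.
It checks ARCH_CAPABILITIES once at boot, sets the IBRS bit in SPEC_CTRL
and then never touches the MSR again.

        #include <asm/cpufeature.h>
        #include <asm/msr.h>
        #include <asm/msr-index.h>

        static void __init example_enable_ibrs_all(void)       /* hypothetical */
        {
                u64 ia32_cap = 0;

                if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
                        rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

                if (ia32_cap & ARCH_CAP_IBRS_ALL)
                        wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);
        }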
arch/x86/include/asm/nospec-branch.h | 9 ++++++++-
arch/x86/kernel/cpu/bugs.c | 16 ++++++++++++++--
arch/x86/kvm/vmx.c | 31 +++++++++++++++++++------------
3 files changed, 41 insertions(+), 15 deletions(-)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 0995c6a..34cbce3 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -141,9 +141,16 @@ enum spectre_v2_mitigation {
SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
SPECTRE_V2_RETPOLINE_GENERIC,
SPECTRE_V2_RETPOLINE_AMD,
- SPECTRE_V2_IBRS,
+ SPECTRE_V2_IBRS_ALL,
};
+extern enum spectre_v2_mitigation spectre_v2_enabled;
+
+static inline bool spectre_v2_ibrs_all(void)
+{
+ return spectre_v2_enabled == SPECTRE_V2_IBRS_ALL;
+}
+
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bfca937..eefd900 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -88,12 +88,13 @@ static const char *spectre_v2_strings[] = {
[SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+ [SPECTRE_V2_IBRS_ALL] = "Mitigation: Enhanced IBRS",
};
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -237,6 +238,16 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_CMD_FORCE:
case SPECTRE_V2_CMD_AUTO:
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
+ u64 ia32_cap = 0;
+
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ if (ia32_cap & ARCH_CAP_IBRS_ALL) {
+ mode = SPECTRE_V2_IBRS_ALL;
+ wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);
+ goto ibrs_all;
+ }
+ }
if (IS_ENABLED(CONFIG_RETPOLINE))
goto retpoline_auto;
break;
@@ -274,6 +285,7 @@ static void __init spectre_v2_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}
+ ibrs_all:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -305,7 +317,7 @@ static void __init spectre_v2_select_mitigation(void)
* Retpoline means the kernel is safe because it has no indirect
* branches. But firmware isn't, so use IBRS to protect that.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ if (mode != SPECTRE_V2_IBRS_ALL && boot_cpu_has(X86_FEATURE_IBRS)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 91e3539..62ad7e3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3419,13 +3419,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->spec_ctrl = data;
- if (!data)
+ if (!data && !spectre_v2_ibrs_all())
break;
/*
* For non-nested:
* When it's written (to non-zero) for the first time, pass
- * it through.
+ * it through unless we have IBRS_ALL and it should just be
+ * set for ever.
*
* For nested:
* The handling of the MSR bitmap for L2 guests is done in
@@ -9441,7 +9442,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (vmx->spec_ctrl)
+ if (!spectre_v2_ibrs_all() && vmx->spec_ctrl)
wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
vmx->__launched = vmx->loaded_vmcs->launched;
@@ -9563,11 +9564,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
);
/*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+ * Without IBRS_ALL, we do not use IBRS in the kernel. If this
+ * vCPU has used the SPEC_CTRL MSR it may have left it on;
+ * save the value and turn it off. This is much more efficient
+ * than blindly adding it to the atomic save/restore list.
+ * Especially as the former (saving guest MSRs on vmexit)
+ * doesn't even exist in KVM.
*
* For non-nested case:
* If the L01 MSR bitmap does not intercept the MSR, then we need to
@@ -9576,12 +9578,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* For nested case:
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
+ *
+ * If IBRS_ALL is present then the whole thing is a no-op fiction
+ * for guests and every access is trapped, so do nothing.
*/
- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
- rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ if (!spectre_v2_ibrs_all()) {
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
- if (vmx->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ if (vmx->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ }
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
--
2.7.4
This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting
the RSB filling out of line and calling it, we waste one RSB slot for
returning from the function itself, which means one fewer actual function
call we can make if we're doing the Skylake abomination of call-depth
counting.
It also changed the number of RSB stuffings we do on vmexit from 32,
which was correct, to 16. Let's just stop with the bikeshedding; it
didn't actually *fix* anything anyway.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/entry/entry_32.S | 3 +-
arch/x86/entry/entry_64.S | 3 +-
arch/x86/include/asm/asm-prototypes.h | 3 --
arch/x86/include/asm/nospec-branch.h | 70 +++++++++++++++++++++++++++++++----
arch/x86/lib/Makefile | 1 -
arch/x86/lib/retpoline.S | 56 ----------------------------
6 files changed, 65 insertions(+), 71 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2a35b1e..60c4c34 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %ebx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1c5420420..1d83563 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %rbx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d11161..1908214 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK(si)
INDIRECT_THUNK(di)
INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 34cbce3..94749fb 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
#ifdef __ASSEMBLY__
/*
@@ -78,10 +122,17 @@
#endif
.endm
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "", "call __clear_rsb", \ftr
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
#endif
.endm
@@ -163,10 +214,15 @@ extern char __indirect_thunk_end[];
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- alternative_input("",
- "call __fill_rsb",
- X86_FEATURE_RETPOLINE,
- ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+ unsigned long loops;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 69a4739..f23934b 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -27,7 +27,6 @@ lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3..c909961 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
GENERATE_THUNK(r14)
GENERATE_THUNK(r15)
#endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
- mov $(\nr / 2), %_ASM_BX
- .align 16
-771:
- call 772f
-773: /* speculation trap */
- pause
- lfence
- jmp 773b
- .align 16
-772:
- call 774f
-775: /* speculation trap */
- pause
- lfence
- jmp 775b
- .align 16
-774:
- dec %_ASM_BX
- jnz 771b
- add $((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
-
-ENTRY(__fill_rsb)
- STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
- ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
- STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
- ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
--
2.7.4
On 2/14/2018 7:44 AM, David Woodhouse wrote:
> Retpoline means the kernel is safe because it has no indirect branches.
> But firmware isn't, so use IBRS for firmware calls if it's available.
>
> Block preemption while IBRS is set, although in practice the call sites
> already had to be doing that.
>
> Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware
> code, from an NMI handler. I don't want to touch that with a bargepole.
>
> Signed-off-by: David Woodhouse <[email protected]>
> ---
> arch/x86/include/asm/apm.h | 6 ++++++
> arch/x86/include/asm/cpufeatures.h | 1 +
> arch/x86/include/asm/efi.h | 17 ++++++++++++++--
> arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++---------
> arch/x86/kernel/cpu/bugs.c | 12 ++++++++++-
> 5 files changed, 63 insertions(+), 12 deletions(-)
>
... <snip> ...
> +/*
> + * With retpoline, we must use IBRS to restrict branch prediction
> + * before calling into firmware.
> + */
> +static inline void firmware_restrict_branch_speculation_start(void)
> +{
> + preempt_disable();
> + alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,
> + X86_FEATURE_USE_IBRS_FW);
> +}
> +
> +static inline void firmware_restrict_branch_speculation_end(void)
> +{
> + alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,
> + X86_FEATURE_USE_IBRS_FW);
> + preempt_enable();
> }
Shouldn't these writes to the MSR be just for the IBRS bit? The spec
also defines the STIBP bit for this MSR, and if that bit had been set by
BIOS for example, these writes will clear it. And who knows what future
bits may be defined and how they'll be used.
Thanks,
Tom
>
> #endif /* __ASSEMBLY__ */
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index d71c8b5..bfca937 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -300,6 +300,15 @@ static void __init spectre_v2_select_mitigation(void)
> setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
> pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
> }
> +
> + /*
> + * Retpoline means the kernel is safe because it has no indirect
> + * branches. But firmware isn't, so use IBRS to protect that.
> + */
> + if (boot_cpu_has(X86_FEATURE_IBRS)) {
> + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
> + pr_info("Enabling Restricted Speculation for firmware calls\n");
> + }
> }
>
> #undef pr_fmt
> @@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
> if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
> return sprintf(buf, "Not affected\n");
>
> - return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
> + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
> boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
> + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
> spectre_v2_module_string());
> }
> #endif
>
On Wed, 2018-02-14 at 10:07 -0600, Tom Lendacky wrote:
> Shouldn't these writes to the MSR be just for the IBRS bit? The spec
> also defines the STIBP bit for this MSR, and if that bit had been set by
> BIOS for example, these writes will clear it. And who knows what future
> bits may be defined and how they'll be used.
We don't use STIBP. If one day we do decide to set it in userspace for
"sensitive" processes, if we're done having the debate about what those
are, then that seems unlikely to conflict with what this code is doing
anyway, as we would presumably *clear* it again on the way back into
the kernel.
I certainly don't want to add a read/modify/write cycle here just to
cope with some hypothetical future use case for STIBP, when there would
be better ways to cope.
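For comparison only, and explicitly not what the patch proposes: preserving
other SPEC_CTRL bits such as STIBP would mean a read-modify-write on every
firmware call, i.e. an extra rdmsr in the hot path, roughly:

        static inline void firmware_ibrs_start_rmw(void)       /* hypothetical */
        {
                u64 val;

                preempt_disable();
                rdmsrl(MSR_IA32_SPEC_CTRL, val);
                wrmsrl(MSR_IA32_SPEC_CTRL, val | SPEC_CTRL_IBRS);
        }

and the matching end helper would have to clear only the IBRS bit again.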
On 2/14/2018 10:11 AM, David Woodhouse wrote:
>
>
> On Wed, 2018-02-14 at 10:07 -0600, Tom Lendacky wrote:
>> Shouldn't these writes to the MSR be just for the IBRS bit? The spec
>> also defines the STIBP bit for this MSR, and if that bit had been set by
>> BIOS for example, these writes will clear it. And who knows what future
>> bits may be defined and how they'll be used.
>
> We don't use STIBP. If one day we do decide to set it in userspace for
Right, I understand the kernel doesn't use STIBP, that's why I mentioned
BIOS as an example.
> "sensitive" processes, if we're done having the debate about what those
> are, then that seems unlikely to conflict with what this code is doing
> anyway, as we would presumably *clear* it again on the way back into
> the kernel.
>
> I certainly don't want to add a read/modify/write cycle here just to
Right, definitely to be avoided. Maybe the value could be tracked in a
per-cpu variable so you never have to read it before the write. Just
change the bit in question and write. Not sure that's really feasible
though.
> cope with some hypothetical future use case for STIBP, when there would
> be better ways to cope.
Just putting it out there, no worries.
Thanks,
Tom
>
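A rough sketch of the per-cpu shadow Tom suggests, hypothetical and
untested: track the last value written so a bit can be flipped without a
rdmsr, while preserving whatever else happens to be set.

        #include <linux/percpu.h>
        #include <asm/msr.h>
        #include <asm/msr-index.h>

        static DEFINE_PER_CPU(u64, spec_ctrl_shadow);           /* hypothetical */

        static inline void spec_ctrl_set_ibrs(bool on)          /* hypothetical */
        {
                u64 val = this_cpu_read(spec_ctrl_shadow);

                if (on)
                        val |= SPEC_CTRL_IBRS;
                else
                        val &= ~SPEC_CTRL_IBRS;

                this_cpu_write(spec_ctrl_shadow, val);
                wrmsrl(MSR_IA32_SPEC_CTRL, val);
        }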
On Wed, 2018-02-14 at 10:36 -0600, Tom Lendacky wrote:
> On 2/14/2018 10:11 AM, David Woodhouse wrote:
> >
> >
> >
> > On Wed, 2018-02-14 at 10:07 -0600, Tom Lendacky wrote:
> > >
> > > Shouldn't these writes to the MSR be just for the IBRS bit? The spec
> > > also defines the STIBP bit for this MSR, and if that bit had been set by
> > > BIOS for example, these writes will clear it. And who knows what future
> > > bits may be defined and how they'll be used.
> >
> > We don't use STIBP. If one day we do decide to set it in userspace for
>
> Right, I understand the kernel doesn't use STIBP, that's why I mentioned
> BIOS as an example.
BIOS has no business setting this for us either. Either we support it
and turn it on in the kernel ourselves, or it's off.