On CPUs which advertise IBRS_ALL, enable it and disable the retpoline
as IBRS_ALL will be faster. Guests which still frob IBRS on every kernel
entry/exit will trap and the MSR access will be emulated, but this should
not be any slower than what they *thought* they were doing anyway. If
Paolo genuinely cares about such guests, a later patch can attempt to
optimise this somehow. As long as it doesn't penalise the host and the
other guests while it's at it.
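(Concretely, the trap-and-emulate part amounts to KVM never disabling
the SPEC_CTRL write intercept when IBRS_ALL is in use; a sketch of the
shape of it only, with the real change in the vmx.c hunk of the
Enhanced IBRS patch:)

        case MSR_IA32_SPEC_CTRL:
                vmx->spec_ctrl = data;  /* "emulate": just record it */
                /*
                 * Sketch only: with IBRS_ALL the host keeps IBRS set,
                 * so never pass the MSR through. Every guest write
                 * keeps trapping here and the hardware value never
                 * actually changes.
                 */
                if (spectre_v2_ibrs_all())
                        break;
                /* ... existing pass-through logic for retpoline ... */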
Also use IBRS whenever it's available before calling into firmware at
runtime.
Revert another bikeshedding patch that was broken, and add support for
retpoline builds with clang.
---
v2: Remember to export spectre_v2_enabled
v3: No changes; just rebase to current tip/x86/pti and clarify the state
of the discussion about SPEC_CTRL trapping for IBRS_ALL.
David Woodhouse (4):
x86/speculation: Use IBRS if available before calling into firmware
x86/speculation: Support "Enhanced IBRS" on future CPUs
Revert "x86/retpoline: Simplify vmexit_fill_RSB()"
x86/retpoline: Support retpoline build with Clang
arch/x86/Makefile | 5 +-
arch/x86/entry/entry_32.S | 3 +-
arch/x86/entry/entry_64.S | 3 +-
arch/x86/include/asm/apm.h | 6 ++
arch/x86/include/asm/asm-prototypes.h | 3 -
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/efi.h | 17 ++++-
arch/x86/include/asm/nospec-branch.h | 118 +++++++++++++++++++++++++++++-----
arch/x86/kernel/cpu/bugs.c | 27 +++++++-
arch/x86/kvm/vmx.c | 31 +++++----
arch/x86/lib/Makefile | 1 -
arch/x86/lib/retpoline.S | 56 ----------------
include/linux/compiler-clang.h | 5 ++
include/linux/compiler-gcc.h | 4 ++
include/linux/init.h | 8 +--
15 files changed, 186 insertions(+), 102 deletions(-)
--
2.7.4
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/Makefile | 5 ++++-
include/linux/compiler-clang.h | 5 +++++
include/linux/compiler-gcc.h | 4 ++++
include/linux/init.h | 8 ++++----
4 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad5516..dbc7d0e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,7 +232,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
- RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+
+ RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
ifneq ($(RETPOLINE_CFLAGS),)
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
endif
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d02a4df..d3f264a 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -27,3 +27,8 @@
#if __has_feature(address_sanitizer)
#define __SANITIZE_ADDRESS__
#endif
+
+/* Clang doesn't have a way to turn it off per-function, yet. */
+#ifdef __noretpoline
+#undef __noretpoline
+#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 73bc63e..673fbf9 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -93,6 +93,10 @@
#define __weak __attribute__((weak))
#define __alias(symbol) __attribute__((alias(#symbol)))
+#ifdef RETPOLINE
+#define __noretpoline __attribute__((indirect_branch("keep")))
+#endif
+
/*
* it doesn't make sense on ARM (currently the only user of __naked)
* to trace naked functions because then mcount is called without
diff --git a/include/linux/init.h b/include/linux/init.h
index 506a981..bc27cf0 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -6,10 +6,10 @@
#include <linux/types.h>
/* Built-in __init functions needn't be compiled with retpoline */
-#if defined(RETPOLINE) && !defined(MODULE)
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#if defined(__noretpoline) && !defined(MODULE)
+#define __noinitretpoline __noretpoline
#else
-#define __noretpoline
+#define __noinitretpoline
#endif
/* These macros are used to mark some functions or
@@ -47,7 +47,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(.init.text) __cold __latent_entropy __noretpoline
+#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline
#define __initdata __section(.init.data)
#define __initconst __section(.init.rodata)
#define __exitdata __section(.exit.data)
--
2.7.4
This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting
the RSB filling out of line and calling it, we waste one RSB slot for
returning from the function itself, which means one fewer actual function
call we can make if we're doing the Skylake abomination of call-depth
counting.
It also changed the number of RSB stuffings we do on vmexit from 32,
which was correct, to 16. Let's just stop with the bikeshedding; it
didn't actually *fix* anything anyway.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/entry/entry_32.S | 3 +-
arch/x86/entry/entry_64.S | 3 +-
arch/x86/include/asm/asm-prototypes.h | 3 --
arch/x86/include/asm/nospec-branch.h | 70 +++++++++++++++++++++++++++++++----
arch/x86/lib/Makefile | 1 -
arch/x86/lib/retpoline.S | 56 ----------------------------
6 files changed, 65 insertions(+), 71 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c02..6ad064c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %ebx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 77edc23..7a53879 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %rbx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d11161..1908214 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK(si)
INDIRECT_THUNK(di)
INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 34cbce3..94749fb 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
#ifdef __ASSEMBLY__
/*
@@ -78,10 +122,17 @@
#endif
.endm
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "", "call __clear_rsb", \ftr
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
#endif
.endm
@@ -163,10 +214,15 @@ extern char __indirect_thunk_end[];
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- alternative_input("",
- "call __fill_rsb",
- X86_FEATURE_RETPOLINE,
- ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+ unsigned long loops;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700..25a972c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3..c909961 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
GENERATE_THUNK(r14)
GENERATE_THUNK(r15)
#endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
- mov $(\nr / 2), %_ASM_BX
- .align 16
-771:
- call 772f
-773: /* speculation trap */
- pause
- lfence
- jmp 773b
- .align 16
-772:
- call 774f
-775: /* speculation trap */
- pause
- lfence
- jmp 775b
- .align 16
-774:
- dec %_ASM_BX
- jnz 771b
- add $((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
-
-ENTRY(__fill_rsb)
- STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
- ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
- STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
- ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
--
2.7.4
Retpoline means the kernel is safe because it has no indirect branches.
But firmware isn't, so use IBRS for firmware calls if it's available.
Block preemption while IBRS is set, although in practice the call sites
already had to be doing that.
Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware
code, from an NMI handler. I don't want to touch that with a bargepole.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/include/asm/apm.h | 6 ++++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/efi.h | 17 ++++++++++++++--
arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++---------
arch/x86/kernel/cpu/bugs.c | 12 ++++++++++-
5 files changed, 63 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015d..c356098 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
+#include <asm/nospec-branch.h>
+
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
}
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3..f41079d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb..a399c1e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,7 @@
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/tlb.h>
+#include <asm/nospec-branch.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +37,18 @@
extern asmlinkage unsigned long efi_call_phys(void *, ...);
-#define arch_efi_call_virt_setup() kernel_fpu_begin()
-#define arch_efi_call_virt_teardown() kernel_fpu_end()
+#define arch_efi_call_virt_setup() \
+({ \
+ kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
+})
+
+#define arch_efi_call_virt_teardown() \
+({ \
+ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+})
+
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
\
if (efi_scratch.use_pgd) { \
efi_scratch.prev_cr3 = __read_cr3(); \
@@ -91,6 +103,7 @@ struct efi_scratch {
__flush_tlb_all(); \
} \
\
+ firmware_restrict_branch_speculation_end(); \
__kernel_fpu_end(); \
preempt_enable(); \
})
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 76b0585..0995c6a 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -163,17 +163,38 @@ static inline void vmexit_fill_RSB(void)
#endif
}
+#define alternative_msr_write(_msr, _val, _feature) \
+ asm volatile(ALTERNATIVE("", \
+ "movl %[msr], %%ecx\n\t" \
+ "movl %[val], %%eax\n\t" \
+ "movl $0, %%edx\n\t" \
+ "wrmsr", \
+ _feature) \
+ : : [msr] "i" (_msr), [val] "i" (_val) \
+ : "eax", "ecx", "edx", "memory")
+
static inline void indirect_branch_prediction_barrier(void)
{
- asm volatile(ALTERNATIVE("",
- "movl %[msr], %%ecx\n\t"
- "movl %[val], %%eax\n\t"
- "movl $0, %%edx\n\t"
- "wrmsr",
- X86_FEATURE_USE_IBPB)
- : : [msr] "i" (MSR_IA32_PRED_CMD),
- [val] "i" (PRED_CMD_IBPB)
- : "eax", "ecx", "edx", "memory");
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+ X86_FEATURE_USE_IBPB);
+}
+
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ */
+static inline void firmware_restrict_branch_speculation_start(void)
+{
+ preempt_disable();
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,
+ X86_FEATURE_USE_IBRS_FW);
+}
+
+static inline void firmware_restrict_branch_speculation_end(void)
+{
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,
+ X86_FEATURE_USE_IBRS_FW);
+ preempt_enable();
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d71c8b5..bfca937 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ static void __init spectre_v2_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
+
+ /*
+ * Retpoline means the kernel is safe because it has no indirect
+ * branches. But firmware isn't, so use IBRS to protect that.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
+ }
}
#undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
spectre_v2_module_string());
}
#endif
--
2.7.4
The original IBRS hack in microcode is horribly slow. For the next
generation of CPUs, as a stopgap until we get a proper fix, Intel
promise an "Enhanced IBRS" which will be fast.
The assumption is that predictions in the BTB/RSB will be tagged with
the VMX mode and ring that they were learned in, and thus the CPU will
avoid consuming unsafe predictions without a performance penalty.
Intel's documentation says that it is still required to set the IBRS bit
in the SPEC_CTRL MSR and ensure that it remains set.
Cope with this by trapping and emulating *all* access to SPEC_CTRL from
KVM guests when the IBRS_ALL feature is present, so it can never be
turned off. Guests who see IBRS_ALL should never do anything except
turn it on at boot anyway. And if they didn't know about IBRS_ALL and
they keep frobbing IBRS on every kernel entry/exit... well the vmexit
for a no-op is probably going to be faster than they were expecting
anyway, so they'll live.
Signed-off-by: David Woodhouse <[email protected]>
Acked-by: Arjan van de Ven <[email protected]>
---
arch/x86/include/asm/nospec-branch.h | 9 ++++++++-
arch/x86/kernel/cpu/bugs.c | 17 +++++++++++++++--
arch/x86/kvm/vmx.c | 31 +++++++++++++++++++------------
3 files changed, 42 insertions(+), 15 deletions(-)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 0995c6a..34cbce3 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -141,9 +141,16 @@ enum spectre_v2_mitigation {
SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
SPECTRE_V2_RETPOLINE_GENERIC,
SPECTRE_V2_RETPOLINE_AMD,
- SPECTRE_V2_IBRS,
+ SPECTRE_V2_IBRS_ALL,
};
+extern enum spectre_v2_mitigation spectre_v2_enabled;
+
+static inline bool spectre_v2_ibrs_all(void)
+{
+ return spectre_v2_enabled == SPECTRE_V2_IBRS_ALL;
+}
+
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bfca937..505c467 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -88,12 +88,14 @@ static const char *spectre_v2_strings[] = {
[SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
[SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
[SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
+ [SPECTRE_V2_IBRS_ALL] = "Mitigation: Enhanced IBRS",
};
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+EXPORT_SYMBOL_GPL(spectre_v2_enabled);
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -237,6 +239,16 @@ static void __init spectre_v2_select_mitigation(void)
case SPECTRE_V2_CMD_FORCE:
case SPECTRE_V2_CMD_AUTO:
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
+ u64 ia32_cap = 0;
+
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ if (ia32_cap & ARCH_CAP_IBRS_ALL) {
+ mode = SPECTRE_V2_IBRS_ALL;
+ wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);
+ goto ibrs_all;
+ }
+ }
if (IS_ENABLED(CONFIG_RETPOLINE))
goto retpoline_auto;
break;
@@ -274,6 +286,7 @@ static void __init spectre_v2_select_mitigation(void)
setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
}
+ ibrs_all:
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
@@ -305,7 +318,7 @@ static void __init spectre_v2_select_mitigation(void)
* Retpoline means the kernel is safe because it has no indirect
* branches. But firmware isn't, so use IBRS to protect that.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ if (mode != SPECTRE_V2_IBRS_ALL && boot_cpu_has(X86_FEATURE_IBRS)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dec126..5dfeb11 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3387,13 +3387,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx->spec_ctrl = data;
- if (!data)
+ if (!data && !spectre_v2_ibrs_all())
break;
/*
* For non-nested:
* When it's written (to non-zero) for the first time, pass
- * it through.
+ * it through unless we have IBRS_ALL and it should just be
+ * set for ever.
*
* For nested:
* The handling of the MSR bitmap for L2 guests is done in
@@ -9451,7 +9452,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (vmx->spec_ctrl)
+ if (!spectre_v2_ibrs_all() && vmx->spec_ctrl)
wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
vmx->__launched = vmx->loaded_vmcs->launched;
@@ -9573,11 +9574,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
);
/*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
+ * Without IBRS_ALL, we do not use IBRS in the kernel. If this
+ * vCPU has used the SPEC_CTRL MSR it may have left it on;
+ * save the value and turn it off. This is much more efficient
+ * than blindly adding it to the atomic save/restore list.
+ * Especially as the former (saving guest MSRs on vmexit)
+ * doesn't even exist in KVM.
*
* For non-nested case:
* If the L01 MSR bitmap does not intercept the MSR, then we need to
@@ -9586,12 +9588,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* For nested case:
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
+ *
+ * If IBRS_ALL is present then the whole thing is a no-op fiction
+ * for guests and every access is trapped, so do nothing.
*/
- if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
- rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ if (!spectre_v2_ibrs_all()) {
+ if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
- if (vmx->spec_ctrl)
- wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ if (vmx->spec_ctrl)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ }
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
--
2.7.4
On Mon, 19 Feb 2018, David Woodhouse wrote:
> Retpoline means the kernel is safe because it has no indirect branches.
> But firmware isn't, so use IBRS for firmware calls if it's available.
>
> Block preemption while IBRS is set, although in practice the call sites
> already had to be doing that.
>
> Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware
> code, from an NMI handler. I don't want to touch that with a bargepole.
>
> Signed-off-by: David Woodhouse <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
On Mon, 19 Feb 2018, David Woodhouse wrote:
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 0995c6a..34cbce3 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -141,9 +141,16 @@ enum spectre_v2_mitigation {
> SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
> SPECTRE_V2_RETPOLINE_GENERIC,
> SPECTRE_V2_RETPOLINE_AMD,
> - SPECTRE_V2_IBRS,
> + SPECTRE_V2_IBRS_ALL,
> };
>
> +extern enum spectre_v2_mitigation spectre_v2_enabled;
> +
> +static inline bool spectre_v2_ibrs_all(void)
> +{
> + return spectre_v2_enabled == SPECTRE_V2_IBRS_ALL;
> +}
> +
> extern char __indirect_thunk_start[];
> extern char __indirect_thunk_end[];
>
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index bfca937..505c467 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -88,12 +88,14 @@ static const char *spectre_v2_strings[] = {
> [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline",
> [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline",
> [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline",
> + [SPECTRE_V2_IBRS_ALL] = "Mitigation: Enhanced IBRS",
> };
>
> #undef pr_fmt
> #define pr_fmt(fmt) "Spectre V2 : " fmt
>
> -static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
> +enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
> +EXPORT_SYMBOL_GPL(spectre_v2_enabled);
>
> #ifdef RETPOLINE
> static bool spectre_v2_bad_module;
> @@ -237,6 +239,16 @@ static void __init spectre_v2_select_mitigation(void)
>
> case SPECTRE_V2_CMD_FORCE:
> case SPECTRE_V2_CMD_AUTO:
> + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
> + u64 ia32_cap = 0;
> +
> + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
> + if (ia32_cap & ARCH_CAP_IBRS_ALL) {
Hmm. We already read the MSR in cpu/common.c to check for the RDCL_NO
bit. Shouldn't we just read it once and set a feature bit for IBRS_ALL?
> + mode = SPECTRE_V2_IBRS_ALL;
> + wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS);
> + goto ibrs_all;
> + }
> + }
> if (IS_ENABLED(CONFIG_RETPOLINE))
> goto retpoline_auto;
> break;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 3dec126..5dfeb11 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3387,13 +3387,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>
> vmx->spec_ctrl = data;
>
> - if (!data)
> + if (!data && !spectre_v2_ibrs_all())
> break;
>
> /*
> * For non-nested:
> * When it's written (to non-zero) for the first time, pass
> - * it through.
> + * it through unless we have IBRS_ALL and it should just be
> + * set for ever.
A non-zero write is going to disable the intercept even when IBRS_ALL
is on. The comment says it should be set forever, i.e. not changeable by
the guest. So the condition should be:
if (!data || spectre_v2_ibrs_all())
break;
Hmm?
> *
> * For nested:
> * The handling of the MSR bitmap for L2 guests is done in
> @@ -9451,7 +9452,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
> * is no need to worry about the conditional branch over the wrmsr
> * being speculatively taken.
> */
> - if (vmx->spec_ctrl)
> + if (!spectre_v2_ibrs_all() && vmx->spec_ctrl)
> wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
Which matches the code here.
Thanks,
tglx
On Mon, 19 Feb 2018, David Woodhouse wrote:
> This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting
> the RSB filling out of line and calling it, we waste one RSB slot for
> returning from the function itself, which means one fewer actual function
> call we can make if we're doing the Skylake abomination of call-depth
> counting.
>
> It also changed the number of RSB stuffings we do on vmexit from 32,
> which was correct, to 16. Let's just stop with the bikeshedding; it
> didn't actually *fix* anything anyway.
>
> Signed-off-by: David Woodhouse <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
On Mon, 19 Feb 2018, David Woodhouse wrote:
Reviewed-by: Thomas Gleixner <[email protected]>
> +/* Clang doesn't have a way to turn it off per-function, yet. */
Is that going to happen in the foreseeable future? Hmm, why am I asking,
clang is going to break anyway once we require asm-gotos hard.
Thanks,
tglx
On Tue, 2018-02-20 at 09:36 +0100, Thomas Gleixner wrote:
> On Mon, 19 Feb 2018, David Woodhouse wrote:
>
> Reviewed-by: Thomas Gleixner <[email protected]>
>
> >
> > +/* Clang doesn't have a way to turn it off per-function, yet. */
>
> Is that going to happen in the foreseeable future? Hmm, why am I asking,
> clang is going to break anyway once we require asm-gotos hard.
It's only a minor optimisation, so I told them there was no massive
urgency to get it implemented. Unlike asm-goto.
On Tue, 2018-02-20 at 09:31 +0100, Thomas Gleixner wrote:
> > @@ -237,6 +239,16 @@ static void __init spectre_v2_select_mitigation(void)
> >
> > case SPECTRE_V2_CMD_FORCE:
> > case SPECTRE_V2_CMD_AUTO:
> > + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
> > + u64 ia32_cap = 0;
> > +
> > + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
> > + if (ia32_cap & ARCH_CAP_IBRS_ALL) {
>
> Hmm. We already read the MSR in cpu/common.c to check for the RDCL_NO
> bit. Shouldn't we just read it once and set a feature bit for IBRS_ALL?
Yeah. See my comment to Dave where he was going to add a *third* place
that does the same for a different bit. I think we should probably just
turn these into cpufeature bits like the 'scattered' ones.
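Something like this, perhaps (a sketch only, with made-up synthetic
feature bit names), reading MSR_IA32_ARCH_CAPABILITIES exactly once at
CPU init and letting everybody else test feature bits:

        /*
         * Sketch, not a patch: X86_FEATURE_RDCL_NO and
         * X86_FEATURE_IBRS_ALL are hypothetical synthetic bits here.
         */
        static void init_arch_capabilities(struct cpuinfo_x86 *c)
        {
                u64 ia32_cap;

                if (!cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
                        return;

                rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);

                if (ia32_cap & ARCH_CAP_RDCL_NO)
                        set_cpu_cap(c, X86_FEATURE_RDCL_NO);
                if (ia32_cap & ARCH_CAP_IBRS_ALL)
                        set_cpu_cap(c, X86_FEATURE_IBRS_ALL);
        }

Then both cpu/common.c and bugs.c just test boot_cpu_has() and never
touch the MSR again.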
> > @@ -3387,13 +3387,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >
> > vmx->spec_ctrl = data;
> >
> > - if (!data)
> > + if (!data && !spectre_v2_ibrs_all())
> > break;
> >
> > /*
> > * For non-nested:
> > * When it's written (to non-zero) for the first time, pass
> > - * it through.
> > + * it through unless we have IBRS_ALL and it should just be
> > + * set for ever.
>
> A non-zero write is going to disable the intercept even when IBRS_ALL
> is on. The comment says it should be set forever, i.e. not changeable by
> the guest. So the condition should be:
>
> if (!data || spectre_v2_ibrs_all())
> break;
> Hmm?
Yes, good catch. Thanks.
However, Paolo is very insistent that taking the trap every time is
actually a lot *slower* than really frobbing IBRS on certain
microarchitectures, so my hand-waving "pfft, what did they expect?" is
not acceptable.
Which I think puts us back to the "throwing the toys out of the pram"
solution; demanding that Intel give us a new feature bit for "IBRS_ALL,
and the bit in the MSR is a no-op". Which was going to be true for
*most* new CPUs anyway. (Note: a blacklist for those few CPUs on which
it *isn't* true might also suffice instead of a new feature bit.)
Unless someone really wants to implement the atomic MSR save and
restore on vmexit, and make it work with nesting, and make the whole
thing sufficiently simple that we don't throw our toys out of the pram
anyway when we see it?
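(The atomic save/restore would presumably mean the VMX MSR-load/store
lists, i.e. something along these lines via the existing
add_atomic_switch_msr() in vmx.c; a sketch only, and making it play
nicely with nesting is exactly the part nobody has volunteered for:)

        /*
         * Sketch: have the CPU switch SPEC_CTRL atomically on
         * vmentry/vmexit instead of trapping or doing wrmsr by hand.
         */
        add_atomic_switch_msr(vmx, MSR_IA32_SPEC_CTRL,
                              vmx->spec_ctrl,   /* guest value */
                              0);               /* host value  */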
Commit-ID: d1c99108af3c5992640aa2afa7d2e88c3775c06e
Gitweb: https://git.kernel.org/tip/d1c99108af3c5992640aa2afa7d2e88c3775c06e
Author: David Woodhouse <[email protected]>
AuthorDate: Mon, 19 Feb 2018 10:50:56 +0000
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Feb 2018 09:38:26 +0100
Revert "x86/retpoline: Simplify vmexit_fill_RSB()"
This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting
the RSB filling out of line and calling it, we waste one RSB slot for
returning from the function itself, which means one fewer actual function
call we can make if we're doing the Skylake abomination of call-depth
counting.
It also changed the number of RSB stuffings we do on vmexit from 32,
which was correct, to 16. Let's just stop with the bikeshedding; it
didn't actually *fix* anything anyway.
Signed-off-by: David Woodhouse <[email protected]>
Acked-by: Thomas Gleixner <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/entry/entry_32.S | 3 +-
arch/x86/entry/entry_64.S | 3 +-
arch/x86/include/asm/asm-prototypes.h | 3 --
arch/x86/include/asm/nospec-branch.h | 70 +++++++++++++++++++++++++++++++----
arch/x86/lib/Makefile | 1 -
arch/x86/lib/retpoline.S | 56 ----------------------------
6 files changed, 65 insertions(+), 71 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 16c2c02..6ad064c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %ebx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 77edc23..7a53879 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
* exist, overwrite the RSB with entries which capture
* speculative execution to prevent attack.
*/
- /* Clobbers %rbx */
- FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+ FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
/* restore callee-saved registers */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 4d11161..1908214 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
INDIRECT_THUNK(si)
INDIRECT_THUNK(di)
INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 76b0585..af34b1e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS 16 /* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version - two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp) \
+ mov $(nr/2), reg; \
+771: \
+ call 772f; \
+773: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 773b; \
+772: \
+ call 774f; \
+775: /* speculation trap */ \
+ pause; \
+ lfence; \
+ jmp 775b; \
+774: \
+ dec reg; \
+ jnz 771b; \
+ add $(BITS_PER_LONG/8) * nr, sp;
+
#ifdef __ASSEMBLY__
/*
@@ -78,10 +122,17 @@
#endif
.endm
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+ * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+ * monstrosity above, manually.
+ */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "", "call __clear_rsb", \ftr
+ ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \
+ \ftr
+.Lskip_rsb_\@:
#endif
.endm
@@ -156,10 +207,15 @@ extern char __indirect_thunk_end[];
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- alternative_input("",
- "call __fill_rsb",
- X86_FEATURE_RETPOLINE,
- ASM_NO_INPUT_CLOBBER(_ASM_BX, "memory"));
+ unsigned long loops;
+
+ asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
+ ALTERNATIVE("jmp 910f",
+ __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+ X86_FEATURE_RETPOLINE)
+ "910:"
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 91e9700..25a972c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -28,7 +28,6 @@ lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RETPOLINE) += retpoline.o
-OBJECT_FILES_NON_STANDARD_retpoline.o :=y
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 480edc3..c909961 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,7 +7,6 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
-#include <asm/bitsperlong.h>
.macro THUNK reg
.section .text.__x86.indirect_thunk
@@ -47,58 +46,3 @@ GENERATE_THUNK(r13)
GENERATE_THUNK(r14)
GENERATE_THUNK(r15)
#endif
-
-/*
- * Fill the CPU return stack buffer.
- *
- * Each entry in the RSB, if used for a speculative 'ret', contains an
- * infinite 'pause; lfence; jmp' loop to capture speculative execution.
- *
- * This is required in various cases for retpoline and IBRS-based
- * mitigations for the Spectre variant 2 vulnerability. Sometimes to
- * eliminate potentially bogus entries from the RSB, and sometimes
- * purely to ensure that it doesn't get empty, which on some CPUs would
- * allow predictions from other (unwanted!) sources to be used.
- *
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version - two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-.macro STUFF_RSB nr:req sp:req
- mov $(\nr / 2), %_ASM_BX
- .align 16
-771:
- call 772f
-773: /* speculation trap */
- pause
- lfence
- jmp 773b
- .align 16
-772:
- call 774f
-775: /* speculation trap */
- pause
- lfence
- jmp 775b
- .align 16
-774:
- dec %_ASM_BX
- jnz 771b
- add $((BITS_PER_LONG/8) * \nr), \sp
-.endm
-
-#define RSB_FILL_LOOPS 16 /* To avoid underflow */
-
-ENTRY(__fill_rsb)
- STUFF_RSB RSB_FILL_LOOPS, %_ASM_SP
- ret
-END(__fill_rsb)
-EXPORT_SYMBOL_GPL(__fill_rsb)
-
-#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
-
-ENTRY(__clear_rsb)
- STUFF_RSB RSB_CLEAR_LOOPS, %_ASM_SP
- ret
-END(__clear_rsb)
-EXPORT_SYMBOL_GPL(__clear_rsb)
Commit-ID: 87358710c1fb4f1bf96bbe2349975ff9953fc9b2
Gitweb: https://git.kernel.org/tip/87358710c1fb4f1bf96bbe2349975ff9953fc9b2
Author: David Woodhouse <[email protected]>
AuthorDate: Mon, 19 Feb 2018 10:50:57 +0000
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Feb 2018 11:17:58 +0100
x86/retpoline: Support retpoline builds with Clang
Signed-off-by: David Woodhouse <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/Makefile | 5 ++++-
include/linux/compiler-clang.h | 5 +++++
include/linux/compiler-gcc.h | 4 ++++
include/linux/init.h | 8 ++++----
4 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fad5516..dbc7d0e 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -232,7 +232,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Avoid indirect branches in kernel to deal with Spectre
ifdef CONFIG_RETPOLINE
- RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register
+ RETPOLINE_CFLAGS_CLANG := -mretpoline-external-thunk
+
+ RETPOLINE_CFLAGS += $(call cc-option,$(RETPOLINE_CFLAGS_GCC),$(call cc-option,$(RETPOLINE_CFLAGS_CLANG)))
ifneq ($(RETPOLINE_CFLAGS),)
KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
endif
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d02a4df..d3f264a 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -27,3 +27,8 @@
#if __has_feature(address_sanitizer)
#define __SANITIZE_ADDRESS__
#endif
+
+/* Clang doesn't have a way to turn it off per-function, yet. */
+#ifdef __noretpoline
+#undef __noretpoline
+#endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 73bc63e..673fbf9 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -93,6 +93,10 @@
#define __weak __attribute__((weak))
#define __alias(symbol) __attribute__((alias(#symbol)))
+#ifdef RETPOLINE
+#define __noretpoline __attribute__((indirect_branch("keep")))
+#endif
+
/*
* it doesn't make sense on ARM (currently the only user of __naked)
* to trace naked functions because then mcount is called without
diff --git a/include/linux/init.h b/include/linux/init.h
index 506a981..bc27cf0 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -6,10 +6,10 @@
#include <linux/types.h>
/* Built-in __init functions needn't be compiled with retpoline */
-#if defined(RETPOLINE) && !defined(MODULE)
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#if defined(__noretpoline) && !defined(MODULE)
+#define __noinitretpoline __noretpoline
#else
-#define __noretpoline
+#define __noinitretpoline
#endif
/* These macros are used to mark some functions or
@@ -47,7 +47,7 @@
/* These are for everybody (although not all archs will actually
discard it in modules) */
-#define __init __section(.init.text) __cold __latent_entropy __noretpoline
+#define __init __section(.init.text) __cold __latent_entropy __noinitretpoline
#define __initdata __section(.init.data)
#define __initconst __section(.init.rodata)
#define __exitdata __section(.exit.data)
Commit-ID: dd84441a797150dcc49298ec95c459a8891d8bb1
Gitweb: https://git.kernel.org/tip/dd84441a797150dcc49298ec95c459a8891d8bb1
Author: David Woodhouse <[email protected]>
AuthorDate: Mon, 19 Feb 2018 10:50:54 +0000
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 20 Feb 2018 09:38:33 +0100
x86/speculation: Use IBRS if available before calling into firmware
Retpoline means the kernel is safe because it has no indirect branches.
But firmware isn't, so use IBRS for firmware calls if it's available.
Block preemption while IBRS is set, although in practice the call sites
already had to be doing that.
Ignore hpwdt.c for now. It's taking spinlocks and calling into firmware
code, from an NMI handler. I don't want to touch that with a bargepole.
Signed-off-by: David Woodhouse <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/apm.h | 6 ++++++
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/include/asm/efi.h | 17 ++++++++++++++--
arch/x86/include/asm/nospec-branch.h | 39 +++++++++++++++++++++++++++---------
arch/x86/kernel/cpu/bugs.c | 12 ++++++++++-
5 files changed, 63 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h
index 4d4015d..c356098 100644
--- a/arch/x86/include/asm/apm.h
+++ b/arch/x86/include/asm/apm.h
@@ -7,6 +7,8 @@
#ifndef _ASM_X86_MACH_DEFAULT_APM_H
#define _ASM_X86_MACH_DEFAULT_APM_H
+#include <asm/nospec-branch.h>
+
#ifdef APM_ZERO_SEGS
# define APM_DO_ZERO_SEGS \
"pushl %%ds\n\t" \
@@ -32,6 +34,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -44,6 +47,7 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in,
"=S" (*esi)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
}
static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
@@ -56,6 +60,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
* N.B. We do NOT need a cld after the BIOS call
* because we always save and restore the flags.
*/
+ firmware_restrict_branch_speculation_start();
__asm__ __volatile__(APM_DO_ZERO_SEGS
"pushl %%edi\n\t"
"pushl %%ebp\n\t"
@@ -68,6 +73,7 @@ static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in,
"=S" (si)
: "a" (func), "b" (ebx_in), "c" (ecx_in)
: "memory", "cc");
+ firmware_restrict_branch_speculation_end();
return error;
}
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0dfe4d3..f41079d 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -213,6 +213,7 @@
#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 85f6ccb..a399c1e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -6,6 +6,7 @@
#include <asm/pgtable.h>
#include <asm/processor-flags.h>
#include <asm/tlb.h>
+#include <asm/nospec-branch.h>
/*
* We map the EFI regions needed for runtime services non-contiguously,
@@ -36,8 +37,18 @@
extern asmlinkage unsigned long efi_call_phys(void *, ...);
-#define arch_efi_call_virt_setup() kernel_fpu_begin()
-#define arch_efi_call_virt_teardown() kernel_fpu_end()
+#define arch_efi_call_virt_setup() \
+({ \
+ kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
+})
+
+#define arch_efi_call_virt_teardown() \
+({ \
+ firmware_restrict_branch_speculation_end(); \
+ kernel_fpu_end(); \
+})
+
/*
* Wrap all the virtual calls in a way that forces the parameters on the stack.
@@ -73,6 +84,7 @@ struct efi_scratch {
efi_sync_low_kernel_mappings(); \
preempt_disable(); \
__kernel_fpu_begin(); \
+ firmware_restrict_branch_speculation_start(); \
\
if (efi_scratch.use_pgd) { \
efi_scratch.prev_cr3 = __read_cr3(); \
@@ -91,6 +103,7 @@ struct efi_scratch {
__flush_tlb_all(); \
} \
\
+ firmware_restrict_branch_speculation_end(); \
__kernel_fpu_end(); \
preempt_enable(); \
})
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index af34b1e..ec90c32 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -219,17 +219,38 @@ static inline void vmexit_fill_RSB(void)
#endif
}
+#define alternative_msr_write(_msr, _val, _feature) \
+ asm volatile(ALTERNATIVE("", \
+ "movl %[msr], %%ecx\n\t" \
+ "movl %[val], %%eax\n\t" \
+ "movl $0, %%edx\n\t" \
+ "wrmsr", \
+ _feature) \
+ : : [msr] "i" (_msr), [val] "i" (_val) \
+ : "eax", "ecx", "edx", "memory")
+
static inline void indirect_branch_prediction_barrier(void)
{
- asm volatile(ALTERNATIVE("",
- "movl %[msr], %%ecx\n\t"
- "movl %[val], %%eax\n\t"
- "movl $0, %%edx\n\t"
- "wrmsr",
- X86_FEATURE_USE_IBPB)
- : : [msr] "i" (MSR_IA32_PRED_CMD),
- [val] "i" (PRED_CMD_IBPB)
- : "eax", "ecx", "edx", "memory");
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
+ X86_FEATURE_USE_IBPB);
+}
+
+/*
+ * With retpoline, we must use IBRS to restrict branch prediction
+ * before calling into firmware.
+ */
+static inline void firmware_restrict_branch_speculation_start(void)
+{
+ preempt_disable();
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS,
+ X86_FEATURE_USE_IBRS_FW);
+}
+
+static inline void firmware_restrict_branch_speculation_end(void)
+{
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, 0,
+ X86_FEATURE_USE_IBRS_FW);
+ preempt_enable();
}
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d71c8b5..bfca937 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -300,6 +300,15 @@ retpoline_auto:
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
}
+
+ /*
+ * Retpoline means the kernel is safe because it has no indirect
+ * branches. But firmware isn't, so use IBRS to protect that.
+ */
+ if (boot_cpu_has(X86_FEATURE_IBRS)) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
+ pr_info("Enabling Restricted Speculation for firmware calls\n");
+ }
}
#undef pr_fmt
@@ -326,8 +335,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
return sprintf(buf, "Not affected\n");
- return sprintf(buf, "%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
spectre_v2_module_string());
}
#endif
On Tue, 20 Feb 2018, David Woodhouse wrote:
> On Tue, 2018-02-20 at 09:31 +0100, Thomas Gleixner wrote:
> > > @@ -3387,13 +3387,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > >
> > > vmx->spec_ctrl = data;
> > >
> > > - if (!data)
> > > + if (!data && !spectre_v2_ibrs_all())
> > > break;
> > >
> > > /*
> > > * For non-nested:
> > > * When it's written (to non-zero) for the first time, pass
> > > - * it through.
> > > + * it through unless we have IBRS_ALL and it should just be
> > > + * set for ever.
> >
> > A non-zero write is going to disable the intercept even when IBRS_ALL
> > is on. The comment says it should be set forever, i.e. not changeable by
> > the guest. So the condition should be:
> >
> > if (!data || spectre_v2_ibrs_all())
> > break;
> > Hmm?
>
> Yes, good catch. Thanks.
>
> However, Paolo is very insistent that taking the trap every time is
> actually a lot *slower* than really frobbing IBRS on certain
> microarchitectures, so my hand-waving "pfft, what did they expect?" is
> not acceptable.
>
> Which I think puts us back to the "throwing the toys out of the pram"
> solution; demanding that Intel give us a new feature bit for "IBRS_ALL,
> and the bit in the MSR is a no-op". Which was going to be true for
> *most* new CPUs anyway. (Note: a blacklist for those few CPUs on which
> it *isn't* true might also suffice instead of a new feature bit.)
>
> Unless someone really wants to implement the atomic MSR save and
> restore on vmexit, and make it work with nesting, and make the whole
> thing sufficiently simple that we don't throw our toys out of the pram
> anyway when we see it?
That whole stuff was duct taped into microcode in a rush and the result is
that we have only the choice between fire and frying pan. Whatever we
decide to implement is not going to be a half baked hack.
I fully agree that Intel needs to get their act together and implement
IBRS_ALL sanely.
The right thing to do is to allow the host to lock down the MSR once it
enabled IBRS_ALL so that any write to it will just turn into NOOPs. That
removes all worries about guests and in future generations of CPUs this bit
might just be hardwired to one and the MSR just a dummy for compatibility
reasons.
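Roughly (SPEC_CTRL_LOCK is purely hypothetical; no such bit exists in
any CPU today):

        /*
         * Hypothetical: set IBRS and lock the MSR down in one go, so
         * that all further writes, from guests or anyone else, become
         * silent no-ops.
         */
        wrmsrl(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS | SPEC_CTRL_LOCK);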
Thanks,
tglx
On Tue, 20 Feb 2018, Thomas Gleixner wrote:
> On Tue, 20 Feb 2018, David Woodhouse wrote:
> > On Tue, 2018-02-20 at 09:31 +0100, Thomas Gleixner wrote:
> > > > @@ -3387,13 +3387,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > > >
> > > > vmx->spec_ctrl = data;
> > > >
> > > > - if (!data)
> > > > + if (!data && !spectre_v2_ibrs_all())
> > > > break;
> > > >
> > > > /*
> > > > * For non-nested:
> > > > * When it's written (to non-zero) for the first time, pass
> > > > - * it through.
> > > > + * it through unless we have IBRS_ALL and it should just be
> > > > + * set for ever.
> > >
> > > A non-zero write is going to disable the intercept even when IBRS_ALL
> > > is on. The comment says it should be set forever, i.e. not changeable by
> > > the guest. So the condition should be:
> > >
> > > if (!data || spectre_v2_ibrs_all())
> > > break;
> > > Hmm?
> >
> > Yes, good catch. Thanks.
> >
> > However, Paolo is very insistent that taking the trap every time is
> > actually a lot *slower* than really frobbing IBRS on certain
> > microarchitectures, so my hand-waving "pfft, what did they expect?" is
> > not acceptable.
> >
> > Which I think puts us back to the "throwing the toys out of the pram"
There are no more toys in the pram. I threw them all out weeks ago ...
> > solution; demanding that Intel give us a new feature bit for "IBRS_ALL,
> > and the bit in the MSR is a no-op". Which was going to be true for
> > *most* new CPUs anyway. (Note: a blacklist for those few CPUs on which
> > it *isn't* true might also suffice instead of a new feature bit.)
> >
> > Unless someone really wants to implement the atomic MSR save and
> > restore on vmexit, and make it work with nesting, and make the whole
> > thing sufficiently simple that we don't throw our toys out of the pram
> > anyway when we see it?
>
> That whole stuff was duct taped into microcode in a rush and the result is
> that we have only the choice between fire and frying pan. Whatever we
> decide to implement is not going to be a half baked hack.
s/not// of course
>
> I fully agree that Intel needs to get their act together and implement
> IBRS_ALL sanely.
>
> The right thing to do is to allow the host to lock down the MSR once it
> enabled IBRS_ALL so that any write to it will just turn into NOOPs. That
> removes all worries about guests and in future generations of CPUs this bit
> might just be hardwired to one and the MSR just a dummy for compatibility
> reasons.
>
> Thanks,
>
> tglx
>
On Tue, 2018-02-20 at 11:42 +0100, Thomas Gleixner wrote:
>
> > > However, Paolo is very insistent that taking the trap every time is
> > > actually a lot *slower* than really frobbing IBRS on certain
> > > microarchitectures, so my hand-waving "pfft, what did they expect?" is
> > > not acceptable.
> > >
> > > Which I think puts us back to the "throwing the toys out of the pram"
>
> There are no more toys in the pram. I threw them all out weeks ago ...
One option is to take the patch as-is¹ with the trap on every access.
As soon as Intel define that 'IBRS_ALL_AND_THE_BIT_IS_A_NOOP' bit in
MSR_IA32_ARCH_CAPABILITIES, *then* we can expose it to guests directly
again just as we do at the moment.
That way, the slowdown that Paolo is concerned about is limited to a
small set of current CPUs on which we're mostly unlikely to care too
much about KVM anyway.
¹ as-is, except I really will go and turn the IA32_ARCH_CAPABILITIES
bits into scattered cpufeatures before I repost it.
On 19/02/2018 11:50, David Woodhouse wrote:
> Cope with this by trapping and emulating *all* access to SPEC_CTRL from
> KVM guests when the IBRS_ALL feature is present, so it can never be
> turned off. Guests who see IBRS_ALL should never do anything except
> turn it on at boot anyway. And if they didn't know about IBRS_ALL and
> they keep frobbing IBRS on every kernel entry/exit... well the vmexit
> for a no-op is probably going to be faster than they were expecting
> anyway, so they'll live.
The problem is, it isn't. On a Haswell (which has fairly slow
SPEC_CTRL) toggling IBRS is 200 cycles. This gives a context switch
time of around 2000 clock cycles with PTI enabled.
This is fairly awful, but with a vmexit cost of ~1100 cycles that goes
up to 2000+(1100-200)*2 = 3800, since the guest's two 200-cycle IBRS
writes per system call (kernel entry and exit) each become an ~1100-cycle
vmexit. That's more or less doubling the cost of a system call.
With newer machines SPEC_CTRL cost goes down but vmexit cost doesn't, so
it's only worse.
For now, we really should do something like
        if (vmx->spec_ctrl != host_spec_ctrl)
                wrmsrl(MSR_IA32_SPEC_CTRL, host_spec_ctrl);
        else
                lfence();

which later can become

        if (vmx->spec_ctrl != host_spec_ctrl)
                wrmsrl(MSR_IA32_SPEC_CTRL, host_spec_ctrl);
        else {
                /* lfence not needed if host_spec_ctrl == 0 */
                if (static_cpu_has(BUG_REALLY_WANTS_IBRS))
                        nospec_barrier();
        }
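(host_spec_ctrl above is a hypothetical variable tracking the host's
current SPEC_CTRL value; nothing in the kernel provides it today. On
the vmexit path the whole sequence would be roughly:)

        /*
         * Sketch: read the guest value back only if the intercept is
         * disabled, then restore the host value only when it differs;
         * otherwise an LFENCE is enough to stop speculation running
         * past the conditional branch.
         */
        if (!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))
                rdmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);

        if (vmx->spec_ctrl != host_spec_ctrl)
                wrmsrl(MSR_IA32_SPEC_CTRL, host_spec_ctrl);
        else
                rmb();          /* lfence on x86-64 */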
Paolo
On 20/02/2018 12:22, David Woodhouse wrote:
>>>> However, Paolo is very insistent that taking the trap every time is
>>>> actually a lot *slower* than really frobbing IBRS on certain
>>>> microarchitectures, so my hand-waving "pfft, what did they expect?" is
>>>> not acceptable.
>>>>
>>>> Which I think puts us back to the "throwing the toys out of the pram"
>> There are no more toys in the pram. I threw them all out weeks ago ...
>
> One option is to take the patch as-is¹ with the trap on every access.
Please reword the commit message at least, mentioning that the slow case
is not one we care much about yet (no IBRS_ALL CPUs in the wild
afaik) and we won't care much about in the long run either (IBRS_ALL
really only used on a handful of blacklisted processors).
Thanks,
Paolo
> As soon as Intel define that 'IBRS_ALL_AND_THE_BIT_IS_A_NOOP' bit in
> MSR_IA32_ARCH_CAPABILITIES, *then* we can expose it to guests directly
> again just as we do at the moment.
>
> That way, the slowdown that Paolo is concerned about is limited to a
> small set of current CPUs on which we're mostly unlikely to care too
> much about KVM anyway.
On Tue, 20 Feb 2018, David Woodhouse wrote:
> On Tue, 2018-02-20 at 11:42 +0100, Thomas Gleixner wrote:
> >
> > > > However, Paolo is very insistent that taking the trap every time is
> > > > actually a lot *slower* than really frobbing IBRS on certain
> > > > microarchitectures, so my hand-waving "pfft, what did they expect?" is
> > > > not acceptable.
> > > >
> > > > Which I think puts us back to the "throwing the toys out of the pram"
> >
> > There are no more toys in the pram. I threw them all out weeks ago ...
>
> One option is to take the patch as-is¹ with the trap on every access.
> As soon as Intel define that 'IBRS_ALL_AND_THE_BIT_IS_A_NOOP' bit in
> MSR_IA32_ARCH_CAPABILITIES, *then* we can expose it to guests directly
> again just as we do at the moment.
Arjan, is there any update on this?
Thanks,
tglx