This is a mitigation for the 'variant 2' attack described in
https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
With the GCC patches available from the gcc-7_2_0-retpoline branch of
http://git.infradead.org/users/dwmw2/gcc-retpoline.git, and with manual
patching of assembler code, all vulnerable indirect branches (that occur
after userspace first runs) are eliminated from the kernel.
They are replaced with a 'retpoline' call sequence which deliberately
prevents speculation.
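For illustration, the sequence for an indirect 'jmp *%rax' looks
roughly like this (label names are illustrative; the RETPOLINE_JMP
macro later in this series is the real implementation):

	call	2f		# push the address of the 'mov' below
1:	pause			# speculation lands here and spins
	jmp	1b		# harmlessly until the call retires
2:	mov	%rax, (%rsp)	# overwrite return address with target
	ret			# 'return' to the real branch target

The return stack buffer entry for the 'ret' points at the pause loop,
so the CPU never speculates through to an attacker-controlled target.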
v1: Initial post.
v2: Add CONFIG_RETPOLINE to build kernel without it.
Change warning messages.
Hide modpost warning message
v3: Update to the latest CET-capable retpoline version
Reinstate ALTERNATIVE support
v4: Finish reconciling Andi's and my patch sets, bug fixes.
Exclude objtool support for now
Add 'noretpoline' boot option
Add AMD retpoline alternative
v5: Silence MODVERSIONS warnings
Use pause;jmp loop instead of lfence;jmp
Switch to X86_FEATURE_RETPOLINE positive feature logic
Emit thunks inline from assembler macros
Merge AMD support into initial patch
Andi Kleen (4):
x86/retpoline/irq32: Convert assembler indirect jumps
x86/retpoline: Add boot time option to disable retpoline
x86/retpoline: Exclude objtool with retpoline
retpoline/modpost: Quieten MODVERSION retpoline build
David Woodhouse (8):
x86/spectre: Add X86_BUG_SPECTRE_V[12]
x86/retpoline: Add initial retpoline support
x86/retpoline/crypto: Convert crypto assembler indirect jumps
x86/retpoline/entry: Convert entry assembler indirect jumps
x86/retpoline/ftrace: Convert ftrace assembler indirect jumps
x86/retpoline/hyperv: Convert assembler indirect jumps
x86/retpoline/xen: Convert Xen hypercall indirect jumps
x86/retpoline/checksum32: Convert assembler indirect jumps
Documentation/admin-guide/kernel-parameters.txt | 3 +
arch/x86/Kconfig | 17 ++++-
arch/x86/Kconfig.debug | 6 +-
arch/x86/Makefile | 10 +++
arch/x86/crypto/aesni-intel_asm.S | 5 +-
arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 +-
arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 +-
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 +-
arch/x86/entry/entry_32.S | 5 +-
arch/x86/entry/entry_64.S | 12 +++-
arch/x86/include/asm/cpufeatures.h | 4 ++
arch/x86/include/asm/mshyperv.h | 18 ++---
arch/x86/include/asm/nospec-branch.h | 91 +++++++++++++++++++++++++
arch/x86/include/asm/xen/hypercall.h | 5 +-
arch/x86/kernel/cpu/common.c | 8 +++
arch/x86/kernel/cpu/intel.c | 11 +++
arch/x86/kernel/ftrace_32.S | 6 +-
arch/x86/kernel/ftrace_64.S | 8 +--
arch/x86/kernel/irq_32.c | 9 +--
arch/x86/lib/Makefile | 1 +
arch/x86/lib/checksum_32.S | 7 +-
arch/x86/lib/retpoline.S | 30 ++++++++
scripts/mod/modpost.c | 6 +-
23 files changed, 231 insertions(+), 40 deletions(-)
create mode 100644 arch/x86/include/asm/nospec-branch.h
create mode 100644 arch/x86/lib/retpoline.S
--
2.7.4
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/kernel/cpu/common.c | 3 +++
2 files changed, 5 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 21ac898..1641c2f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -342,5 +342,7 @@
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2d3bd22..372ba3f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (c->x86_vendor != X86_VENDOR_AMD)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
--
2.7.4
Convert all indirect jumps in hyperv inline asm code to use non-speculative
sequences when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/include/asm/mshyperv.h | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 581bb54..6534e57 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -7,6 +7,7 @@
#include <linux/nmi.h>
#include <asm/io.h>
#include <asm/hyperv.h>
+#include <asm/nospec-branch.h>
/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
@@ -186,10 +187,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return U64_MAX;
__asm__ __volatile__("mov %4, %%r8\n"
- "call *%5"
+ NOSPEC_CALL
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
- : "r" (output_address), "m" (hv_hypercall_pg)
+ : "r" (output_address),
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
#else
u32 input_address_hi = upper_32_bits(input_address);
@@ -200,13 +202,13 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
if (!hv_hypercall_pg)
return U64_MAX;
- __asm__ __volatile__("call *%7"
+ __asm__ __volatile__(NOSPEC_CALL
: "=A" (hv_status),
"+c" (input_address_lo), ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input_address_hi),
"D"(output_address_hi), "S"(output_address_lo),
- "m" (hv_hypercall_pg)
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory");
#endif /* !x86_64 */
return hv_status;
@@ -227,10 +229,10 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
#ifdef CONFIG_X86_64
{
- __asm__ __volatile__("call *%4"
+ __asm__ __volatile__(NOSPEC_CALL
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
- : "m" (hv_hypercall_pg)
+ : THUNK_TARGET(hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
}
#else
@@ -238,13 +240,13 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
u32 input1_hi = upper_32_bits(input1);
u32 input1_lo = lower_32_bits(input1);
- __asm__ __volatile__ ("call *%5"
+ __asm__ __volatile__ (NOSPEC_CALL
: "=A"(hv_status),
"+c"(input1_lo),
ASM_CALL_CONSTRAINT
: "A" (control),
"b" (input1_hi),
- "m" (hv_hypercall_pg)
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "edi", "esi");
}
#endif
--
2.7.4
From: Andi Kleen <[email protected]>
Add a noretpoline boot option to disable retpoline and patch out the
extra sequences. It cannot patch out the jumps to the thunk functions
in compiler-generated code, but those thunks then turn into a single
indirect branch.
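With the feature bits cleared, the ALTERNATIVE in NOSPEC_JMP selects
its first variant and each thunk degenerates to (sketch):

__x86.indirect_thunk.rax:
	jmp	*%rax

so a compiler-emitted 'call __x86.indirect_thunk.rax' costs one extra
direct branch but no longer inhibits speculation.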
Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
---
Documentation/admin-guide/kernel-parameters.txt | 3 +++
arch/x86/kernel/cpu/intel.c | 11 +++++++++++
2 files changed, 14 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 520fdec..f30f9b4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2596,6 +2596,9 @@
nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
+ noretpoline [X86] Disable the retpoline kernel indirect branch speculation
+ workarounds. System may allow data leaks with this option.
+
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b720dac..35e123e 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -31,6 +31,17 @@
#include <asm/apic.h>
#endif
+#ifdef RETPOLINE
+static int __init noretpoline_setup(char *__unused)
+{
+ pr_info("Retpoline runtime disabled\n");
+ setup_clear_cpu_cap(X86_FEATURE_RETPOLINE);
+ setup_clear_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+ return 1;
+}
+__setup("noretpoline", noretpoline_setup);
+#endif
+
/*
* Just in case our CPU detection goes bad, or you have a weird system,
* allow a way to override the automatic disabling of MPX.
--
2.7.4
Convert all indirect jumps in crypto assembler code to use non-speculative
sequences when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/crypto/aesni-intel_asm.S | 5 +++--
arch/x86/crypto/camellia-aesni-avx-asm_64.S | 3 ++-
arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 3 ++-
arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 3 ++-
4 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 16627fe..f128680 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,7 @@
#include <linux/linkage.h>
#include <asm/inst.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
/*
* The following macros are used to move an (un)aligned 16 byte value to/from
@@ -2884,7 +2885,7 @@ ENTRY(aesni_xts_crypt8)
pxor INC, STATE4
movdqu IV, 0x30(OUTP)
- call *%r11
+ NOSPEC_CALL %r11
movdqu 0x00(OUTP), INC
pxor INC, STATE1
@@ -2929,7 +2930,7 @@ ENTRY(aesni_xts_crypt8)
_aesni_gf128mul_x_ble()
movups IV, (IVP)
- call *%r11
+ NOSPEC_CALL %r11
movdqu 0x40(OUTP), INC
pxor INC, STATE1
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index f7c495e..ba3f075 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,6 +17,7 @@
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1227,7 +1228,7 @@ camellia_xts_crypt_16way:
vpxor 14 * 16(%rax), %xmm15, %xmm14;
vpxor 15 * 16(%rax), %xmm15, %xmm15;
- call *%r9;
+ NOSPEC_CALL %r9;
addq $(16 * 16), %rsp;
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index eee5b39..9b0a88a 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -1343,7 +1344,7 @@ camellia_xts_crypt_32way:
vpxor 14 * 32(%rax), %ymm15, %ymm14;
vpxor 15 * 32(%rax), %ymm15, %ymm15;
- call *%r9;
+ NOSPEC_CALL %r9;
addq $(16 * 32), %rsp;
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 7a7de27..05178b44 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -45,6 +45,7 @@
#include <asm/inst.h>
#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
@@ -172,7 +173,7 @@ continue_block:
movzxw (bufp, %rax, 2), len
lea crc_array(%rip), bufp
lea (bufp, len, 1), bufp
- jmp *bufp
+ NOSPEC_JMP bufp
################################################################
## 2a) PROCESS FULL BLOCKS:
--
2.7.4
From: Andi Kleen <[email protected]>
objtool's assembler nanny currently cannot deal with the code generated
by the retpoline compiler and throws hundreds of warnings, mostly
because it sees calls that don't have a symbolic target.
Exclude all the options that rely on objtool when RETPOLINE is active.
This mainly means that we use the frame pointer unwinder and livepatch
is not supported.
Eventually objtool can be fixed to handle this.
Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/Kconfig | 4 ++--
arch/x86/Kconfig.debug | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 77c58ae..651d25f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -171,8 +171,8 @@ config X86
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
- select HAVE_STACK_VALIDATION if X86_64
+ select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION && !RETPOLINE
+ select HAVE_STACK_VALIDATION if X86_64 && !RETPOLINE
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_USER_RETURN_NOTIFIER
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 6293a87..9f3928d 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -359,8 +359,8 @@ config PUNIT_ATOM_DEBUG
choice
prompt "Choose kernel unwinder"
- default UNWINDER_ORC if X86_64
- default UNWINDER_FRAME_POINTER if X86_32
+ default UNWINDER_ORC if X86_64 && !RETPOLINE
+ default UNWINDER_FRAME_POINTER if X86_32 || RETPOLINE
---help---
This determines which method will be used for unwinding kernel stack
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
@@ -368,7 +368,7 @@ choice
config UNWINDER_ORC
bool "ORC unwinder"
- depends on X86_64
+ depends on X86_64 && !RETPOLINE
select STACK_VALIDATION
---help---
This option enables the ORC (Oops Rewind Capability) unwinder for
--
2.7.4
Convert indirect jumps in core 32/64bit entry assembler code to use
non-speculative sequences when CONFIG_RETPOLINE is enabled.
Don't use NOSPEC_CALL in entry_SYSCALL_64_fastpath because the return
address after the 'call' instruction must be *precisely* at the
.Lentry_SYSCALL_64_after_fastpath label for stub_ptregs_64 to work,
and the use of alternatives will mess that up unless we play horrid
games to prepend with NOPs and make the variants the same length. It's
not worth it; in the case where we ALTERNATIVE out the retpoline, the
first instruction at __x86.indirect_thunk.rax is going to be a bare
jmp *%rax anyway.
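A sketch of the problem (byte counts are illustrative, not measured):

	call	*sys_call_table(, %rax, 8)	# one instruction, ~7 bytes

versus the open-coded retpoline form:

	movq	sys_call_table(, %rax, 8), %rax	# ~8 bytes
	call	__x86.indirect_thunk.rax	# ~5 bytes

ALTERNATIVE pads the shorter variant with trailing NOPs, so the address
immediately after the 'call' -- the return address that stub_ptregs_64
compares against the label -- would differ between the two variants.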
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/entry/entry_32.S | 5 +++--
arch/x86/entry/entry_64.S | 12 +++++++++---
2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index ace8f32..cf9ef33 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -44,6 +44,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
.section .entry.text, "ax"
@@ -290,7 +291,7 @@ ENTRY(ret_from_fork)
/* kernel thread */
1: movl %edi, %eax
- call *%ebx
+ NOSPEC_CALL %ebx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
@@ -919,7 +920,7 @@ common_exception:
movl %ecx, %es
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call *%edi
+ NOSPEC_CALL %edi
jmp ret_from_exception
END(common_exception)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ed31d00..2a2bb98 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -37,6 +37,7 @@
#include <asm/pgtable_types.h>
#include <asm/export.h>
#include <asm/frame.h>
+#include <asm/nospec-branch.h>
#include <linux/err.h>
#include "calling.h"
@@ -187,7 +188,7 @@ ENTRY(entry_SYSCALL_64_trampoline)
*/
pushq %rdi
movq $entry_SYSCALL_64_stage2, %rdi
- jmp *%rdi
+ NOSPEC_JMP %rdi
END(entry_SYSCALL_64_trampoline)
.popsection
@@ -266,7 +267,12 @@ entry_SYSCALL_64_fastpath:
* It might end up jumping to the slow path. If it jumps, RAX
* and all argument registers are clobbered.
*/
+#ifdef CONFIG_RETPOLINE
+ movq sys_call_table(, %rax, 8), %rax
+ call __x86.indirect_thunk.rax
+#else
call *sys_call_table(, %rax, 8)
+#endif
.Lentry_SYSCALL_64_after_fastpath_call:
movq %rax, RAX(%rsp)
@@ -438,7 +444,7 @@ ENTRY(stub_ptregs_64)
jmp entry_SYSCALL64_slow_path
1:
- jmp *%rax /* Called from C */
+ NOSPEC_JMP %rax /* Called from C */
END(stub_ptregs_64)
.macro ptregs_stub func
@@ -517,7 +523,7 @@ ENTRY(ret_from_fork)
1:
/* kernel thread */
movq %r12, %rdi
- call *%rbx
+ NOSPEC_CALL %rbx
/*
* A kernel thread is allowed to return here after successfully
* calling do_execve(). Exit to userspace to complete the execve()
--
2.7.4
From: Andi Kleen <[email protected]>
The internal retpoline thunks used by the compiler contain a dot.
They have to be exported, but modversions cannot handle them because
they have no prototype, due to the C-incompatible name (and genksyms
doesn't support asm("...") renaming).
This leads to lots of warnings from modpost with a retpoline
build with MODVERSIONS enabled. The actual symbols load fine,
they just don't get versioned. That's not a problem here
because we don't expect them to ever change.
Quieten the respective warning messages in modpost for any
symbols containing a dot.
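For reference, the usual GCC way to give such a symbol a C declaration
is an asm label, which is exactly the construct genksyms cannot parse.
A hypothetical sketch, not part of this patch:

	/* Bind a legal C identifier to the dotted assembler name.
	 * genksyms does not understand the asm("...") rename, so it
	 * can never compute a CRC for this export. */
	extern void rax_thunk(void) asm("__x86.indirect_thunk.rax");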
Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
---
scripts/mod/modpost.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 98314b4..e564da2 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -693,7 +693,9 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
#endif
if (is_crc) {
const char *e = is_vmlinux(mod->name) ?"":".ko";
- warn("EXPORT symbol \"%s\" [%s%s] version generation failed, symbol will not be versioned.\n", symname + strlen(CRC_PFX), mod->name, e);
+ const char *name = symname + strlen(CRC_PFX);
+ if (!strchr(name, '.'))
+ warn("EXPORT symbol \"%s\" [%s%s] version generation failed, symbol will not be versioned.\n", name, mod->name, e);
}
mod->unres = alloc_symbol(symname,
ELF_ST_BIND(sym->st_info) == STB_WEAK,
@@ -2212,7 +2214,7 @@ static int add_versions(struct buffer *b, struct module *mod)
for (s = mod->unres; s; s = s->next) {
if (!s->module)
continue;
- if (!s->crc_valid) {
+ if (!s->crc_valid && !strchr(s->name, '.')) {
warn("\"%s\" [%s.ko] has no CRC!\n",
s->name, mod->name);
continue;
--
2.7.4
Convert all indirect jumps in 32bit checksum assembler code to use
non-speculative sequences when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/lib/checksum_32.S | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4d34bb5..98cf15d 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -29,7 +29,8 @@
#include <asm/errno.h>
#include <asm/asm.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
+
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
@@ -156,7 +157,7 @@ ENTRY(csum_partial)
negl %ebx
lea 45f(%ebx,%ebx,2), %ebx
testl %esi, %esi
- jmp *%ebx
+ NOSPEC_JMP %ebx
# Handle 2-byte-aligned regions
20: addw (%esi), %ax
@@ -439,7 +440,7 @@ ENTRY(csum_partial_copy_generic)
andl $-32,%edx
lea 3f(%ebx,%ebx), %ebx
testl %esi, %esi
- jmp *%ebx
+ NOSPEC_JMP %ebx
1: addl $64,%esi
addl $64,%edi
SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
--
2.7.4
Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide
the corresponding thunks. Provide assembler macros for invoking the thunks
in the same way that GCC does, from native and inline assembler.
This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In
some circumstances, IBRS microcode features may be used instead, and the
retpoline can be disabled.
On AMD CPUs the retpoline can be dramatically simplified to a simple
lfence; jmp *\reg. This is enabled by setting the X86_FEATURE_RETPOLINE_AMD
feature bit instead of (or as well as) X86_FEATURE_RETPOLINE.
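To make the effect concrete: with -mindirect-branch=thunk-extern the
compiler transforms (sketch)

	call	*%rax

into

	call	__x86.indirect_thunk.rax

and the thunk in arch/x86/lib/retpoline.S supplies the speculation-safe
body -- or, via ALTERNATIVE, a bare 'jmp *%rax' when retpoline is
disabled, or 'lfence; jmp *%rax' on AMD.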
[Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks]
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/Kconfig | 13 ++++++
arch/x86/Makefile | 10 ++++
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/nospec-branch.h | 91 ++++++++++++++++++++++++++++++++++++
arch/x86/kernel/cpu/common.c | 5 ++
arch/x86/lib/Makefile | 1 +
arch/x86/lib/retpoline.S | 30 ++++++++++++
7 files changed, 152 insertions(+)
create mode 100644 arch/x86/include/asm/nospec-branch.h
create mode 100644 arch/x86/lib/retpoline.S
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cd5199d..77c58ae 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -428,6 +428,19 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ help
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
config INTEL_RDT
bool "Intel Resource Director Technology support"
default n
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a20eacd..918e550 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -235,6 +235,16 @@ KBUILD_CFLAGS += -Wno-sign-compare
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ else
+ $(warning Retpoline not supported in compiler. System may be insecure.)
+ endif
+endif
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1641c2f..6f10eda 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -203,6 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Intel Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 0000000..b0403c9
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+#ifdef __ASSEMBLY__
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+ call 1112f
+1111: pause
+ jmp 1111b
+1112: mov \reg, (%_ASM_SP)
+ ret
+.endm
+
+.macro RETPOLINE_CALL reg:req
+ jmp 1113f
+1110: RETPOLINE_JMP \reg
+1113: call 1110b
+.endm
+
+/*
+ * NOSPEC_JMP and NOSPEC_CALL macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro NOSPEC_JMP reg:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(jmp *\reg), \
+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ jmp *\reg
+#endif
+.endm
+
+.macro NOSPEC_CALL reg:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(call *\reg), \
+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ call *\reg
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define NOSPEC_CALL ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ "call __x86.indirect_thunk.%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+#elif defined(CONFIG_X86_64) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define NOSPEC_CALL ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ " jmp 1113f; " \
+ "1110: call 1112f; " \
+ "1111: pause; " \
+ " jmp 1111b; " \
+ "1112: movl %[thunk_target], (%esp); " \
+ " ret; " \
+ "1113: call 1110b;\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline */
+# define NOSPEC_CALL "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 372ba3f..40e6e54 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -904,6 +904,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+#ifdef CONFIG_RETPOLINE
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ if (c->x86_vendor == X86_VENDOR_AMD)
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+#endif
fpu__init_system(c);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 457f681..d435c89 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 0000000..ccb117a
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86.indirect_thunk.\reg)
+ CFI_STARTPROC
+ NOSPEC_JMP %\reg
+ CFI_ENDPROC
+ENDPROC(__x86.indirect_thunk.\reg)
+EXPORT_SYMBOL(__x86.indirect_thunk.\reg)
+.endm
+
+#ifdef CONFIG_64BIT
+.irp reg rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
+ THUNK \reg
+.endr
+#else
+.irp reg eax ebx ecx edx esi edi ebp
+ THUNK \reg
+.endr
+#endif
--
2.7.4
Convert all indirect jumps in ftrace assembler code to use non-speculative
sequences when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/kernel/ftrace_32.S | 6 ++++--
arch/x86/kernel/ftrace_64.S | 8 ++++----
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index b6c6468..c3842c9 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -8,6 +8,7 @@
#include <asm/segment.h>
#include <asm/export.h>
#include <asm/ftrace.h>
+#include <asm/nospec-branch.h>
#ifdef CC_USING_FENTRY
# define function_hook __fentry__
@@ -197,7 +198,8 @@ ftrace_stub:
movl 0x4(%ebp), %edx
subl $MCOUNT_INSN_SIZE, %eax
- call *ftrace_trace_function
+ movl ftrace_trace_function, %ecx
+ NOSPEC_CALL %ecx
popl %edx
popl %ecx
@@ -241,5 +243,5 @@ return_to_handler:
movl %eax, %ecx
popl %edx
popl %eax
- jmp *%ecx
+ NOSPEC_JMP %ecx
#endif
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index c832291..0893068 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -7,7 +7,7 @@
#include <asm/ptrace.h>
#include <asm/ftrace.h>
#include <asm/export.h>
-
+#include <asm/nospec-branch.h>
.code64
.section .entry.text, "ax"
@@ -286,8 +286,8 @@ trace:
* ip and parent ip are used and the list function is called when
* function tracing is enabled.
*/
- call *ftrace_trace_function
-
+ movq ftrace_trace_function, %r8
+ NOSPEC_CALL %r8
restore_mcount_regs
jmp fgraph_trace
@@ -329,5 +329,5 @@ GLOBAL(return_to_handler)
movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $24, %rsp
- jmp *%rdi
+ NOSPEC_JMP %rdi
#endif
--
2.7.4
From: Andi Kleen <[email protected]>
Convert all indirect jumps in 32bit irq inline asm code to use
non-speculative sequences.
Signed-off-by: Andi Kleen <[email protected]>
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/kernel/irq_32.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index a83b334..e1e58f7 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -20,6 +20,7 @@
#include <linux/mm.h>
#include <asm/apic.h>
+#include <asm/nospec-branch.h>
#ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -55,11 +56,11 @@ DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
static void call_on_stack(void *func, void *stack)
{
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ NOSPEC_CALL
"movl %%ebx,%%esp \n"
: "=b" (stack)
: "0" (stack),
- "D"(func)
+ [thunk_target] "D"(func)
: "memory", "cc", "edx", "ecx", "eax");
}
@@ -95,11 +96,11 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
call_on_stack(print_stack_overflow, isp);
asm volatile("xchgl %%ebx,%%esp \n"
- "call *%%edi \n"
+ NOSPEC_CALL
"movl %%ebx,%%esp \n"
: "=a" (arg1), "=b" (isp)
: "0" (desc), "1" (isp),
- "D" (desc->handle_irq)
+ [thunk_target] "D" (desc->handle_irq)
: "memory", "cc", "ecx");
return 1;
}
--
2.7.4
Convert the indirect call in the Xen hypercall to use a non-speculative
sequence when CONFIG_RETPOLINE is enabled.
Signed-off-by: David Woodhouse <[email protected]>
---
arch/x86/include/asm/xen/hypercall.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 7cb282e..393c004 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
@@ -217,9 +218,9 @@ privcmd_call(unsigned call,
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
stac();
- asm volatile("call *%[call]"
+ asm volatile(NOSPEC_CALL
: __HYPERCALL_5PARAM
- : [call] "a" (&hypercall_page[call])
+ : [thunk_target] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
clac();
--
2.7.4
On Sat, 2018-01-06 at 11:49 +0000, David Woodhouse wrote:
>
> +#if defined(CONFIG_X86_64) && defined(RETPOLINE)
> +/*
> + * Since the inline asm uses the %V modifier which is only in newer GCC,
> + * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
> + */
> +# define NOSPEC_CALL ALTERNATIVE( \
> + "call *%[thunk_target]\n", \
> + "call __x86.indirect_thunk.%V[thunk_target]\n", \
> + X86_FEATURE_RETPOLINE)
> +# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
> +#elif defined(CONFIG_X86_64) && defined(CONFIG_RETPOLINE)
^^^^^^^^^^^^^
Arse. That'll invalidate my 32-bit test runs somewhat. I knew I should
have also repeated the "deliberately break the ASM and check it
crashes" tests. V6 coming in a little while, with 32-bit fixed
properly...
> +/*
> + * For i386 we use the original ret-equivalent retpoline, because
> + * otherwise we'll run out of registers. We don't care about CET
> + * here, anyway.
> + */
On Sat, Jan 6, 2018 at 3:49 AM, David Woodhouse <[email protected]> wrote:
>
> - call *ftrace_trace_function
> + movl ftrace_trace_function, %ecx
> + NOSPEC_CALL %ecx
Can't we just do
NOSPEC_CALL ftrace_trace_function
now?
[ Goes off and looks ]
Oh. The AMD lfence version wants a register. Oh well.
Linus
From b330ffe76cbe0574b4ae729b8399e2afbf4bc6eb Mon Sep 17 00:00:00 2001
From: David Woodhouse <[email protected]>
Date: Thu, 4 Jan 2018 13:58:29 +0000
Subject: [PATCH 02/12] x86/retpoline: Add initial retpoline support
Enable the use of -mindirect-branch=thunk-extern in newer GCC, and provide
the corresponding thunks. Provide assembler macros for invoking the thunks
in the same way that GCC does, from native and inline assembler.
This adds X86_FEATURE_RETPOLINE and sets it by default on all CPUs. In
some circumstances, IBRS microcode features may be used instead, and the
retpoline can be disabled.
On AMD CPUs the retpoline can be dramatically simplified to a simple
lfence; jmp *\reg. This is enabled by setting the X86_FEATURE_RETPOLINE_AMD
feature bit instead of (or as well as) X86_FEATURE_RETPOLINE.
[Andi Kleen: Rename the macros, add CONFIG_RETPOLINE option, export thunks]
Signed-off-by: David Woodhouse <[email protected]>
---
Won't send the full set again just yet; this is the 32-bit fix.
Also at http://git.infradead.org/users/dwmw2/linux-retpoline.git/
arch/x86/Kconfig | 13 +++++
arch/x86/Makefile | 10 ++++
arch/x86/include/asm/cpufeatures.h | 2 +
arch/x86/include/asm/nospec-branch.h | 92 ++++++++++++++++++++++++++++++++++++
arch/x86/kernel/cpu/common.c | 5 ++
arch/x86/lib/Makefile | 1 +
arch/x86/lib/retpoline.S | 30 ++++++++++++
7 files changed, 153 insertions(+)
create mode 100644 arch/x86/include/asm/nospec-branch.h
create mode 100644 arch/x86/lib/retpoline.S
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cd5199d..77c58ae 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -428,6 +428,19 @@ config GOLDFISH
def_bool y
depends on X86_GOLDFISH
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ default y
+ help
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+ Without compiler support, at least indirect branches in assembler
+ code are eliminated. Since this includes the syscall entry path,
+ it is not entirely pointless.
+
config INTEL_RDT
bool "Intel Resource Director Technology support"
default n
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a20eacd..918e550 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -235,6 +235,16 @@ KBUILD_CFLAGS += -Wno-sign-compare
#
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+# Avoid indirect branches in kernel to deal with Spectre
+ifdef CONFIG_RETPOLINE
+ RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register)
+ ifneq ($(RETPOLINE_CFLAGS),)
+ KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ else
+ $(warning Retpoline not supported in compiler. System may be insecure.)
+ endif
+endif
+
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 1641c2f..6f10eda 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -203,6 +203,8 @@
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* Intel Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
new file mode 100644
index 0000000..c4d08fa
--- /dev/null
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __NOSPEC_BRANCH_H__
+#define __NOSPEC_BRANCH_H__
+
+#include <asm/alternative.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeatures.h>
+
+#ifdef __ASSEMBLY__
+
+/*
+ * These are the bare retpoline primitives for indirect jmp and call.
+ * Do not use these directly; they only exist to make the ALTERNATIVE
+ * invocation below less ugly.
+ */
+.macro RETPOLINE_JMP reg:req
+ call 1112f
+1111: pause
+ jmp 1111b
+1112: mov \reg, (%_ASM_SP)
+ ret
+.endm
+
+.macro RETPOLINE_CALL reg:req
+ jmp 1113f
+1110: RETPOLINE_JMP \reg
+1113: call 1110b
+.endm
+
+/*
+ * NOSPEC_JMP and NOSPEC_CALL macros can be used instead of a simple
+ * indirect jmp/call which may be susceptible to the Spectre variant 2
+ * attack.
+ */
+.macro NOSPEC_JMP reg:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(jmp *\reg), \
+ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
+ __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ jmp *\reg
+#endif
+.endm
+
+.macro NOSPEC_CALL reg:req
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE_2 __stringify(call *\reg), \
+ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
+ __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
+#else
+ call *\reg
+#endif
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+/*
+ * Since the inline asm uses the %V modifier which is only in newer GCC,
+ * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ */
+# define NOSPEC_CALL ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ "call __x86.indirect_thunk.%V[thunk_target]\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
+#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+/*
+ * For i386 we use the original ret-equivalent retpoline, because
+ * otherwise we'll run out of registers. We don't care about CET
+ * here, anyway.
+ */
+# define NOSPEC_CALL ALTERNATIVE( \
+ "call *%[thunk_target]\n", \
+ " jmp 1113f; " \
+ "1110: call 1112f; " \
+ "1111: pause; " \
+ " jmp 1111b; " \
+ "1112: addl $4, %%esp; " \
+ " pushl %[thunk_target]; " \
+ " ret; " \
+ "1113: call 1110b;\n", \
+ X86_FEATURE_RETPOLINE)
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#else /* No retpoline */
+# define NOSPEC_CALL "call *%[thunk_target]\n"
+# define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 372ba3f..40e6e54 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -904,6 +904,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+#ifdef CONFIG_RETPOLINE
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
+ if (c->x86_vendor == X86_VENDOR_AMD)
+ setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
+#endif
fpu__init_system(c);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 457f681..d435c89 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -26,6 +26,7 @@ lib-y += memcpy_$(BITS).o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+lib-$(CONFIG_RETPOLINE) += retpoline.o
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
new file mode 100644
index 0000000..ccb117a
--- /dev/null
+++ b/arch/x86/lib/retpoline.S
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/stringify.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+#include <asm/nospec-branch.h>
+
+.macro THUNK reg
+ .section .text.__x86.indirect_thunk.\reg
+
+ENTRY(__x86.indirect_thunk.\reg)
+ CFI_STARTPROC
+ NOSPEC_JMP %\reg
+ CFI_ENDPROC
+ENDPROC(__x86.indirect_thunk.\reg)
+EXPORT_SYMBOL(__x86.indirect_thunk.\reg)
+.endm
+
+#ifdef CONFIG_64BIT
+.irp reg rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
+ THUNK \reg
+.endr
+#else
+.irp reg eax ebx ecx edx esi edi ebp
+ THUNK \reg
+.endr
+#endif
--
2.7.4
--
dwmw2
On Sat, Jan 06, 2018 at 11:49:24AM +0000, David Woodhouse wrote:
> +/*
> + * NOSPEC_JMP and NOSPEC_CALL macros can be used instead of a simple
> + * indirect jmp/call which may be susceptible to the Spectre variant 2
> + * attack.
> + */
Can be, or must be?
> +.macro NOSPEC_JMP reg:req
> +#ifdef CONFIG_RETPOLINE
> + ALTERNATIVE_2 __stringify(jmp *\reg), \
> + __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \
> + __stringify(lfence; jmp *\reg), X86_FEATURE_RETPOLINE_AMD
> +#else
> + jmp *\reg
> +#endif
> +.endm
> +
> +.macro NOSPEC_CALL reg:req
> +#ifdef CONFIG_RETPOLINE
> + ALTERNATIVE_2 __stringify(call *\reg), \
> + __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\
> + __stringify(lfence; call *\reg), X86_FEATURE_RETPOLINE_AMD
> +#else
> + call *\reg
> +#endif
> +.endm
Would it make any sense to name these INDIRECT_JMP and INDIRECT_CALL instead?
NOSPEC_ seems to describe how it needs to be implemented on some CPUs, as
opposed to what the user wants to do (make an indirect jump or call).
Eric
On Sat, 2018-01-06 at 10:35 -0800, Eric Biggers wrote:
> On Sat, Jan 06, 2018 at 11:49:24AM +0000, David Woodhouse wrote:
> >
> > +/*
> > + * NOSPEC_JMP and NOSPEC_CALL macros can be used instead of a simple
> > + * indirect jmp/call which may be susceptible to the Spectre variant 2
> > + * attack.
> > + */
>
> Can be, or must be?
'Can be' is fine. It isn't necessarily the case that all indirect
branches MUST be changed. Although we *have* been auditing the kernel
binary and looking for them, some of them can stay as they are.
> Would it make any sense to name these INDIRECT_JMP and INDIRECT_CALL instead?
> NOSPEC_ seems to describe how it needs to be implemented on some CPUs, as
> opposed to what the user wants to do (make an indirect jump or call).
While NOSPEC_CALL explains why you're using the macro instead of just
'call'. I think this is fine. I'd rather not do too much bikeshedding
over the names.
On Sat, 6 Jan 2018, Linus Torvalds wrote:
> On Sat, Jan 6, 2018 at 3:49 AM, David Woodhouse <[email protected]> wrote:
> >
> > - call *ftrace_trace_function
> > + movl ftrace_trace_function, %ecx
> > + NOSPEC_CALL %ecx
>
> Can't we just do
>
> NOSPEC_CALL ftrace_trace_function
>
> now?
>
> [ Goes off and looks ]
>
> Oh. The AMD lfence version wants a register. Oh well.
The register load could be put into the macro itself, though we need to
supply a scratch register
NOSPEC_CALL ftrace_trace_function scratch_reg=%ecx
Whether that's much better, I don't know.
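A hypothetical sketch of that variant (macro name and argument syntax
assumed, not actual kernel code):

.macro NOSPEC_CALL_SYM sym:req scratch_reg:req
	mov	\sym, \scratch_reg	# load the function pointer
	NOSPEC_CALL \scratch_reg	# then make the guarded call
.endm

	# usage: NOSPEC_CALL_SYM ftrace_trace_function, %ecx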
Thanks,
tglx
Commit-ID: 99c6fa2511d8a683e61468be91b83f85452115fa
Gitweb: https://git.kernel.org/tip/99c6fa2511d8a683e61468be91b83f85452115fa
Author: David Woodhouse <[email protected]>
AuthorDate: Sat, 6 Jan 2018 11:49:23 +0000
Committer: Thomas Gleixner <[email protected]>
CommitDate: Sat, 6 Jan 2018 21:57:19 +0100
x86/cpufeatures: Add X86_BUG_SPECTRE_V[12]
Add the bug bits for spectre v1/2 and force them unconditionally for all
cpus.
Signed-off-by: David Woodhouse <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: Rik van Riel <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Jiri Kosina <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Tim Chen <[email protected]>
Cc: Greg Kroah-Hartman <[email protected]>
Cc: Paul Turner <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/include/asm/cpufeatures.h | 2 ++
arch/x86/kernel/cpu/common.c | 3 +++
2 files changed, 5 insertions(+)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 21ac898..1641c2f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -342,5 +342,7 @@
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2d3bd22..372ba3f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -902,6 +902,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
if (c->x86_vendor != X86_VENDOR_AMD)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
fpu__init_system(c);
#ifdef CONFIG_X86_32
On 06/01/18 11:49, David Woodhouse wrote:
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 372ba3f..40e6e54 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -904,6 +904,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
>
> setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
> setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
> +#ifdef CONFIG_RETPOLINE
> + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
> + if (c->x86_vendor == X86_VENDOR_AMD)
> + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
This isn't safe. It needs to be dependent on finding that LFENCEs are
actually dispatch serialising.
In particular, when virtualised, you'll most likely be saddled with the
hypervisors choice of setting, in which case you need to use retpoline
as a fallback.
~Andrew
> +#endif
>
> fpu__init_system(c);
>
>
On Sat, 2018-01-06 at 21:16 +0000, Andrew Cooper wrote:
> On 06/01/18 11:49, David Woodhouse wrote:
> > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > index 372ba3f..40e6e54 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -904,6 +904,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
> >
> > setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
> > setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
> > +#ifdef CONFIG_RETPOLINE
> > + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
> > + if (c->x86_vendor == X86_VENDOR_AMD)
> > + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
>
> This isn't safe. It needs to be dependent on finding that LFENCEs are
> actually dispatch serialising.
>
> In particular, when virtualised, you'll most likely be saddled with the
> hypervisors choice of setting, in which case you need to use retpoline
> as a fallback.
Thanks. I was about to rebase on top of tip/x86/pti which has Tom's
patches to make lfence serialising — which seem to say that if the MSR
isn't available, it *will* be serialising.
I think I'll just refrain from setting X86_FEATURE_RETPOLINE_AMD for
now, and let Tom turn that on in his own time.
On Sat, 6 Jan 2018, Andrew Cooper wrote:
> On 06/01/18 11:49, David Woodhouse wrote:
> > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > index 372ba3f..40e6e54 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -904,6 +904,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
> >
> > setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
> > setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
> > +#ifdef CONFIG_RETPOLINE
> > + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
> > + if (c->x86_vendor == X86_VENDOR_AMD)
> > + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
>
> This isn't safe. It needs to be dependent on finding that LFENCEs are
> actually dispatch serialising.
>
> In particular, when virtualised, you'll most likely be saddled with the
> hypervisors choice of setting, in which case you need to use retpoline
> as a fallback.
On bare metal we are sure; the virtualization part is a different question.
Thanks,
tglx
On 06/01/18 21:23, Thomas Gleixner wrote:
> On Sat, 6 Jan 2018, Andrew Cooper wrote:
>> On 06/01/18 11:49, David Woodhouse wrote:
>>> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
>>> index 372ba3f..40e6e54 100644
>>> --- a/arch/x86/kernel/cpu/common.c
>>> +++ b/arch/x86/kernel/cpu/common.c
>>> @@ -904,6 +904,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
>>>
>>> setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
>>> setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
>>> +#ifdef CONFIG_RETPOLINE
>>> + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
>>> + if (c->x86_vendor == X86_VENDOR_AMD)
>>> + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
>> This isn't safe. It needs to be dependent on finding that LFENCEs are
>> actually dispatch serialising.
>>
>> In particular, when virtualised, you'll most likely be saddled with the
>> hypervisors choice of setting, in which case you need to use retpoline
>> as a fallback.
> On bare metal we are sure; the virtualization part is a different question.
Leaving virtualisation to one side, how does this cope with pre-SSE2
hardware?
~Andrew
On Sat, 2018-01-06 at 21:34 +0000, Andrew Cooper wrote:
> On 06/01/18 21:23, Thomas Gleixner wrote:
> >
> > On Sat, 6 Jan 2018, Andrew Cooper wrote:
> > >
> > > On 06/01/18 11:49, David Woodhouse wrote:
> > > >
> > > > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > > > index 372ba3f..40e6e54 100644
> > > > --- a/arch/x86/kernel/cpu/common.c
> > > > +++ b/arch/x86/kernel/cpu/common.c
> > > > @@ -904,6 +904,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
> > > >
> > > > setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
> > > > setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
> > > > +#ifdef CONFIG_RETPOLINE
> > > > + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
> > > > + if (c->x86_vendor == X86_VENDOR_AMD)
> > > > + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
> > > This isn't safe. It needs to be dependent on finding that LFENCEs are
> > > actually dispatch serialising.
> > >
> > > In particular, when virtualised, you'll most likely be saddled with the
> > > hypervisors choice of setting, in which case you need to use retpoline
> > > as a fallback.
> > On bare metal we are sure; the virtualization part is a different question.
> Leaving virtualisation to one side, how does this cope with pre-SSE2
> hardware?
Either way, I've rebased my retpoline tree on top of tip/x86/pti with
Tom's patches, but I *haven't* enabled X86_FEATURE_RETPOLINE_AMD. AMD
can use the standard retpoline implementation until this question is
resolved.
Arjan pointed out that CONFIG_TRIM_UNUSED_KSYMS *really* doesn't like
the dot in the symbols that GCC uses for the thunks.
This seems to work, although my eyes are bleeding just a little bit.
Given this, and the hack we already needed for MODVERSIONS, I wonder if
a better approach might be to export the thunks using underscores in
place of the dots, which is a relatively simple abuse of
__EXPORT_SYMBOL(__x86_indirect_thunk_foo,__x86.indirect_thunk.foo,),
and then have a hack either when generating or loading modules to do
the same replacement.
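Spelled out, that abuse would look something like this hypothetical
sketch (one register shown; hypothetical, not a tested patch):

	/* Export under a dot-free alias that C and modpost can cope
	 * with, while the symbol keeps the name GCC emits. */
	__EXPORT_SYMBOL(__x86_indirect_thunk_rax, __x86.indirect_thunk.rax, )

with module generation or loading then mapping '.' to '_' when
resolving the thunk symbols.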
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index ccb117a4588b..64d7a45ea954 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -8,7 +8,13 @@
#include <asm/export.h>
#include <asm/nospec-branch.h>
-.macro THUNK reg
+#ifdef CONFIG_TRIM_UNUSED_KSYMS
+#define EXPORT_REG(reg) __is_defined(__KSYM___x86_indirect_thunk_ ## reg)
+#else
+#define EXPORT_REG(reg) 1
+#endif
+
+.macro THUNK reg export
.section .text.__x86.indirect_thunk.\reg
ENTRY(__x86.indirect_thunk.\reg)
@@ -16,15 +22,33 @@ ENTRY(__x86.indirect_thunk.\reg)
NOSPEC_JMP %\reg
CFI_ENDPROC
ENDPROC(__x86.indirect_thunk.\reg)
-EXPORT_SYMBOL(__x86.indirect_thunk.\reg)
+
+.if \export
+ EXPORT_SYMBOL_FORCE(__x86.indirect_thunk.\reg)
+.endif
.endm
-#ifdef CONFIG_64BIT
-.irp reg rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
- THUNK \reg
-.endr
+#ifdef __KSYM_DEPS__
+#define GENERATE_THUNK(reg) EXPORT_SYMBOL(__x86.indirect_thunk. ## reg)
#else
-.irp reg eax ebx ecx edx esi edi ebp
- THUNK \reg
-.endr
+#define GENERATE_THUNK(reg) THUNK reg EXPORT_REG(reg)
+#endif
+
+GENERATE_THUNK(_ASM_AX)
+GENERATE_THUNK(_ASM_BX)
+GENERATE_THUNK(_ASM_CX)
+GENERATE_THUNK(_ASM_DX)
+GENERATE_THUNK(_ASM_SI)
+GENERATE_THUNK(_ASM_DI)
+GENERATE_THUNK(_ASM_BP)
+GENERATE_THUNK(_ASM_SP)
+#ifdef CONFIG_64BIT
+GENERATE_THUNK(r8)
+GENERATE_THUNK(r9)
+GENERATE_THUNK(r10)
+GENERATE_THUNK(r11)
+GENERATE_THUNK(r12)
+GENERATE_THUNK(r13)
+GENERATE_THUNK(r14)
+GENERATE_THUNK(r15)
#endif
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 719db1968d81..b13bb65e2530 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -63,33 +63,33 @@ KSYM(__kcrctab_\name):
#if defined(__KSYM_DEPS__)
-#define __EXPORT_SYMBOL(sym, val, sec) === __KSYM_##sym ===
+#define __EXPORT_SYMBOL(sym, val, sec, force) === __KSYM_##sym ===
#elif defined(CONFIG_TRIM_UNUSED_KSYMS)
#include <linux/kconfig.h>
#include <generated/autoksyms.h>
-#define __EXPORT_SYMBOL(sym, val, sec) \
- __cond_export_sym(sym, val, sec, __is_defined(__KSYM_##sym))
+#define __EXPORT_SYMBOL(sym, val, sec, force) \
+ __cond_export_sym(sym, val, sec, __or(force, __is_defined(__KSYM_##sym)))
#define __cond_export_sym(sym, val, sec, conf) \
___cond_export_sym(sym, val, sec, conf)
#define ___cond_export_sym(sym, val, sec, enabled) \
__cond_export_sym_##enabled(sym, val, sec)
#define __cond_export_sym_1(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec
#define __cond_export_sym_0(sym, val, sec) /* nothing */
-
#else
-#define __EXPORT_SYMBOL(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec
+#define __EXPORT_SYMBOL(sym, val, sec, force) ___EXPORT_SYMBOL sym, val, sec
#endif
#define EXPORT_SYMBOL(name) \
- __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)),)
+ __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)), , 0)
#define EXPORT_SYMBOL_GPL(name) \
- __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)), _gpl)
+ __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)), _gpl, 0)
#define EXPORT_DATA_SYMBOL(name) \
- __EXPORT_SYMBOL(name, KSYM(name),)
+ __EXPORT_SYMBOL(name, KSYM(name), , 0)
#define EXPORT_DATA_SYMBOL_GPL(name) \
- __EXPORT_SYMBOL(name, KSYM(name),_gpl)
-
+ __EXPORT_SYMBOL(name, KSYM(name), _gpl, 0)
+#define EXPORT_SYMBOL_FORCE(name) \
+ __EXPORT_SYMBOL(name, KSYM(name), , 1)
#endif
diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh
index 513da1a4a2da..991cd136291b 100755
--- a/scripts/adjust_autoksyms.sh
+++ b/scripts/adjust_autoksyms.sh
@@ -60,7 +60,7 @@ cat > "$new_ksyms_file" << EOT
EOT
[ "$(ls -A "$MODVERDIR")" ] &&
-sed -ns -e '3{s/ /\n/g;/^$/!p;}' "$MODVERDIR"/*.mod | sort -u |
+sed -ns -e '3{s/ /\n/g;/^$/!p;}' "$MODVERDIR"/*.mod | sort -u | tr . _ |
while read sym; do
if [ -n "$CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX" ]; then
sym="${sym#_}"
On Sat, Jan 6, 2018 at 11:53 AM, Thomas Gleixner <[email protected]> wrote:
> On Sat, 6 Jan 2018, Linus Torvalds wrote:
>
>>
>> [ Goes off and looks ]
>>
>> Oh. The AMD lfence version wants a register. Oh well.
>
> The register load could be put into the macro itself, though we need to
> supply a scratch register
Yeah, not worth it I suspect. I guess we can live with the register version.
Linus
On 1/6/2018 3:21 PM, Woodhouse, David wrote:
> On Sat, 2018-01-06 at 21:16 +0000, Andrew Cooper wrote:
>> On 06/01/18 11:49, David Woodhouse wrote:
>>> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
>>> index 372ba3f..40e6e54 100644
>>> --- a/arch/x86/kernel/cpu/common.c
>>> +++ b/arch/x86/kernel/cpu/common.c
>>> @@ -904,6 +904,11 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
>>>
>>> setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
>>> setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
>>> +#ifdef CONFIG_RETPOLINE
>>> + setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
>>> + if (c->x86_vendor == X86_VENDOR_AMD)
>>> + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
>>
>> This isn't safe. It needs to be dependent on finding that LFENCEs are
>> actually dispatch serialising.
>>
>> In particular, when virtualised, you'll most likely be saddled with the
>> hypervisors choice of setting, in which case you need to use retpoline
>> as a fallback.
>
> Thanks. I was about to rebase on top of tip/x86/pti which has Tom's
> patches to make lfence serialising — which seem to say that if the MSR
> isn't available, it *will* be serialising.
>
> I think I'll just refrain from setting X86_FEATURE_RETPOLINE_AMD for
> now, and let Tom turn that on in his own time.
I can do that. I'll move it to arch/x86/kernel/cpu/amd.c, just after
the line that sets the MSR bit that makes lfence serializing. I'll submit
that once your patches are pulled in (or at least the feature bits).
Thanks,
Tom
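Sketched out, the placement Tom describes would look roughly like this;
the MSR names follow his in-flight tip patches and are assumptions here,
not part of this series:

    /* arch/x86/kernel/cpu/amd.c, in init_amd() -- illustrative only */

    /* Make LFENCE dispatch-serialising first ... */
    msr_set_bit(MSR_F10H_DECFG, MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT);

    /* ... and only then prefer the lfence-based thunk; otherwise the
     * full retpoline remains the safe fallback. */
    setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);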
On Sun, 2018-01-07 at 00:10 +0000, David Woodhouse wrote:
> Arjan pointed out that CONFIG_TRIM_UNUSED_SYMBOLS *really* doesn't like
> the dot in the symbols that GCC uses for the thunks.
>
> This seems to work, although my eyes are bleeding just a little bit.
>
> Given this, and the hack we already needed for MODVERSIONS, I wonder if
> a better approach might be to export the thunks using underscores in
> place of the dots, which is a relatively simple abuse of
> __EXPORT_SYMBOL(__x86_indirect_thunk_foo,__x86.indirect_thunk.foo,),
> and then have a hack either when generating or loading modules to do
> the same replacement.
Alternatively, and much simpler... HJ (or Igor), please can we change
the GCC patches so that the __x86.indirect_thunk.rxx symbols don't have
those awful dots in them? They are causing *lots* of pain.
Or we could build *modules* with the inline thunk and lose the ability
to ALTERNATIVE it away, I suppose.
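The root of the pain, spelled out: CONFIG_TRIM_UNUSED_KSYMS works by
generating one preprocessor define per wanted export, and a C macro name
cannot contain a dot:

    /* generated/autoksyms.h emits one of these per wanted symbol: */
    #define __KSYM_printk 1                    /* valid */
    #define __KSYM___x86.indirect_thunk.rax 1  /* invalid: '.' cannot
                                                  appear in an identifier,
                                                  so __is_defined() can
                                                  never see the thunks */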
> diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
> index ccb117a4588b..64d7a45ea954 100644
> --- a/arch/x86/lib/retpoline.S
> +++ b/arch/x86/lib/retpoline.S
> @@ -8,7 +8,13 @@
> #include
> #include
>
> -.macro THUNK reg
> +#ifdef CONFIG_TRIM_UNUSED_KSYMS
> +#define EXPORT_REG(reg) __is_defined(__KSYM___x86_indirect_thunk_ ## reg)
> +#else
> +#define EXPORT_REG(reg) 1
> +#endif
> +
> +.macro THUNK reg export
> .section .text.__x86.indirect_thunk.\reg
>
> ENTRY(__x86.indirect_thunk.\reg)
> @@ -16,15 +22,33 @@ ENTRY(__x86.indirect_thunk.\reg)
> NOSPEC_JMP %\reg
> CFI_ENDPROC
> ENDPROC(__x86.indirect_thunk.\reg)
> -EXPORT_SYMBOL(__x86.indirect_thunk.\reg)
> +
> +.if \export
> + EXPORT_SYMBOL_FORCE(__x86.indirect_thunk.\reg)
> +.endif
> .endm
>
> -#ifdef CONFIG_64BIT
> -.irp reg rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
> - THUNK \reg
> -.endr
> +#ifdef __KSYM_DEPS__
> +#define GENERATE_THUNK(reg) EXPORT_SYMBOL(__x86.indirect_thunk. ## reg)
> #else
> -.irp reg eax ebx ecx edx esi edi ebp
> - THUNK \reg
> -.endr
> +#define GENERATE_THUNK(reg) THUNK reg EXPORT_REG(reg)
> +#endif
> +
> +GENERATE_THUNK(_ASM_AX)
> +GENERATE_THUNK(_ASM_BX)
> +GENERATE_THUNK(_ASM_CX)
> +GENERATE_THUNK(_ASM_DX)
> +GENERATE_THUNK(_ASM_SI)
> +GENERATE_THUNK(_ASM_DI)
> +GENERATE_THUNK(_ASM_BP)
> +GENERATE_THUNK(_ASM_SP)
> +#ifdef CONFIG_64BIT
> +GENERATE_THUNK(r8)
> +GENERATE_THUNK(r9)
> +GENERATE_THUNK(r10)
> +GENERATE_THUNK(r11)
> +GENERATE_THUNK(r12)
> +GENERATE_THUNK(r13)
> +GENERATE_THUNK(r14)
> +GENERATE_THUNK(r15)
> #endif
Sure, I can use __x86_indirect_thunk_rax.
H.J.
On Sun, 2018-01-07 at 15:09 +0000, Lu, Hongjiu wrote:
> Sure, I can use __x86_indirect_thunk_rax.
Great, thanks.
I've made that change on top of your 20171219 patch set which is the
latest I've seen, and pushed it to my tree at
http://git.infradead.org/users/dwmw2/gcc-retpoline.git/shortlog/refs/heads/retpoline-20180107
I assume you'll want to redo those patches so that it's
__x86_indirect_thunk_rax from the start, rather than starting with dots
and then changing it, but I'll let you deal with that and push it
upstream. Building compiler now, and then I'll update and simplify the
kernel patches accordingly.
Please check out the patches for GCC 7:
https://github.com/hjl-tools/gcc/commits/hjl/indirect/gcc-7-branch/master
For GCC trunk:
https://github.com/hjl-tools/gcc/commits/hjl/indirect/master
H.J.
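For anyone building along, the relevant new options in those branches are
the ones that later shipped in GCC 8; treat this invocation as a sketch:

    # Emit each indirect branch through an externally supplied thunk,
    # which the kernel provides (and can patch away with ALTERNATIVE):
    gcc -O2 -mindirect-branch=thunk-extern -mindirect-branch-register \
        -c foo.c -o foo.o
    # An indirect call through %rax then becomes:
    #     call __x86_indirect_thunk_rax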
On Sun, 7 Jan 2018, Lu, Hongjiu wrote:
> Please checkout patches for GCC 7:
>
> https://github.com/hjl-tools/gcc/commits/hjl/indirect/gcc-7-branch/master
>
> For GCC trunk:
>
> https://github.com/hjl-tools/gcc/commits/hjl/indirect/master
What's the plan for these vs. official GCC? Is that stuff going to be
part of GCC, and if so, which versions of GCC will have that?
Thanks,
tglx
On Sun, 7 Jan 2018, Thomas Gleixner wrote:
> What's the plan for these vs. official GCC? Is that stuff going to be
> part of GCC, and if so, which versions of GCC will have that?
If I get positive feedback from kernel folks on my GCC 7 patches today, I
will submit my patches for GCC 8 today. After they are checked in, I will
backport them to GCC 7/6/5/4.9.
H.J.
On Sun, 2018-01-07 at 18:32 +0000, Lu, Hongjiu wrote:
>
> > What's the plan for these vs. official GCC? Is that stuff going to be
> > part of GCC, and if so, which versions of GCC will have that?
>
> If I get positive feedback from kernel folks on my GCC 7 patches today, I
> will submit my patches for GCC 8 today. After they are checked in, I will
> backport them to GCC 7/6/5/4.9.
I've pushed a new kernel retpoline branch, tested with the compiler
changes I made myself. Building your patch set now, both locally and at
https://koji.fedoraproject.org/koji/taskinfo?taskID=24065739
On Sun, 2018-01-07 at 18:32 +0000, Lu, Hongjiu wrote:
>
> If I get positive feedback from kernel folks on my GCC 7 patches today, I
> will submit my patches for GCC 8 today. After they are checked in, I will
> backport them to GCC 7/6/5/4.9.
To confirm: These seem to work for me and I've resent the kernel patch
series after testing with them. Thanks.