2021-02-19 21:14:59

by Peter Zijlstra

[permalink] [raw]
Subject: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
indirect call, have objtool rewrite it to:

ALTERNATIVE "call *%reg",
"call __x86_indirect_thunk_\reg", X86_FEATURE_RETPOLINE

That is, rewrite the thunk calls into actual indirect calls and emit
alternatives to patch it back into the thunk calls when RETPOLINE.

Arguably it would be simpler to do the other way around, but
unfortunately alternatives don't work that way, we cannot say:

ALTERNATIVE "call __x86_indirect_thunk_\reg",
"call *%reg", ~X86_FEATURE_RETPOLINE

That is, there is no negative form of alternatives.

Additionally, in order to not emit endless identical
.altinst_replacement chunks, use a global symbol for them, see
__x86_alt_*. This however does mean objtool itself cannot correctly
parse the alternatives it emits (should be fixable).

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/lib/retpoline.S | 28 +++++
tools/objtool/arch/x86/decode.c | 203 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 230 insertions(+), 1 deletion(-)

--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,6 +10,8 @@
#include <asm/unwind_hints.h>
#include <asm/frame.h>

+ .section .text.__x86.indirect_thunk
+
.macro RETPOLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call .Ldo_rop_\@
@@ -25,7 +27,6 @@
.endm

.macro THUNK reg
- .section .text.__x86.indirect_thunk

.align 16
SYM_FUNC_START(__x86_indirect_thunk_\reg)
@@ -38,6 +39,19 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)

.endm

+.macro CALL_THUNK reg
+
+ .align 1
+SYM_FUNC_START(__x86_alt_call_\reg)
+ call __x86_indirect_thunk_\reg
+SYM_FUNC_END(__x86_alt_call_\reg)
+
+SYM_FUNC_START(__x86_alt_jmp_\reg)
+ jmp __x86_indirect_thunk_\reg
+SYM_FUNC_END(__x86_alt_jmp_\reg)
+
+.endm
+
/*
* Despite being an assembler file we can't just use .irp here
* because __KSYM_DEPS__ only uses the C preprocessor and would
@@ -61,3 +75,15 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>

+#undef GEN
+#define GEN(reg) CALL_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_alt_call_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_alt_jmp_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -16,6 +16,7 @@
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
+#include <arch/elf.h>

static int is_x86_64(const struct elf *elf)
{
@@ -655,6 +656,208 @@ const char *arch_nop_insn(int len)
return nops[len-1];
}

+static const char *cfi_reg_str[16] = {
+ "rax",
+ "rcx",
+ "rdx",
+ "rbx",
+ "rsp",
+ "rbp",
+ "rsi",
+ "rdi",
+ "r8",
+ "r9",
+ "r10",
+ "r11",
+ "r12",
+ "r13",
+ "r14",
+ "r15",
+};
+
+static int str_to_cfi(const char *str)
+{
+ int i;
+
+ for (i = 0 ; i < ARRAY_SIZE(cfi_reg_str); i++) {
+ if (!strcmp(cfi_reg_str[i], str))
+ return i;
+ }
+
+ return CFI_NUM_REGS;
+}
+
+static const char *retpoline_insn(struct instruction *insn, struct reloc *reloc)
+{
+ static char text[5];
+ int cfi = str_to_cfi(reloc->sym->name + 21);
+ int i = 0, modrm = 0;
+
+ if (insn->len != 5 || cfi == CFI_NUM_REGS)
+ return NULL;
+
+ if (cfi >= CFI_R8) {
+ text[i++] = 0x41; /* REX.B prefix */
+ cfi -= CFI_R8;
+ }
+
+ text[i++] = 0xff; /* opcode */
+
+ if (insn->type == INSN_JUMP_DYNAMIC)
+ modrm = 0x20; /* Reg = 4 ; JMP r/m */
+ else
+ modrm = 0x10; /* Reg = 2 ; CALL r/m */
+
+ modrm |= 0xc0; /* Mod = 3; */
+ modrm += cfi; /* r/m */
+
+ text[i++] = modrm;
+
+ if (i < 5)
+ memcpy(&text[i], arch_nop_insn(5-i), 5-i);
+
+ return text;
+}
+
+struct alt_instr {
+ s32 instr_offset; /* original instruction */
+ s32 repl_offset; /* offset to replacement instruction */
+ u16 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction */
+ u8 padlen; /* length of build-time padding */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+ struct instruction *orig, struct symbol *sym,
+ u16 cpuid, u8 orig_len, u8 repl_len, u8 pad_len)
+{
+ struct section *sec, *reloc_sec;
+ struct reloc *reloc;
+ Elf_Scn *s;
+ const int size = sizeof(struct alt_instr);
+ struct alt_instr *alt;
+
+ sec = find_section_by_name(elf, ".altinstructions");
+ if (!sec) {
+ sec = elf_create_section(elf, ".altinstructions",
+ SHF_WRITE, size, 0);
+
+ if (!sec) {
+ WARN_ELF("elf_create_section");
+ return -1;
+ }
+
+ reloc_sec = elf_create_reloc_section(elf, sec, SHT_RELA);
+ if (!reloc_sec) {
+ WARN_ELF("elf_create_reloc_section");
+ return -1;
+ }
+ }
+
+ s = elf_getscn(elf->elf, sec->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
+ WARN_ELF("elf_newdata");
+ return -1;
+ }
+
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
+
+ alt = sec->data->d_buf = malloc(size);
+ if (!sec->data->d_buf) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sec->data->d_buf, 0, size);
+
+ alt->cpuid = cpuid;
+ alt->instrlen = orig_len;
+ alt->replacementlen = repl_len;
+ alt->padlen = pad_len;
+
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+
+ insn_to_reloc_sym_addend(orig->sec, orig->offset, reloc);
+ if (!reloc->sym) {
+ WARN_FUNC("alt: missing containing symbol",
+ orig->sec, orig->offset);
+ return -1;
+ }
+
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = sec->sh.sh_size;
+ reloc->sec = sec->reloc;
+ elf_add_reloc(elf, reloc);
+
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+
+ reloc->sym = sym;
+ reloc->addend = 0;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = sec->sh.sh_size + 4;
+ reloc->sec = sec->reloc;
+ elf_add_reloc(elf, reloc);
+
+ sec->sh.sh_size += size;
+ sec->changed = true;
+
+ return 0;
+}
+
+#define X86_FEATURE_RETPOLINE ( 7*32+12)
+
+int arch_rewrite_retpoline(struct objtool_file *file,
+ struct instruction *insn,
+ struct reloc *reloc)
+{
+ struct symbol *sym;
+ char name[32] = "";
+
+ if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+ return 0;
+
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+
+ elf_write_insn(file->elf, insn->sec, insn->offset, insn->len,
+ retpoline_insn(insn, reloc));
+
+ sprintf(name, "__x86_alt_%s_%s",
+ insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+ reloc->sym->name + 21);
+
+ sym = find_symbol_by_name(file->elf, name);
+ if (!sym) {
+ sym = elf_create_undef_symbol(file->elf, name);
+ if (!sym) {
+ WARN("elf_create_undef_symbol");
+ return -1;
+ }
+ }
+
+ elf_add_alternative(file->elf, insn, sym,
+ X86_FEATURE_RETPOLINE, 5, 5, 0);
+
+ return 0;
+}
+
int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
{
struct cfi_reg *cfa = &insn->cfi.cfa;



2021-02-19 22:01:59

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Fri, Feb 19, 2021 at 09:43:06PM +0100, Peter Zijlstra wrote:
> Arguably it would be simpler to do the other way around, but
> unfortunately alternatives don't work that way, we cannot say:
>
> ALTERNATIVE "call __x86_indirect_thunk_\reg",
> "call *%reg", ~X86_FEATURE_RETPOLINE
>
> That is, there is no negative form of alternatives.

X86_FEATURE_NO_RETPOLINE?

--
Josh

2021-02-19 22:08:44

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Fri, Feb 19, 2021 at 03:55:30PM -0600, Josh Poimboeuf wrote:
> On Fri, Feb 19, 2021 at 09:43:06PM +0100, Peter Zijlstra wrote:
> > Arguably it would be simpler to do the other way around, but
> > unfortunately alternatives don't work that way, we cannot say:
> >
> > ALTERNATIVE "call __x86_indirect_thunk_\reg",
> > "call *%reg", ~X86_FEATURE_RETPOLINE
> >
> > That is, there is no negative form of alternatives.
>
> X86_FEATURE_NO_RETPOLINE?

We could, but it so happens Joerg is also wanting negative features. So
I was thinking that perhaps we can convince Boris they're not really all
that awful after all :-)

2021-02-20 00:43:15

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Fri, Feb 19, 2021 at 11:01:58PM +0100, Peter Zijlstra wrote:
> We could, but it so happens Joerg is also wanting negative features.

Juergen.

> So I was thinking that perhaps we can convince Boris they're not
> really all that awful after all :-)

Well, I'm not crazy about this, TBH - I totally agree with Josh:

"with objtool generating code, it's going to be a maze to figure out
where the generated code is coming from"

and without a real good reason to do this, what's the point? I know,
because we can. :-)

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2021-02-20 16:52:22

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Sat, Feb 20, 2021 at 01:39:20AM +0100, Borislav Petkov wrote:
> On Fri, Feb 19, 2021 at 11:01:58PM +0100, Peter Zijlstra wrote:
> > We could, but it so happens Joerg is also wanting negative features.
>
> Juergen.

Argh! I should stick to jgross. Sorry.

> > So I was thinking that perhaps we can convince Boris they're not
> > really all that awful after all :-)
>
> Well, I'm not crazy about this, TBH - I totally agree with Josh:
>
> "with objtool generating code, it's going to be a maze to figure out
> where the generated code is coming from"
>
> and without a real good reason to do this, what's the point? I know,
> because we can. :-)

Well:

- straight line execution is always better than a round-trip to
somewhere else, no matter how trivial.
- supposedly EIBRS (yeah, I know, there's a paper out there) should
result in no longer using retpolines.
- I really, as in _REALLY_ don't want to do a CET enabled retpoline
- IOW, retpolines should be on their way out (knock on wood)
- doing this was fun :-)
- this stuff was mostly trivial make work stuff I could do with a head
full of snot and a headache.
- if we had negative alternatives objtool doesn't need to actually
rewrite code in this case. It could simply emit alternative entries
and call it a day.
- objtool already rewrites code
- I have more cases for objtool to rewrite code (I'll see if I can
rebase and post that this weekend -- no promises).
- also https://lkml.kernel.org/r/[email protected]

2021-02-20 17:47:34

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Sat, Feb 20, 2021 at 05:48:46PM +0100, Peter Zijlstra wrote:
> - straight line execution is always better than a round-trip to
> somewhere else, no matter how trivial.

Sure, but not at that price. Especially not if it is waaay down in perf
profiles.

> - supposedly EIBRS (yeah, I know, there's a paper out there) should
> result in no longer using retpolines.

Yap, supposedly both vendors have stuff like that in the works. When
that happens, we can finally use ALTERNATIVE_3 in the ratpolines. :-)

> - I really, as in _REALLY_ don't want to do a CET enabled retpoline

WTF is that? Can we deal with one atrocity at a time pls...

> - IOW, retpolines should be on their way out (knock on wood)

Yap, my hope too.

> - doing this was fun :-)

I know.

> - this stuff was mostly trivial make work stuff I could do with a head
> full of snot and a headache.

I don't want to imagine what you'd come up with when you're all healthy
and rested. :-P

> - if we had negative alternatives objtool doesn't need to actually
> rewrite code in this case. It could simply emit alternative entries
> and call it a day.

I don't mind the negative alt per se - I mind the implementation I saw.
I'm sure we can come up with something nicer, like, for example, struct
alt_instr.flags to denote that this feature is a NOT feature. IOW, I'd
like for the fact that a feature is a NOT feature or patching needs to
happen in the NOT case, to be explicitly stated with a flag. I.e.,

if (!boot_cpu_has(a->cpuid) && !(a->flags & PATCH_WHEN_X86_FEATURE_FLAG_NOT_SET))
continue;

Something like that.

> - objtool already rewrites code

Sure, as long as one can reconstruct from looking at the asm, what
objtool has changed. I fear that'll get out of control if not done with
restraint and proper documentation.

> - I have more cases for objtool to rewrite code (I'll see if I can
> rebase and post that this weekend -- no promises).

Oh noes.

> - also https://lkml.kernel.org/r/[email protected]

Oh well, I guess you can simply make objtool compile the kernel and be
done with it.

:)

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2021-02-20 22:32:47

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Sat, Feb 20, 2021 at 06:41:01PM +0100, Borislav Petkov wrote:
> > - I have more cases for objtool to rewrite code (I'll see if I can
> > rebase and post that this weekend -- no promises).
>
> Oh noes.

11 patches and one beer later, it even boots :-)

Saves more than 6k on a defconfig build.

I'll push it out to git in a bit.

--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,12 +4,12 @@

#define HAVE_JUMP_LABEL_BATCH

-#define JUMP_LABEL_NOP_SIZE 5
-
#ifdef CONFIG_X86_64
-# define STATIC_KEY_INIT_NOP P6_NOP5_ATOMIC
+# define STATIC_KEY_NOP2 P6_NOP2
+# define STATIC_KEY_NOP5 P6_NOP5_ATOMIC
#else
-# define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC
+# define STATIC_KEY_NOP2 GENERIC_NOP2
+# define STATIC_KEY_NOP5 GENERIC_NOP5_ATOMIC
#endif

#include <asm/asm.h>
@@ -20,15 +20,35 @@
#include <linux/stringify.h>
#include <linux/types.h>

+#define JUMP_TABLE_ENTRY \
+ ".pushsection __jump_table, \"aw\" \n\t" \
+ _ASM_ALIGN "\n\t" \
+ ".long 1b - . \n\t" \
+ ".long %l[l_yes] - . \n\t" \
+ _ASM_PTR "%c0 + %c1 - .\n\t" \
+ ".popsection \n\t"
+
+#ifdef CONFIG_STACK_VALIDATION
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+ asm_volatile_goto("1:"
+ "jmp %l[l_yes] # objtool NOPs this \n\t"
+ JUMP_TABLE_ENTRY
+ : : "i" (key), "i" (2 | branch) : : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+#else
+
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
- ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
- ".pushsection __jump_table, \"aw\" \n\t"
- _ASM_ALIGN "\n\t"
- ".long 1b - ., %l[l_yes] - . \n\t"
- _ASM_PTR "%c0 + %c1 - .\n\t"
- ".popsection \n\t"
+ ".byte " __stringify(STATIC_KEY_NOP5) "\n\t"
+ JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);

return false;
@@ -36,16 +56,13 @@ static __always_inline bool arch_static_
return true;
}

+#endif /* STACK_VALIDATION */
+
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
asm_volatile_goto("1:"
- ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
- "2:\n\t"
- ".pushsection __jump_table, \"aw\" \n\t"
- _ASM_ALIGN "\n\t"
- ".long 1b - ., %l[l_yes] - . \n\t"
- _ASM_PTR "%c0 + %c1 - .\n\t"
- ".popsection \n\t"
+ "jmp %l[l_yes]\n\t"
+ JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);

return false;
@@ -53,41 +70,7 @@ static __always_inline bool arch_static_
return true;
}

-#else /* __ASSEMBLY__ */
-
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
- .if \def
- /* Equivalent to "jmp.d32 \target" */
- .byte 0xe9
- .long \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
- .else
- .byte STATIC_KEY_INIT_NOP
- .endif
- .pushsection __jump_table, "aw"
- _ASM_ALIGN
- .long .Lstatic_jump_\@ - ., \target - .
- _ASM_PTR \key - .
- .popsection
-.endm
-
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
- .if \def
- .byte STATIC_KEY_INIT_NOP
- .else
- /* Equivalent to "jmp.d32 \target" */
- .byte 0xe9
- .long \target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
- .endif
- .pushsection __jump_table, "aw"
- _ASM_ALIGN
- .long .Lstatic_jump_\@ - ., \target - .
- _ASM_PTR \key + 1 - .
- .popsection
-.endm
+extern int arch_jump_entry_size(struct jump_entry *entry);

#endif /* __ASSEMBLY__ */

--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -5,6 +5,7 @@
/*
* Define nops for use with alternative() and for tracing.
*
+ * *_NOP2 must be a single instruction
* *_NOP5_ATOMIC must be a single instruction.
*/

--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,53 +16,81 @@
#include <asm/alternative.h>
#include <asm/text-patching.h>

-static void bug_at(const void *ip, int line)
+int arch_jump_entry_size(struct jump_entry *entry)
{
- /*
- * The location is not an op that we were expecting.
- * Something went wrong. Crash the box, as something could be
- * corrupting the kernel.
- */
- pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line);
- BUG();
+ struct insn insn;
+
+ kernel_insn_init(&insn, (void *)jump_entry_code(entry), MAX_INSN_SIZE);
+ insn_get_length(&insn);
+ BUG_ON(insn.length != 2 && insn.length != 5);
+
+ return insn.length;
}

-static const void *
-__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type, int init)
+struct jump_label_patch {
+ const void *code;
+ int size;
+};
+
+static struct jump_label_patch
+__jump_label_patch(struct jump_entry *entry, enum jump_label_type type, int init)
{
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
- const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
- const void *expect, *code;
+ const unsigned char default_nop2[] = { STATIC_KEY_NOP2 };
+ const unsigned char default_nop5[] = { STATIC_KEY_NOP5 };
+ const void *expect, *code, *nop, *default_nop;
const void *addr, *dest;
- int line;
+ int line, size;

addr = (void *)jump_entry_code(entry);
dest = (void *)jump_entry_target(entry);

- code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
+ size = arch_jump_entry_size(entry);
+ switch (size) {
+ case JMP8_INSN_SIZE:
+ code = text_gen_insn(JMP8_INSN_OPCODE, addr, dest);
+ default_nop = default_nop2;
+ nop = ideal_nops[2];
+ break;
+
+ case JMP32_INSN_SIZE:
+ code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
+ default_nop = default_nop5;
+ nop = ideal_nops[NOP_ATOMIC5];
+ break;
+
+ default: BUG();
+ }

if (init) {
expect = default_nop; line = __LINE__;
} else if (type == JUMP_LABEL_JMP) {
- expect = ideal_nop; line = __LINE__;
+ expect = nop; line = __LINE__;
} else {
expect = code; line = __LINE__;
}

- if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE))
- bug_at(addr, line);
+ if (memcmp(addr, expect, size)) {
+ /*
+ * The location is not an op that we were expecting.
+ * Something went wrong. Crash the box, as something could be
+ * corrupting the kernel.
+ */
+ pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) line:%d init:%d size:%d type:%d\n",
+ addr, addr, addr, expect, line, init, size, type);
+ BUG();
+ }

if (type == JUMP_LABEL_NOP)
- code = ideal_nop;
+ code = nop;

- return code;
+ return (struct jump_label_patch){.code = code, .size = size};
}

static inline void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
int init)
{
- const void *opcode = __jump_label_set_jump_code(entry, type, init);
+ const struct jump_label_patch jlp = __jump_label_patch(entry, type, init);

/*
* As long as only a single processor is running and the code is still
@@ -76,12 +104,11 @@ static inline void __jump_label_transfor
* always nop being the 'currently valid' instruction
*/
if (init || system_state == SYSTEM_BOOTING) {
- text_poke_early((void *)jump_entry_code(entry), opcode,
- JUMP_LABEL_NOP_SIZE);
+ text_poke_early((void *)jump_entry_code(entry), jlp.code, jlp.size);
return;
}

- text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
+ text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
}

static void __ref jump_label_transform(struct jump_entry *entry,
@@ -102,7 +129,7 @@ void arch_jump_label_transform(struct ju
bool arch_jump_label_transform_queue(struct jump_entry *entry,
enum jump_label_type type)
{
- const void *opcode;
+ struct jump_label_patch jlp;

if (system_state == SYSTEM_BOOTING) {
/*
@@ -113,9 +140,8 @@ bool arch_jump_label_transform_queue(str
}

mutex_lock(&text_mutex);
- opcode = __jump_label_set_jump_code(entry, type, 0);
- text_poke_queue((void *)jump_entry_code(entry),
- opcode, JUMP_LABEL_NOP_SIZE, NULL);
+ jlp = __jump_label_patch(entry, type, 0);
+ text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
mutex_unlock(&text_mutex);
return true;
}
@@ -137,21 +163,10 @@ __init_or_module void arch_jump_label_tr
enum jump_label_type type)
{
/*
- * This function is called at boot up and when modules are
- * first loaded. Check if the default nop, the one that is
- * inserted at compile time, is the ideal nop. If it is, then
- * we do not need to update the nop, and we can leave it as is.
- * If it is not, then we need to update the nop to the ideal nop.
+ * Rewrite the NOP on init / module-load to ensure we got the ideal
+ * nop. Don't bother with trying to figure out what size and what nop
+ * it should be for now, simply do an unconditional rewrite.
*/
- if (jlstate == JL_STATE_START) {
- const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
- const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
-
- if (memcmp(ideal_nop, default_nop, 5) != 0)
- jlstate = JL_STATE_UPDATE;
- else
- jlstate = JL_STATE_NO_UPDATE;
- }
- if (jlstate == JL_STATE_UPDATE)
+ if (jlstate == JL_STATE_UPDATE || jlstate == JL_STATE_START)
jump_label_transform(entry, type, 1);
}
--- a/arch/x86/realmode/Makefile
+++ b/arch/x86/realmode/Makefile
@@ -10,7 +10,6 @@
# Sanitizer runtimes are unavailable and cannot be linked here.
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
-OBJECT_FILES_NON_STANDARD := y

subdir- := rm

--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -171,9 +171,21 @@ static inline bool jump_entry_is_init(co
return (unsigned long)entry->key & 2UL;
}

-static inline void jump_entry_set_init(struct jump_entry *entry)
+static inline void jump_entry_set_init(struct jump_entry *entry, bool set)
{
- entry->key |= 2;
+ if (set)
+ entry->key |= 2;
+ else
+ entry->key &= ~2;
+}
+
+static inline int jump_entry_size(struct jump_entry *entry)
+{
+#ifdef JUMP_LABEL_NOP_SIZE
+ return JUMP_LABEL_NOP_SIZE;
+#else
+ return arch_jump_entry_size(entry);
+#endif
}

#endif
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -309,7 +309,7 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit)
static int addr_conflict(struct jump_entry *entry, void *start, void *end)
{
if (jump_entry_code(entry) <= (unsigned long)end &&
- jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
+ jump_entry_code(entry) + jump_entry_size(entry) > (unsigned long)start)
return 1;

return 0;
@@ -480,8 +480,8 @@ void __init jump_label_init(void)
if (jump_label_type(iter) == JUMP_LABEL_NOP)
arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);

- if (init_section_contains((void *)jump_entry_code(iter), 1))
- jump_entry_set_init(iter);
+ jump_entry_set_init(iter,
+ init_section_contains((void *)jump_entry_code(iter), 1));

iterk = jump_entry_key(iter);
if (iterk == key)
@@ -627,8 +627,8 @@ static int jump_label_add_module(struct
for (iter = iter_start; iter < iter_stop; iter++) {
struct static_key *iterk;

- if (within_module_init(jump_entry_code(iter), mod))
- jump_entry_set_init(iter);
+ jump_entry_set_init(iter,
+ within_module_init(jump_entry_code(iter), mod));

iterk = jump_entry_key(iter);
if (iterk == key)
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -9,6 +9,7 @@
#define JUMP_ENTRY_SIZE 16
#define JUMP_ORIG_OFFSET 0
#define JUMP_NEW_OFFSET 4
+#define JUMP_KEY_OFFSET 8

#define ALT_ENTRY_SIZE 13
#define ALT_ORIG_OFFSET 0
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1164,6 +1164,22 @@ static int handle_jump_alt(struct objtoo
return -1;
}

+ if (special_alt->key_addend & 2) {
+ struct reloc *reloc;
+
+ reloc = find_reloc_by_dest_range(file->elf, orig_insn->sec,
+ orig_insn->offset, orig_insn->len);
+ if (reloc) {
+ reloc->type = R_NONE;
+ elf_write_reloc(file->elf, reloc);
+ }
+ elf_write_insn(file->elf, orig_insn->sec,
+ orig_insn->offset, orig_insn->len,
+ arch_nop_insn(orig_insn->len));
+ orig_insn->type = INSN_NOP;
+ return 0;
+ }
+
*new_insn = list_next_entry(orig_insn, list);
return 0;
}
@@ -1709,6 +1725,9 @@ static int decode_sections(struct objtoo
/*
* Must be before add_{jump,call}_destination; for they can add
* magic alternatives we can't actually parse.
+ *
+ * Also; must be before add_jump_destinations() because it will
+ * rewrite JMPs into NOPs -- see handle_jump_alt().
*/
ret = add_special_section_alts(file);
if (ret)
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -27,6 +27,7 @@ struct special_alt {
unsigned long new_off;

unsigned int orig_len, new_len; /* group only */
+ u8 key_addend;
};

int special_get_alts(struct elf *elf, struct list_head *alts);
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -23,6 +23,7 @@ struct special_entry {
unsigned char size, orig, new;
unsigned char orig_len, new_len; /* group only */
unsigned char feature; /* ALTERNATIVE macro CPU feature */
+ unsigned char key; /* jump_label key */
};

struct special_entry entries[] = {
@@ -42,6 +43,7 @@ struct special_entry entries[] = {
.size = JUMP_ENTRY_SIZE,
.orig = JUMP_ORIG_OFFSET,
.new = JUMP_NEW_OFFSET,
+ .key = JUMP_KEY_OFFSET,
},
{
.sec = "__ex_table",
@@ -114,6 +116,18 @@ static int get_alt_entry(struct elf *elf
alt->new_off -= 0x7ffffff0;
}

+ if (entry->key) {
+ struct reloc *key_reloc;
+
+ key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key);
+ if (!key_reloc) {
+ WARN_FUNC("can't find key reloc",
+ sec, offset + entry->key);
+ return -1;
+ }
+ alt->key_addend = key_reloc->addend;
+ }
+
return 0;
}

2021-02-20 22:35:34

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Sat, Feb 20, 2021 at 06:41:01PM +0100, Borislav Petkov wrote:
> > - if we had negative alternatives objtool doesn't need to actually
> > rewrite code in this case. It could simply emit alternative entries
> > and call it a day.
>
> I don't mind the negative alt per se - I mind the implementation I saw.
> I'm sure we can come up with something nicer, like, for example, struct
> alt_instr.flags to denote that this feature is a NOT feature.

So you don't like the ~ or - on cpuid? ISTR we talked about
alt_instr::flags before, but Google isn't playing ball today so I can't
seem to find it.

I can certainly look at adding the flags thing.

2021-02-20 22:59:30

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Sat, Feb 20, 2021 at 11:28:02PM +0100, Peter Zijlstra wrote:
> On Sat, Feb 20, 2021 at 06:41:01PM +0100, Borislav Petkov wrote:
> > > - I have more cases for objtool to rewrite code (I'll see if I can
> > > rebase and post that this weekend -- no promises).
> >
> > Oh noes.
>
> 11 patches and one beer later, it even boots :-)
>
> Saves more than 6k on a defconfig build.
>
> I'll push it out to git in a bit.

https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=locking/jump_label

2021-02-21 05:50:22

by Juergen Gross

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On 20.02.21 23:32, Peter Zijlstra wrote:
> On Sat, Feb 20, 2021 at 06:41:01PM +0100, Borislav Petkov wrote:
>>> - if we had negative alternatives objtool doesn't need to actually
>>> rewrite code in this case. It could simply emit alternative entries
>>> and call it a day.
>>
>> I don't mind the negative alt per se - I mind the implementation I saw.
>> I'm sure we can come up with something nicer, like, for example, struct
>> alt_instr.flags to denote that this feature is a NOT feature.
>
> So you don't like the ~ or - on cpuid? ISTR we talked about
> alt_instr::flags before, but Google isn't playing ball today so I can't
> seem to find it.

If you want I can cook up a patch and include it in my paravirt
cleanup series.

Juergen


Attachments:
OpenPGP_0xB0DE9DD628BF132F.asc (3.06 kB)
OpenPGP_signature (505.00 B)
OpenPGP digital signature
Download all attachments

2021-02-21 09:47:08

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Sun, Feb 21, 2021 at 06:45:54AM +0100, Jürgen Groß wrote:
> If you want I can cook up a patch and include it in my paravirt
> cleanup series.

Sure, Linus already pulled the first part of your cleanup so you can
base off the rest ontop.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2021-02-21 09:56:55

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC][PATCH 6/6] objtool,x86: Rewrite retpoline thunk calls

On Sat, Feb 20, 2021 at 11:51:00PM +0100, Peter Zijlstra wrote:
> > 11 patches and one beer later, it even boots :-)

Yah, beer and coding sometimes works. But only sometimes, ask rostedt
and tglx.

:-P

> > Saves more than 6k on a defconfig build.

Uuh, niice. And that will be a lot more on a all{yes,mod}config.

> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/log/?h=locking/jump_label

Looks interesting. I'll definitely have an indepth look when you send
them proper.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette