When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
indirect call, have objtool rewrite it to:
ALTERNATIVE "call __x86_indirect_thunk_\reg",
"call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE)
Additionally, in order to not emit endless identical
.altinst_replacement chunks, use a global symbol for them, see
__x86_indirect_alt_*.
This also avoids objtool from having to do code generation.
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/include/asm/asm-prototypes.h | 12 ++-
arch/x86/lib/retpoline.S | 42 +++++++++++
tools/objtool/arch/x86/decode.c | 122 ++++++++++++++++++++++++++++++++++
3 files changed, 173 insertions(+), 3 deletions(-)
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void);
#ifdef CONFIG_RETPOLINE
-#define DECL_INDIRECT_THUNK(reg) \
+#undef GEN
+#define GEN(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
#undef GEN
-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
#include <asm/GEN-for-each-reg.h>
#endif /* CONFIG_RETPOLINE */
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,6 +10,8 @@
#include <asm/unwind_hints.h>
#include <asm/frame.h>
+ .section .text.__x86.indirect_thunk
+
.macro RETPOLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call .Ldo_rop_\@
@@ -25,9 +27,9 @@
.endm
.macro THUNK reg
- .section .text.__x86.indirect_thunk
.align 32
+
SYM_FUNC_START(__x86_indirect_thunk_\reg)
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
@@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
.endm
/*
+ * This generates .altinstr_replacement symbols for use by objtool. They,
+ * however, must not actually live in .altinstr_replacement since that will be
+ * discarded after init, but module alternatives will also reference these
+ * symbols.
+ *
+ * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
+ */
+.macro ALT_THUNK reg
+
+ .align 1
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+ ANNOTATE_RETPOLINE_SAFE
+1: call *%\reg
+2: .skip 5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+ ANNOTATE_RETPOLINE_SAFE
+1: jmp *%\reg
+2: .skip 5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+
+.endm
+
+/*
* Despite being an assembler file we can't just use .irp here
* because __KSYM_DEPS__ only uses the C preprocessor and would
* only see one instance of "__x86_indirect_thunk_\reg" rather
@@ -61,3 +89,15 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
+#undef GEN
+#define GEN(reg) ALT_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -19,6 +19,7 @@
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
+#include <arch/elf.h>
static int is_x86_64(const struct elf *elf)
{
@@ -657,6 +658,122 @@ const char *arch_nop_insn(int len)
return nops[len-1];
}
+/* asm/alternative.h ? */
+
+#define ALTINSTR_FLAG_INV (1 << 15)
+#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
+
+struct alt_instr {
+ s32 instr_offset; /* original instruction */
+ s32 repl_offset; /* offset to replacement instruction */
+ u16 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+ struct instruction *orig, struct symbol *sym,
+ int cpuid, u8 orig_len, u8 repl_len)
+{
+ const int size = sizeof(struct alt_instr);
+ struct alt_instr *alt;
+ struct section *sec;
+ Elf_Scn *s;
+
+ sec = find_section_by_name(elf, ".altinstructions");
+ if (!sec) {
+ sec = elf_create_section(elf, ".altinstructions",
+ SHF_WRITE, size, 0);
+
+ if (!sec) {
+ WARN_ELF("elf_create_section");
+ return -1;
+ }
+ }
+
+ s = elf_getscn(elf->elf, sec->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
+ WARN_ELF("elf_newdata");
+ return -1;
+ }
+
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
+
+ alt = sec->data->d_buf = malloc(size);
+ if (!sec->data->d_buf) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sec->data->d_buf, 0, size);
+
+ if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
+ R_X86_64_PC32, orig->sec, orig->offset)) {
+ WARN("elf_create_reloc: alt_instr::instr_offset");
+ return -1;
+ }
+
+ if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
+ R_X86_64_PC32, sym, 0)) {
+ WARN("elf_create_reloc: alt_instr::repl_offset");
+ return -1;
+ }
+
+ alt->cpuid = cpuid;
+ alt->instrlen = orig_len;
+ alt->replacementlen = repl_len;
+
+ sec->sh.sh_size += size;
+ sec->changed = true;
+
+ return 0;
+}
+
+#define X86_FEATURE_RETPOLINE ( 7*32+12)
+
+int arch_rewrite_retpolines(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct reloc *reloc;
+ struct symbol *sym;
+ char name[32] = "";
+
+ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+
+ if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+ continue;
+
+ reloc = insn->reloc;
+
+ sprintf(name, "__x86_indirect_alt_%s_%s",
+ insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+ reloc->sym->name + 21);
+
+ sym = find_symbol_by_name(file->elf, name);
+ if (!sym) {
+ sym = elf_create_undef_symbol(file->elf, name);
+ if (!sym) {
+ WARN("elf_create_undef_symbol");
+ return -1;
+ }
+ }
+
+ if (elf_add_alternative(file->elf, insn, sym,
+ ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
+ WARN("elf_add_alternative");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
{
struct cfi_reg *cfa = &insn->cfi.cfa;
On Fri, Mar 26, 2021 at 04:12:15PM +0100, Peter Zijlstra wrote:
> @@ -61,3 +89,15 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
> #define GEN(reg) EXPORT_THUNK(reg)
> #include <asm/GEN-for-each-reg.h>
>
> +#undef GEN
> +#define GEN(reg) ALT_THUNK reg
> +#include <asm/GEN-for-each-reg.h>
> +
> +#undef GEN
> +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
> +#include <asm/GEN-for-each-reg.h>
> +
> +#undef GEN
> +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
> +#include <asm/GEN-for-each-reg.h>
> +
Git complains about this last newline.
Otherwise everything looks pretty good to me. Let me run it through the
test matrix.
--
Josh
The following commit has been merged into the x86/core branch of tip:
Commit-ID: f31390437ce984118215169d75570e365457ec23
Gitweb: https://git.kernel.org/tip/f31390437ce984118215169d75570e365457ec23
Author: Peter Zijlstra <[email protected]>
AuthorDate: Fri, 26 Mar 2021 16:12:15 +01:00
Committer: Borislav Petkov <[email protected]>
CommitterDate: Thu, 01 Apr 2021 14:30:45 +02:00
objtool/x86: Rewrite retpoline thunk calls
When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
indirect call, have objtool rewrite it to:
ALTERNATIVE "call __x86_indirect_thunk_\reg",
"call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE)
Additionally, in order to not emit endless identical
.altinst_replacement chunks, use a global symbol for them, see
__x86_indirect_alt_*.
This also avoids objtool from having to do code generation.
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Reviewed-by: Miroslav Benes <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/include/asm/asm-prototypes.h | 12 ++-
arch/x86/lib/retpoline.S | 41 ++++++++-
tools/objtool/arch/x86/decode.c | 117 +++++++++++++++++++++++++-
3 files changed, 167 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 0545b07..4cb726c 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void);
#ifdef CONFIG_RETPOLINE
-#define DECL_INDIRECT_THUNK(reg) \
+#undef GEN
+#define GEN(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
#undef GEN
-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
#include <asm/GEN-for-each-reg.h>
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index d2c0d14..4d32cb0 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,6 +10,8 @@
#include <asm/unwind_hints.h>
#include <asm/frame.h>
+ .section .text.__x86.indirect_thunk
+
.macro RETPOLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call .Ldo_rop_\@
@@ -25,9 +27,9 @@
.endm
.macro THUNK reg
- .section .text.__x86.indirect_thunk
.align 32
+
SYM_FUNC_START(__x86_indirect_thunk_\reg)
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
@@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
.endm
/*
+ * This generates .altinstr_replacement symbols for use by objtool. They,
+ * however, must not actually live in .altinstr_replacement since that will be
+ * discarded after init, but module alternatives will also reference these
+ * symbols.
+ *
+ * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
+ */
+.macro ALT_THUNK reg
+
+ .align 1
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+ ANNOTATE_RETPOLINE_SAFE
+1: call *%\reg
+2: .skip 5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+ ANNOTATE_RETPOLINE_SAFE
+1: jmp *%\reg
+2: .skip 5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+
+.endm
+
+/*
* Despite being an assembler file we can't just use .irp here
* because __KSYM_DEPS__ only uses the C preprocessor and would
* only see one instance of "__x86_indirect_thunk_\reg" rather
@@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
+#undef GEN
+#define GEN(reg) ALT_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+#include <asm/GEN-for-each-reg.h>
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index e5fa3a5..44375fa 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -16,6 +16,7 @@
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
+#include <arch/elf.h>
static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -610,6 +611,122 @@ const char *arch_nop_insn(int len)
return nops[len-1];
}
+/* asm/alternative.h ? */
+
+#define ALTINSTR_FLAG_INV (1 << 15)
+#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
+
+struct alt_instr {
+ s32 instr_offset; /* original instruction */
+ s32 repl_offset; /* offset to replacement instruction */
+ u16 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+ struct instruction *orig, struct symbol *sym,
+ int cpuid, u8 orig_len, u8 repl_len)
+{
+ const int size = sizeof(struct alt_instr);
+ struct alt_instr *alt;
+ struct section *sec;
+ Elf_Scn *s;
+
+ sec = find_section_by_name(elf, ".altinstructions");
+ if (!sec) {
+ sec = elf_create_section(elf, ".altinstructions",
+ SHF_WRITE, size, 0);
+
+ if (!sec) {
+ WARN_ELF("elf_create_section");
+ return -1;
+ }
+ }
+
+ s = elf_getscn(elf->elf, sec->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
+ WARN_ELF("elf_newdata");
+ return -1;
+ }
+
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
+
+ alt = sec->data->d_buf = malloc(size);
+ if (!sec->data->d_buf) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sec->data->d_buf, 0, size);
+
+ if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
+ R_X86_64_PC32, orig->sec, orig->offset)) {
+ WARN("elf_create_reloc: alt_instr::instr_offset");
+ return -1;
+ }
+
+ if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
+ R_X86_64_PC32, sym, 0)) {
+ WARN("elf_create_reloc: alt_instr::repl_offset");
+ return -1;
+ }
+
+ alt->cpuid = cpuid;
+ alt->instrlen = orig_len;
+ alt->replacementlen = repl_len;
+
+ sec->sh.sh_size += size;
+ sec->changed = true;
+
+ return 0;
+}
+
+#define X86_FEATURE_RETPOLINE ( 7*32+12)
+
+int arch_rewrite_retpolines(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct reloc *reloc;
+ struct symbol *sym;
+ char name[32] = "";
+
+ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+
+ if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+ continue;
+
+ reloc = insn->reloc;
+
+ sprintf(name, "__x86_indirect_alt_%s_%s",
+ insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+ reloc->sym->name + 21);
+
+ sym = find_symbol_by_name(file->elf, name);
+ if (!sym) {
+ sym = elf_create_undef_symbol(file->elf, name);
+ if (!sym) {
+ WARN("elf_create_undef_symbol");
+ return -1;
+ }
+ }
+
+ if (elf_add_alternative(file->elf, insn, sym,
+ ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
+ WARN("elf_add_alternative");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
{
struct cfi_reg *cfa = &insn->cfi.cfa;
The following commit has been merged into the x86/core branch of tip:
Commit-ID: 9bc0bb50727c8ac69fbb33fb937431cf3518ff37
Gitweb: https://git.kernel.org/tip/9bc0bb50727c8ac69fbb33fb937431cf3518ff37
Author: Peter Zijlstra <[email protected]>
AuthorDate: Fri, 26 Mar 2021 16:12:15 +01:00
Committer: Ingo Molnar <[email protected]>
CommitterDate: Fri, 02 Apr 2021 12:47:28 +02:00
objtool/x86: Rewrite retpoline thunk calls
When the compiler emits: "CALL __x86_indirect_thunk_\reg" for an
indirect call, have objtool rewrite it to:
ALTERNATIVE "call __x86_indirect_thunk_\reg",
"call *%reg", ALT_NOT(X86_FEATURE_RETPOLINE)
Additionally, in order to not emit endless identical
.altinst_replacement chunks, use a global symbol for them, see
__x86_indirect_alt_*.
This also avoids objtool from having to do code generation.
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Miroslav Benes <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/include/asm/asm-prototypes.h | 12 ++-
arch/x86/lib/retpoline.S | 41 ++++++++-
tools/objtool/arch/x86/decode.c | 117 +++++++++++++++++++++++++-
3 files changed, 167 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 0545b07..4cb726c 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -19,11 +19,19 @@ extern void cmpxchg8b_emu(void);
#ifdef CONFIG_RETPOLINE
-#define DECL_INDIRECT_THUNK(reg) \
+#undef GEN
+#define GEN(reg) \
extern asmlinkage void __x86_indirect_thunk_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_alt_call_ ## reg (void);
+#include <asm/GEN-for-each-reg.h>
#undef GEN
-#define GEN(reg) DECL_INDIRECT_THUNK(reg)
+#define GEN(reg) \
+ extern asmlinkage void __x86_indirect_alt_jmp_ ## reg (void);
#include <asm/GEN-for-each-reg.h>
#endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index d2c0d14..4d32cb0 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -10,6 +10,8 @@
#include <asm/unwind_hints.h>
#include <asm/frame.h>
+ .section .text.__x86.indirect_thunk
+
.macro RETPOLINE reg
ANNOTATE_INTRA_FUNCTION_CALL
call .Ldo_rop_\@
@@ -25,9 +27,9 @@
.endm
.macro THUNK reg
- .section .text.__x86.indirect_thunk
.align 32
+
SYM_FUNC_START(__x86_indirect_thunk_\reg)
ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
@@ -39,6 +41,32 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
.endm
/*
+ * This generates .altinstr_replacement symbols for use by objtool. They,
+ * however, must not actually live in .altinstr_replacement since that will be
+ * discarded after init, but module alternatives will also reference these
+ * symbols.
+ *
+ * Their names matches the "__x86_indirect_" prefix to mark them as retpolines.
+ */
+.macro ALT_THUNK reg
+
+ .align 1
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
+ ANNOTATE_RETPOLINE_SAFE
+1: call *%\reg
+2: .skip 5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_call_\reg)
+
+SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
+ ANNOTATE_RETPOLINE_SAFE
+1: jmp *%\reg
+2: .skip 5-(2b-1b), 0x90
+SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
+
+.endm
+
+/*
* Despite being an assembler file we can't just use .irp here
* because __KSYM_DEPS__ only uses the C preprocessor and would
* only see one instance of "__x86_indirect_thunk_\reg" rather
@@ -61,3 +89,14 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg)
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
+#undef GEN
+#define GEN(reg) ALT_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_call_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+
+#undef GEN
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_alt_jmp_ ## reg)
+#include <asm/GEN-for-each-reg.h>
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 782894e..7e8b5be 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -19,6 +19,7 @@
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
+#include <arch/elf.h>
static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -613,6 +614,122 @@ const char *arch_nop_insn(int len)
return nops[len-1];
}
+/* asm/alternative.h ? */
+
+#define ALTINSTR_FLAG_INV (1 << 15)
+#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
+
+struct alt_instr {
+ s32 instr_offset; /* original instruction */
+ s32 repl_offset; /* offset to replacement instruction */
+ u16 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+ struct instruction *orig, struct symbol *sym,
+ int cpuid, u8 orig_len, u8 repl_len)
+{
+ const int size = sizeof(struct alt_instr);
+ struct alt_instr *alt;
+ struct section *sec;
+ Elf_Scn *s;
+
+ sec = find_section_by_name(elf, ".altinstructions");
+ if (!sec) {
+ sec = elf_create_section(elf, ".altinstructions",
+ SHF_WRITE, size, 0);
+
+ if (!sec) {
+ WARN_ELF("elf_create_section");
+ return -1;
+ }
+ }
+
+ s = elf_getscn(elf->elf, sec->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
+ WARN_ELF("elf_newdata");
+ return -1;
+ }
+
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
+
+ alt = sec->data->d_buf = malloc(size);
+ if (!sec->data->d_buf) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sec->data->d_buf, 0, size);
+
+ if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
+ R_X86_64_PC32, orig->sec, orig->offset)) {
+ WARN("elf_create_reloc: alt_instr::instr_offset");
+ return -1;
+ }
+
+ if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
+ R_X86_64_PC32, sym, 0)) {
+ WARN("elf_create_reloc: alt_instr::repl_offset");
+ return -1;
+ }
+
+ alt->cpuid = cpuid;
+ alt->instrlen = orig_len;
+ alt->replacementlen = repl_len;
+
+ sec->sh.sh_size += size;
+ sec->changed = true;
+
+ return 0;
+}
+
+#define X86_FEATURE_RETPOLINE ( 7*32+12)
+
+int arch_rewrite_retpolines(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct reloc *reloc;
+ struct symbol *sym;
+ char name[32] = "";
+
+ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+
+ if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+ continue;
+
+ reloc = insn->reloc;
+
+ sprintf(name, "__x86_indirect_alt_%s_%s",
+ insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+ reloc->sym->name + 21);
+
+ sym = find_symbol_by_name(file->elf, name);
+ if (!sym) {
+ sym = elf_create_undef_symbol(file->elf, name);
+ if (!sym) {
+ WARN("elf_create_undef_symbol");
+ return -1;
+ }
+ }
+
+ if (elf_add_alternative(file->elf, insn, sym,
+ ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
+ WARN("elf_add_alternative");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
{
struct cfi_reg *cfa = &insn->cfi.cfa;
Hi Peter,
This patch seems to crash on Tigerlake platform (Chromebook delbin), I
got the following error:
[ 2.103054] pcieport 0000:00:1c.0: PME: Signaling with IRQ 122
[ 2.110148] pcieport 0000:00:1c.0: pciehp: Slot #7 AttnBtn-
PwrCtrl- MRL- AttnInd- PwrInd- HotPlug+ Surprise+ Interlock- NoCompl+
IbPresDis- LLActRep+
[ 2.126754] pcieport 0000:00:1d.0: PME: Signaling with IRQ 123
[ 2.133946] ACPI: \_SB_.CP00: Found 3 idle states
[ 2.139708] BUG: kernel NULL pointer dereference, address: 000000000000012b
[ 2.140704] #PF: supervisor read access in kernel mode
[ 2.140704] #PF: error_code(0x0000) - not-present page
[ 2.140704] PGD 0 P4D 0
[ 2.140704] Oops: 0000 [#1] PREEMPT SMP NOPTI
[ 2.140704] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U
5.13.0-rc1 #31
[ 2.140704] Hardware name: Google Delbin/Delbin, BIOS
Google_Delbin.13672.156.3 05/14/2021
[ 2.140704] RIP: 0010:cpuidle_poll_time+0x9/0x6a
[ 2.140704] Code: 44 00 00 85 f6 78 19 55 48 89 e5 48 8b 05 16 44
44 01 4c 8b 58 40 4d 85 db 5d 41 ff d3 66 90 00 c3 0f 1f 44 00 00 55
48 89 e5 <48> 8b 46 20 48 85 c0 75 56 4c 63 87 28 04 00 00 b8 24 f49
[ 2.140704] RSP: 0000:ffffffff9cc03ea8 EFLAGS: 00010282
[ 2.140704] RAX: 0000000000008e7d RBX: ffffffff9cc1c5fd RCX: 000000007f894e5a
[ 2.140704] RDX: 000000007f894d4f RSI: 000000000000010b RDI: 0000000002fa1cf6
[ 2.140704] RBP: ffffffff9cc03ea8 R08: 0000000000000000 R09: 00000000ca948246
[ 2.140704] R10: 0000000000000000 R11: ffffffff9bf132cb R12: 0000000000000003
[ 2.140704] R13: ffffbbfdffc21960 R14: 0000000000000000 R15: ffffffff9cdba638
[ 2.140704] FS: 0000000000000000(0000) GS:ffff928280000000(0000)
knlGS:0000000000000000
[ 2.140704] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2.140704] CR2: 000000000000012b CR3: 000000027e414001 CR4: 0000000000770ef0
[ 2.140704] PKRU: 55555554
[ 2.140704] Call Trace:
[ 2.140704] do_idle+0x175/0x1f6
[ 2.140704] cpu_startup_entry+0x1d/0x1f
[ 2.140704] start_kernel+0x3be/0x420
[ 2.140704] secondary_startup_64_no_verify+0xb0/0xbb
[ 2.140704] Modules linked in:
[ 2.140704] CR2: 000000000000012b
[ 2.140704] ---[ end trace d15839e2bd509f00 ]---
[ 2.140704] RIP: 0010:cpuidle_poll_time+0x9/0x6a
[ 2.140704] Code: 44 00 00 85 f6 78 19 55 48 89 e5 48 8b 05 16 44
44 01 4c 8b 58 40 4d 85 db 5d 41 ff d3 66 90 00 c3 0f 1f 44 00 00 55
48 89 e5 <48> 8b 46 20 48 85 c0 75 56 4c 63 87 28 04 00 00 b8 24 f49
[ 2.140704] RSP: 0000:ffffffff9cc03ea8 EFLAGS: 00010282
[ 2.140704] RAX: 0000000000008e7d RBX: ffffffff9cc1c5fd RCX: 000000007f894e5a
[ 2.140704] RDX: 000000007f894d4f RSI: 000000000000010b RDI: 0000000002fa1cf6
[ 2.140704] RBP: ffffffff9cc03ea8 R08: 0000000000000000 R09: 00000000ca948246
[ 2.140704] R10: 0000000000000000 R11: ffffffff9bf132cb R12: 0000000000000003
[ 2.140704] R13: ffffbbfdffc21960 R14: 0000000000000000 R15: ffffffff9cdba638
[ 2.140704] FS: 0000000000000000(0000) GS:ffff928280000000(0000)
knlGS:0000000000000000
[ 2.140704] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2.140704] CR2: 000000000000012b CR3: 000000027e414001 CR4: 0000000000770ef0
[ 2.140704] PKRU: 55555554
[ 2.140704] Kernel panic - not syncing: Fatal exception
[ 2.140704] Kernel Offset: 0x1a600000 from 0xffffffff81000000
(relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 2.140704] ACPI MEMORY or I/O RESET_REG.
Git bisect pointed to this commit:
9bc0bb50727c8ac69fbb33fb937431cf3518ff37 is the first bad commit
commit 9bc0bb50727c8ac69fbb33fb937431cf3518ff37
Author: Peter Zijlstra <[email protected]>
Date: Fri Mar 26 16:12:15 2021 +0100
objtool/x86: Rewrite retpoline thunk calls
If there is anything I could do to help debug this issue (additional
debugs, logs etc.), please let me know.
Best regards,
Lukasz
On Wed, Jun 02, 2021 at 05:51:01PM +0200, Lukasz Majczak wrote:
> Hi Peter,
>
> This patch seems to crash on Tigerlake platform (Chromebook delbin), I
> got the following error:
>
> [ 2.103054] pcieport 0000:00:1c.0: PME: Signaling with IRQ 122
> [ 2.110148] pcieport 0000:00:1c.0: pciehp: Slot #7 AttnBtn-
> PwrCtrl- MRL- AttnInd- PwrInd- HotPlug+ Surprise+ Interlock- NoCompl+
> IbPresDis- LLActRep+
> [ 2.126754] pcieport 0000:00:1d.0: PME: Signaling with IRQ 123
> [ 2.133946] ACPI: \_SB_.CP00: Found 3 idle states
> [ 2.139708] BUG: kernel NULL pointer dereference, address: 000000000000012b
> [ 2.140704] #PF: supervisor read access in kernel mode
> [ 2.140704] #PF: error_code(0x0000) - not-present page
> [ 2.140704] PGD 0 P4D 0
> [ 2.140704] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [ 2.140704] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U
> 5.13.0-rc1 #31
> [ 2.140704] Hardware name: Google Delbin/Delbin, BIOS
> Google_Delbin.13672.156.3 05/14/2021
> [ 2.140704] RIP: 0010:cpuidle_poll_time+0x9/0x6a
> [ 2.140704] Code: 44 00 00 85 f6 78 19 55 48 89 e5 48 8b 05 16 44
> 44 01 4c 8b 58 40 4d 85 db 5d 41 ff d3 66 90 00 c3 0f 1f 44 00 00 55
> 48 89 e5 <48> 8b 46 20 48 85 c0 75 56 4c 63 87 28 04 00 00 b8 24 f49
All code
========
0: 44 00 00 add %r8b,(%rax)
3: 85 f6 test %esi,%esi
5: 78 19 js 0x20
7: 55 push %rbp
8: 48 89 e5 mov %rsp,%rbp
b: 48 8b 05 16 44 44 01 mov 0x1444416(%rip),%rax # 0x1444428
12: 4c 8b 58 40 mov 0x40(%rax),%r11
16: 4d 85 db test %r11,%r11
19: 5d pop %rbp
1a: 41 ff d3 callq *%r11
1d: 66 90 xchg %ax,%ax
1f: 00 c3 add %al,%bl
21: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
26: 55 push %rbp
27: 48 89 e5 mov %rsp,%rbp
2a:* 48 8b 46 20 mov 0x20(%rsi),%rax <-- trapping instruction
2e: 48 85 c0 test %rax,%rax
31: 75 56 jne 0x89
33: 4c 63 87 28 04 00 00 movslq 0x428(%rdi),%r8
3a: b8 .byte 0xb8
3b: 24 49 and $0x49,%al
What does something like:
OBJ=vmlinux.o FUNC=0010:cpuidle_poll_time objdump -wdr $@ $OBJ | awk "/^\$/ { P=0; } /$FUNC[^>]*>:\$/ { P=1; O=strtonum(\"0x\" \$1); } { if (P) { o=strtonum(\"0x\" \$1); printf(\"%04x \", o-O); print \$0; } }"
look like for that build?
The 1d,1f instructions look exactly like what the alternative would've
written.
> [ 2.140704] RSP: 0000:ffffffff9cc03ea8 EFLAGS: 00010282
> [ 2.140704] RAX: 0000000000008e7d RBX: ffffffff9cc1c5fd RCX: 000000007f894e5a
> [ 2.140704] RDX: 000000007f894d4f RSI: 000000000000010b RDI: 0000000002fa1cf6
That said, your RSI is buggered, and 0x20(%rsi) rightfully blows up.
On Wed, Jun 02, 2021 at 06:56:51PM +0200, Peter Zijlstra wrote:
> On Wed, Jun 02, 2021 at 05:51:01PM +0200, Lukasz Majczak wrote:
> > Hi Peter,
> >
> > This patch seems to crash on Tigerlake platform (Chromebook delbin), I
> > got the following error:
> >
> > [ 2.103054] pcieport 0000:00:1c.0: PME: Signaling with IRQ 122
> > [ 2.110148] pcieport 0000:00:1c.0: pciehp: Slot #7 AttnBtn-
> > PwrCtrl- MRL- AttnInd- PwrInd- HotPlug+ Surprise+ Interlock- NoCompl+
> > IbPresDis- LLActRep+
> > [ 2.126754] pcieport 0000:00:1d.0: PME: Signaling with IRQ 123
> > [ 2.133946] ACPI: \_SB_.CP00: Found 3 idle states
> > [ 2.139708] BUG: kernel NULL pointer dereference, address: 000000000000012b
> > [ 2.140704] #PF: supervisor read access in kernel mode
> > [ 2.140704] #PF: error_code(0x0000) - not-present page
> > [ 2.140704] PGD 0 P4D 0
> > [ 2.140704] Oops: 0000 [#1] PREEMPT SMP NOPTI
> > [ 2.140704] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U
> > 5.13.0-rc1 #31
> > [ 2.140704] Hardware name: Google Delbin/Delbin, BIOS
> > Google_Delbin.13672.156.3 05/14/2021
> > [ 2.140704] RIP: 0010:cpuidle_poll_time+0x9/0x6a
> > [ 2.140704] Code: 44 00 00 85 f6 78 19 55 48 89 e5 48 8b 05 16 44
> > 44 01 4c 8b 58 40 4d 85 db 5d 41 ff d3 66 90 00 c3 0f 1f 44 00 00 55
> > 48 89 e5 <48> 8b 46 20 48 85 c0 75 56 4c 63 87 28 04 00 00 b8 24 f49
>
> All code
> ========
> 0: 44 00 00 add %r8b,(%rax)
> 3: 85 f6 test %esi,%esi
> 5: 78 19 js 0x20
> 7: 55 push %rbp
> 8: 48 89 e5 mov %rsp,%rbp
> b: 48 8b 05 16 44 44 01 mov 0x1444416(%rip),%rax # 0x1444428
> 12: 4c 8b 58 40 mov 0x40(%rax),%r11
> 16: 4d 85 db test %r11,%r11
> 19: 5d pop %rbp
> 1a: 41 ff d3 callq *%r11
> 1d: 66 90 xchg %ax,%ax
> 1f: 00 c3 add %al,%bl
> 21: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
> 26: 55 push %rbp
> 27: 48 89 e5 mov %rsp,%rbp
> 2a:* 48 8b 46 20 mov 0x20(%rsi),%rax <-- trapping instruction
> 2e: 48 85 c0 test %rax,%rax
> 31: 75 56 jne 0x89
> 33: 4c 63 87 28 04 00 00 movslq 0x428(%rdi),%r8
> 3a: b8 .byte 0xb8
> 3b: 24 49 and $0x49,%al
>
> What does something like:
>
> OBJ=vmlinux.o FUNC=0010:cpuidle_poll_time objdump -wdr $@ $OBJ | awk "/^\$/ { P=0; } /$FUNC[^>]*>:\$/ { P=1; O=strtonum(\"0x\" \$1); } { if (P) { o=strtonum(\"0x\" \$1); printf(\"%04x \", o-O); print \$0; } }"
>
> look like for that build?
I'm being daft; we build debug stuff for this.
Can you please do something like:
$ touch drivers/cpuidle/cpuidle.c
$ OBJTOOL_ARGS="--backup" make drivers/cpuidle/cpuidle.o
and send me both: drivers/cpuidle/cpuidle.o{,.orig}
On Wed, Jun 02, 2021 at 05:51:01PM +0200, Lukasz Majczak wrote:
> Hi Peter,
>
> This patch seems to crash on Tigerlake platform (Chromebook delbin), I
> got the following error:
>
> [ 2.103054] pcieport 0000:00:1c.0: PME: Signaling with IRQ 122
> [ 2.110148] pcieport 0000:00:1c.0: pciehp: Slot #7 AttnBtn-
> PwrCtrl- MRL- AttnInd- PwrInd- HotPlug+ Surprise+ Interlock- NoCompl+
> IbPresDis- LLActRep+
> [ 2.126754] pcieport 0000:00:1d.0: PME: Signaling with IRQ 123
> [ 2.133946] ACPI: \_SB_.CP00: Found 3 idle states
> [ 2.139708] BUG: kernel NULL pointer dereference, address: 000000000000012b
> [ 2.140704] #PF: supervisor read access in kernel mode
> [ 2.140704] #PF: error_code(0x0000) - not-present page
> [ 2.140704] PGD 0 P4D 0
> [ 2.140704] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [ 2.140704] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U
> 5.13.0-rc1 #31
> [ 2.140704] Hardware name: Google Delbin/Delbin, BIOS
> Google_Delbin.13672.156.3 05/14/2021
> [ 2.140704] RIP: 0010:cpuidle_poll_time+0x9/0x6a
> [ 2.140704] Code: 44 00 00 85 f6 78 19 55 48 89 e5 48 8b 05 16 44
> 44 01 4c 8b 58 40 4d 85 db 5d 41 ff d3 66 90 00 c3 0f 1f 44 00 00 55
> 48 89 e5 <48> 8b 46 20 48 85 c0 75 56 4c 63 87 28 04 00 00 b8 24 f49
> [ 2.140704] RSP: 0000:ffffffff9cc03ea8 EFLAGS: 00010282
> [ 2.140704] RAX: 0000000000008e7d RBX: ffffffff9cc1c5fd RCX: 000000007f894e5a
> [ 2.140704] RDX: 000000007f894d4f RSI: 000000000000010b RDI: 0000000002fa1cf6
> [ 2.140704] RBP: ffffffff9cc03ea8 R08: 0000000000000000 R09: 00000000ca948246
> [ 2.140704] R10: 0000000000000000 R11: ffffffff9bf132cb R12: 0000000000000003
> [ 2.140704] R13: ffffbbfdffc21960 R14: 0000000000000000 R15: ffffffff9cdba638
> [ 2.140704] FS: 0000000000000000(0000) GS:ffff928280000000(0000)
> knlGS:0000000000000000
> [ 2.140704] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 2.140704] CR2: 000000000000012b CR3: 000000027e414001 CR4: 0000000000770ef0
> [ 2.140704] PKRU: 55555554
> [ 2.140704] Call Trace:
> [ 2.140704] do_idle+0x175/0x1f6
> [ 2.140704] cpu_startup_entry+0x1d/0x1f
> [ 2.140704] start_kernel+0x3be/0x420
> [ 2.140704] secondary_startup_64_no_verify+0xb0/0xbb
Assuming I'm looking at the right code, this is weird.
cpuidle_poll_time()'s only caller is poll_idle(), which isn't even
listed in the stack trace. Maybe the function before
cpuidle_poll_time() fell through into it somehow. Or execution got
otherwise hosed. That would also explain the bad function argument.
In addition to the data Peter requested, it would also be interesting to
see the disassembly of do_idle() with objdump -dr, to see which function
got called before it went off the rails.
--
Josh
(Manually replying to https://lore.kernel.org/lkml/CAFJ_xbq06nfaEWtVNLtg7XCJrQeQ9wCs4Zsoi5Y_HP3Dx0iTRA@mail.gmail.com/)
Hi Peter,
We're also tracking 2 recent regressions that look like they've come from this
patch.
https://github.com/ClangBuiltLinux/linux/issues/1384
https://github.com/ClangBuiltLinux/linux/issues/1388
(Both in linux-next at the moment).
The first, it looks like a boot failure. The second is a warning from the
linker on a kernel module; even readelf seems unhappy with the results of the
output from objtool.
I can more easily reproduce the latter, so I'm working on getting a smaller
reproducer. I'll let you know when I have it, but wanted to report it ASAP.
On Fri, Jun 4, 2021 at 1:50 PM Nick Desaulniers <[email protected]> wrote:
>
> (Manually replying to https://lore.kernel.org/lkml/CAFJ_xbq06nfaEWtVNLtg7XCJrQeQ9wCs4Zsoi5Y_HP3Dx0iTRA@mail.gmail.com/)
>
> Hi Peter,
> We're also tracking 2 recent regressions that look like they've come from this
> patch.
>
> https://github.com/ClangBuiltLinux/linux/issues/1384
> https://github.com/ClangBuiltLinux/linux/issues/1388
>
> (Both in linux-next at the moment).
>
> The first, it looks like a boot failure. The second is a warning from the
> linker on a kernel module; even readelf seems unhappy with the results of the
> output from objtool.
>
> I can more easily reproduce the latter, so I'm working on getting a smaller
> reproducer. I'll let you know when I have it, but wanted to report it ASAP.
Sent a pretty big attachment, privately. I was able to capture the
before/after with:
$ $ echo 'CONFIG_GCOV_KERNEL=n
CONFIG_KASAN=n
CONFIG_LTO_CLANG_THIN=y' >allmod.config
$ OBJTOOL_ARGS="--backup" make -kj"$(nproc)" KCONFIG_ALLCONFIG=1
LLVM=1 LLVM_IAS=1 all
It looks like
$ ./tools/objtool/objtool orc generate --module --no-fp
--no-unreachable --retpoline --uaccess --mcount
drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o; ld.lld -r -m elf_x86_64
-plugin-opt=-code-model=kernel -plugin-opt=-stack-alignment=8
--thinlto-cache-dir=.thinlto-cache -mllvm -import-instr-limit=5
-plugin-opt=-warn-stack-size=2048 --build-id=sha1 -T
scripts/module.lds -o drivers/gpu/drm/amd/amdgpu/amdgpu.ko
drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o
drivers/gpu/drm/amd/amdgpu/amdgpu.mod.o
is producing the linker error:
ld.lld: error: drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o:
SHT_SYMTAB_SHNDX has 79581 entries, but the symbol table associated
has 79582
Readelf having issues with the output:
$ readelf -s amdgpu.lto.o.orig
<works fine>
$ readelf -s amdgpu.lto.o
readelf: Error: Reading 73014451695 bytes extends past end of file for
string table
$ llvm-readelf -s amdgpu.lto.o
llvm-readelf: error: 'amdgpu.lto.o': unable to continue dumping, the
file is corrupt: section table goes past the end of file
`file` having issues:
$ file drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o
drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o: ELF 64-bit LSB relocatable,
x86-64, version 1 (SYSV), no section header
for comparison:
$ file ./drivers/spi/spi-ath79.lto.o
./drivers/spi/spi-ath79.lto.o: ELF 64-bit LSB relocatable, x86-64,
version 1 (SYSV), not stripped
--
Thanks,
~Nick Desaulniers
On 2021-06-04, 'Nick Desaulniers' via Clang Built Linux wrote:
>On Fri, Jun 4, 2021 at 1:50 PM Nick Desaulniers <[email protected]> wrote:
>>
>> (Manually replying to https://lore.kernel.org/lkml/CAFJ_xbq06nfaEWtVNLtg7XCJrQeQ9wCs4Zsoi5Y_HP3Dx0iTRA@mail.gmail.com/)
>>
>> Hi Peter,
>> We're also tracking 2 recent regressions that look like they've come from this
>> patch.
>>
>> https://github.com/ClangBuiltLinux/linux/issues/1384
>> https://github.com/ClangBuiltLinux/linux/issues/1388
>>
>> (Both in linux-next at the moment).
>>
>> The first, it looks like a boot failure. The second is a warning from the
>> linker on a kernel module; even readelf seems unhappy with the results of the
>> output from objtool.
>>
>> I can more easily reproduce the latter, so I'm working on getting a smaller
>> reproducer. I'll let you know when I have it, but wanted to report it ASAP.
>
>Sent a pretty big attachment, privately. I was able to capture the
>before/after with:
>
>$ $ echo 'CONFIG_GCOV_KERNEL=n
>CONFIG_KASAN=n
>CONFIG_LTO_CLANG_THIN=y' >allmod.config
>$ OBJTOOL_ARGS="--backup" make -kj"$(nproc)" KCONFIG_ALLCONFIG=1
>LLVM=1 LLVM_IAS=1 all
>
>It looks like
>
>$ ./tools/objtool/objtool orc generate --module --no-fp
>--no-unreachable --retpoline --uaccess --mcount
>drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o; ld.lld -r -m elf_x86_64
>-plugin-opt=-code-model=kernel -plugin-opt=-stack-alignment=8
>--thinlto-cache-dir=.thinlto-cache -mllvm -import-instr-limit=5
>-plugin-opt=-warn-stack-size=2048 --build-id=sha1 -T
>scripts/module.lds -o drivers/gpu/drm/amd/amdgpu/amdgpu.ko
>drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o
>drivers/gpu/drm/amd/amdgpu/amdgpu.mod.o
>
>is producing the linker error:
>
>ld.lld: error: drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o:
>SHT_SYMTAB_SHNDX has 79581 entries, but the symbol table associated
>has 79582
>
>Readelf having issues with the output:
>$ readelf -s amdgpu.lto.o.orig
><works fine>
>$ readelf -s amdgpu.lto.o
>readelf: Error: Reading 73014451695 bytes extends past end of file for
>string table
>$ llvm-readelf -s amdgpu.lto.o
>llvm-readelf: error: 'amdgpu.lto.o': unable to continue dumping, the
>file is corrupt: section table goes past the end of file
>
>`file` having issues:
>$ file drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o
>drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o: ELF 64-bit LSB relocatable,
>x86-64, version 1 (SYSV), no section header
>
>for comparison:
>$ file ./drivers/spi/spi-ath79.lto.o
>./drivers/spi/spi-ath79.lto.o: ELF 64-bit LSB relocatable, x86-64,
>version 1 (SYSV), not stripped
tools/objtool/elf.c:elf_add_symbol may not update .symtab_shndx .
Speaking of llvm-objcopy, it finalizes the content of .symtab_shndx when .symtab
is finalized. objtool may want to adopt a similar approach.
read_symbols searches for the section ".symtab_shndx". It'd be better to
use the section type SHT_SYMTAB_SHNDX.
On Fri, Jun 04, 2021 at 04:50:46PM -0700, Fangrui Song wrote:
> On 2021-06-04, 'Nick Desaulniers' via Clang Built Linux wrote:
> > is producing the linker error:
> >
> > ld.lld: error: drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o:
> > SHT_SYMTAB_SHNDX has 79581 entries, but the symbol table associated
> > has 79582
> >
> > Readelf having issues with the output:
> > $ readelf -s amdgpu.lto.o.orig
> > <works fine>
> > $ readelf -s amdgpu.lto.o
> > readelf: Error: Reading 73014451695 bytes extends past end of file for
> > string table
> > $ llvm-readelf -s amdgpu.lto.o
> > llvm-readelf: error: 'amdgpu.lto.o': unable to continue dumping, the
> > file is corrupt: section table goes past the end of file
> >
> tools/objtool/elf.c:elf_add_symbol may not update .symtab_shndx .
> Speaking of llvm-objcopy, it finalizes the content of .symtab_shndx when .symtab
> is finalized. objtool may want to adopt a similar approach.
>
> read_symbols searches for the section ".symtab_shndx". It'd be better to
> use the section type SHT_SYMTAB_SHNDX.
I think you've absolutely nailed it; but would you have more information
or a code reference to what you're speaking about? My complete ELF
and libelf knowledge is very limited and as demonstrated here, I'm not
at all sure how all that extended index stuff is supposed to work.
On Sat, Jun 5, 2021 at 3:39 AM Peter Zijlstra <[email protected]> wrote:
>
> On Fri, Jun 04, 2021 at 04:50:46PM -0700, Fangrui Song wrote:
> > On 2021-06-04, 'Nick Desaulniers' via Clang Built Linux wrote:
>
> > > is producing the linker error:
> > >
> > > ld.lld: error: drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o:
> > > SHT_SYMTAB_SHNDX has 79581 entries, but the symbol table associated
> > > has 79582
> > >
> > > Readelf having issues with the output:
> > > $ readelf -s amdgpu.lto.o.orig
> > > <works fine>
> > > $ readelf -s amdgpu.lto.o
> > > readelf: Error: Reading 73014451695 bytes extends past end of file for
> > > string table
> > > $ llvm-readelf -s amdgpu.lto.o
> > > llvm-readelf: error: 'amdgpu.lto.o': unable to continue dumping, the
> > > file is corrupt: section table goes past the end of file
> > >
>
> > tools/objtool/elf.c:elf_add_symbol may not update .symtab_shndx .
> > Speaking of llvm-objcopy, it finalizes the content of .symtab_shndx when .symtab
> > is finalized. objtool may want to adopt a similar approach.
> >
> > read_symbols searches for the section ".symtab_shndx". It'd be better to
> > use the section type SHT_SYMTAB_SHNDX.
>
> I think you've absolutely nailed it; but would you have more information
> or a code reference to what you're speaking about? My complete ELF
> and libelf knowledge is very limited and as demonstrated here, I'm not
> at all sure how all that extended index stuff is supposed to work.
The section index field of an Elf{32,64}_Sym (st_shndx) is 16-bit, so
it cannot represent a section index greater than 0xffff.
ELF actually reserves values in 0xff00~0xff00 for other purposes, so
st_shndx cannot represent a section whose index is greater or equal to
0xff00.
To overcome the 16-bit section index limitation, .symtab_shndx was designed.
http://www.sco.com/developers/gabi/latest/ch4.symtab.html says
> SHN_XINDEX
> This value is an escape value. It indicates that the symbol refers to a specific location within a section, but that the section header index for that section is too large to be represented directly in the symbol table entry. The actual section header index is found in the associated SHT_SYMTAB_SHNDX section. The entries in that section correspond one to one with the entries in the symbol table. Only those entries in SHT_SYMTAB_SHNDX that correspond to symbol table entries with SHN_XINDEX will hold valid section header indexes; all other entries will have value 0.
You may use https://github.com/llvm/llvm-project/blob/main/llvm/tools/llvm-objcopy/ELF/Object.cpp#L843
as a reference.
On Sat, Jun 05, 2021 at 06:58:39PM -0700, Fāng-ruì Sòng wrote:
> On Sat, Jun 5, 2021 at 3:39 AM Peter Zijlstra <[email protected]> wrote:
> > I think you've absolutely nailed it; but would you have more information
> > or a code reference to what you're speaking about? My complete ELF
> > and libelf knowledge is very limited and as demonstrated here, I'm not
> > at all sure how all that extended index stuff is supposed to work.
>
> The section index field of an Elf{32,64}_Sym (st_shndx) is 16-bit, so
> it cannot represent a section index greater than 0xffff.
> ELF actually reserves values in 0xff00~0xff00 for other purposes, so
> st_shndx cannot represent a section whose index is greater or equal to
> 0xff00.
Right, that's about as far as I got, but never could find details on how
the extension worked in detail, and I clearly muddled it :/
> To overcome the 16-bit section index limitation, .symtab_shndx was designed.
>
> http://www.sco.com/developers/gabi/latest/ch4.symtab.html says
>
> > SHN_XINDEX This value is an escape value. It indicates that the
> > symbol refers to a specific location within a section, but that the
> > section header index for that section is too large to be represented
> > directly in the symbol table entry. The actual section header index
> > is found in the associated SHT_SYMTAB_SHNDX section. The entries in
> > that section correspond one to one with the entries in the symbol
> > table. Only those entries in SHT_SYMTAB_SHNDX that correspond to
> > symbol table entries with SHN_XINDEX will hold valid section header
> > indexes; all other entries will have value 0.
>
> You may use https://github.com/llvm/llvm-project/blob/main/llvm/tools/llvm-objcopy/ELF/Object.cpp#L843
> as a reference.
Excellent, lemme go read up and attempt to fix this.
On Mon, Jun 07, 2021 at 09:56:48AM +0200, Peter Zijlstra wrote:
> On Sat, Jun 05, 2021 at 06:58:39PM -0700, Fāng-ruì Sòng wrote:
> > On Sat, Jun 5, 2021 at 3:39 AM Peter Zijlstra <[email protected]> wrote:
>
> > > I think you've absolutely nailed it; but would you have more information
> > > or a code reference to what you're speaking about? My complete ELF
> > > and libelf knowledge is very limited and as demonstrated here, I'm not
> > > at all sure how all that extended index stuff is supposed to work.
> >
> > The section index field of an Elf{32,64}_Sym (st_shndx) is 16-bit, so
> > it cannot represent a section index greater than 0xffff.
> > ELF actually reserves values in 0xff00~0xff00 for other purposes, so
> > st_shndx cannot represent a section whose index is greater or equal to
> > 0xff00.
>
> Right, that's about as far as I got, but never could find details on how
> the extension worked in detail, and I clearly muddled it :/
OK, so I'm all confused again...
So a .symtab entry has:
st_name -- strtab offset for the name string
st_value -- where this symbol lives
st_size -- size of symbol in bytes
st_shndx -- section index to interpret the @st_value above
st_info -- type+bind
st_other -- visibility
The thing is, we're adding UNDEF symbols, for the linker to resolve.
UNDEF has:
st_value := 0
st_size := 0
st_shndx := 0
st_info := GLOBAL + NOTYPE
st_other := 0
Per that, sh_shndx isn't >= SHN_LORESERVE, and I figured we all good.
Is the problem that .symtab_shndx is expected to contain the exact same
number of entries as .symtab? And I'm adding to .symtab and not to
.symtab_shndx, hence getting them out of sync?
Let me try adding 0s to .symtab_shndx. See if that makes readelf
happier.
On Mon, Jun 07, 2021 at 11:22:11AM +0200, Peter Zijlstra wrote:
> On Mon, Jun 07, 2021 at 09:56:48AM +0200, Peter Zijlstra wrote:
> > On Sat, Jun 05, 2021 at 06:58:39PM -0700, Fāng-ruì Sòng wrote:
> > > On Sat, Jun 5, 2021 at 3:39 AM Peter Zijlstra <[email protected]> wrote:
> >
> > > > I think you've absolutely nailed it; but would you have more information
> > > > or a code reference to what you're speaking about? My complete ELF
> > > > and libelf knowledge is very limited and as demonstrated here, I'm not
> > > > at all sure how all that extended index stuff is supposed to work.
> > >
> > > The section index field of an Elf{32,64}_Sym (st_shndx) is 16-bit, so
> > > it cannot represent a section index greater than 0xffff.
> > > ELF actually reserves values in 0xff00~0xff00 for other purposes, so
> > > st_shndx cannot represent a section whose index is greater or equal to
> > > 0xff00.
> >
> > Right, that's about as far as I got, but never could find details on how
> > the extension worked in detail, and I clearly muddled it :/
>
> OK, so I'm all confused again...
>
> So a .symtab entry has:
>
> st_name -- strtab offset for the name string
> st_value -- where this symbol lives
> st_size -- size of symbol in bytes
> st_shndx -- section index to interpret the @st_value above
> st_info -- type+bind
> st_other -- visibility
>
> The thing is, we're adding UNDEF symbols, for the linker to resolve.
> UNDEF has:
>
> st_value := 0
> st_size := 0
> st_shndx := 0
> st_info := GLOBAL + NOTYPE
> st_other := 0
>
> Per that, sh_shndx isn't >= SHN_LORESERVE, and I figured we all good.
>
>
> Is the problem that .symtab_shndx is expected to contain the exact same
> number of entries as .symtab? And I'm adding to .symtab and not to
> .symtab_shndx, hence getting them out of sync?
>
> Let me try adding 0s to .symtab_shndx. See if that makes readelf
> happier.
That does indeed seem to do the trick. Bit daft if you ask me, anybody
reading that file ought to have a handy bucket of 0s available, but
whatever.
---
tools/objtool/elf.c | 25 ++++++++++++++++++++++++-
1 file changed, 24 insertions(+), 1 deletion(-)
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 743c2e9d0f56..41bca1d13d8e 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -717,7 +717,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
{
- struct section *symtab;
+ struct section *symtab, *symtab_shndx;
struct symbol *sym;
Elf_Data *data;
Elf_Scn *s;
@@ -769,6 +769,29 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
symtab->len += data->d_size;
symtab->changed = true;
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx) {
+ s = elf_getscn(elf->elf, symtab_shndx->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return NULL;
+ }
+
+ data = elf_newdata(s);
+ if (!data) {
+ WARN_ELF("elf_newdata");
+ return NULL;
+ }
+
+ data->d_buf = &sym->sym.st_size; /* conveniently 0 */
+ data->d_size = sizeof(Elf32_Word);
+ data->d_align = 4;
+ data->d_type = ELF_T_WORD;
+
+ symtab_shndx->len += 4;
+ symtab_shndx->changed = true;
+ }
+
sym->sec = find_section_by_index(elf, 0);
elf_add_symbol(elf, sym);
On 2021-06-07, Peter Zijlstra wrote:
>On Mon, Jun 07, 2021 at 11:22:11AM +0200, Peter Zijlstra wrote:
>> On Mon, Jun 07, 2021 at 09:56:48AM +0200, Peter Zijlstra wrote:
>> > On Sat, Jun 05, 2021 at 06:58:39PM -0700, Fāng-ruì Sòng wrote:
>> > > On Sat, Jun 5, 2021 at 3:39 AM Peter Zijlstra <[email protected]> wrote:
>> >
>> > > > I think you've absolutely nailed it; but would you have more information
>> > > > or a code reference to what you're speaking about? My complete ELF
>> > > > and libelf knowledge is very limited and as demonstrated here, I'm not
>> > > > at all sure how all that extended index stuff is supposed to work.
>> > >
>> > > The section index field of an Elf{32,64}_Sym (st_shndx) is 16-bit, so
>> > > it cannot represent a section index greater than 0xffff.
>> > > ELF actually reserves values in 0xff00~0xff00 for other purposes, so
>> > > st_shndx cannot represent a section whose index is greater or equal to
>> > > 0xff00.
>> >
>> > Right, that's about as far as I got, but never could find details on how
>> > the extension worked in detail, and I clearly muddled it :/
>>
>> OK, so I'm all confused again...
>>
>> So a .symtab entry has:
>>
>> st_name -- strtab offset for the name string
>> st_value -- where this symbol lives
>> st_size -- size of symbol in bytes
>> st_shndx -- section index to interpret the @st_value above
>> st_info -- type+bind
>> st_other -- visibility
>>
>> The thing is, we're adding UNDEF symbols, for the linker to resolve.
>> UNDEF has:
>>
>> st_value := 0
>> st_size := 0
>> st_shndx := 0
>> st_info := GLOBAL + NOTYPE
>> st_other := 0
>>
>> Per that, sh_shndx isn't >= SHN_LORESERVE, and I figured we all good.
>>
>>
>> Is the problem that .symtab_shndx is expected to contain the exact same
>> number of entries as .symtab? And I'm adding to .symtab and not to
>> .symtab_shndx, hence getting them out of sync?
Yes. http://www.sco.com/developers/gabi/latest/ch4.sheader.html says
"Each value corresponds one to one with a symbol table entry and appear in the same order as those entries."
>> Let me try adding 0s to .symtab_shndx. See if that makes readelf
>> happier.
>
>That does indeed seem to do the trick. Bit daft if you ask me, anybody
>reading that file ought to have a handy bucket of 0s available, but
>whatever.
Does the representation use the section index directly? (sym->sym.st_shndx)
This can be fragile when the number of sections changes..., e.g. elf_add_section
So in llvm-objcopy's representation, the section index is represented as
the section object.
struct Symbol {
...
SectionBase *DefinedIn = nullptr;
...
};
In the writer stage, sections are assigned 32-bit indexes and the writer
knows that an SHN_XINDEX for a symbol is needed if the index is >= 0xff00.
>---
> tools/objtool/elf.c | 25 ++++++++++++++++++++++++-
> 1 file changed, 24 insertions(+), 1 deletion(-)
>
>diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
>index 743c2e9d0f56..41bca1d13d8e 100644
>--- a/tools/objtool/elf.c
>+++ b/tools/objtool/elf.c
>@@ -717,7 +717,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
>
> struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
> {
>- struct section *symtab;
>+ struct section *symtab, *symtab_shndx;
> struct symbol *sym;
> Elf_Data *data;
> Elf_Scn *s;
>@@ -769,6 +769,29 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
> symtab->len += data->d_size;
> symtab->changed = true;
>
>+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
>+ if (symtab_shndx) {
>+ s = elf_getscn(elf->elf, symtab_shndx->idx);
>+ if (!s) {
>+ WARN_ELF("elf_getscn");
>+ return NULL;
>+ }
>+
>+ data = elf_newdata(s);
>+ if (!data) {
>+ WARN_ELF("elf_newdata");
>+ return NULL;
>+ }
>+
>+ data->d_buf = &sym->sym.st_size; /* conveniently 0 */
>+ data->d_size = sizeof(Elf32_Word);
>+ data->d_align = 4;
>+ data->d_type = ELF_T_WORD;
>+
>+ symtab_shndx->len += 4;
>+ symtab_shndx->changed = true;
>+ }
>+
> sym->sec = find_section_by_index(elf, 0);
>
> elf_add_symbol(elf, sym);
On Sat, Jun 05, 2021 at 06:58:39PM -0700, Fāng-ruì Sòng wrote:
> You may use https://github.com/llvm/llvm-project/blob/main/llvm/tools/llvm-objcopy/ELF/Object.cpp#L843
> as a reference.
BTW, Error::success(), is that a successfull error, or an erroneous
success? :-))
On Mon, Jun 07, 2021 at 10:23:11AM -0700, Fāng-ruì Sòng wrote:
> On 2021-06-07, Peter Zijlstra wrote:
> > That does indeed seem to do the trick. Bit daft if you ask me, anybody
> > reading that file ought to have a handy bucket of 0s available, but
> > whatever.
>
> Does the representation use the section index directly? (sym->sym.st_shndx)
> This can be fragile when the number of sections changes..., e.g. elf_add_section
No, things are supposed to use sym->sec, which is a pointer to our
struct section representation.
> So in llvm-objcopy's representation, the section index is represented as
> the section object.
>
> struct Symbol {
> ...
> SectionBase *DefinedIn = nullptr;
> ...
> };
Somewhat like that.
> In the writer stage, sections are assigned 32-bit indexes and the writer
> knows that an SHN_XINDEX for a symbol is needed if the index is >= 0xff00.
I think we only ever append sections, so pre-existing section numbers
stay correct. If libelf somehow does something else, we rely on it to
then keep the section numbers internally consistent.
And the only symbol write is this append of undef symbols, which are
always on section 0.
On Mon, Jun 7, 2021 at 11:19 AM Peter Zijlstra <[email protected]> wrote:
>
> On Sat, Jun 05, 2021 at 06:58:39PM -0700, Fāng-ruì Sòng wrote:
>
> > You may use https://github.com/llvm/llvm-project/blob/main/llvm/tools/llvm-objcopy/ELF/Object.cpp#L843
> > as a reference.
>
> BTW, Error::success(), is that a successfull error, or an erroneous
> success? :-))
A success (no error). Error::success() is a factory member function.
Its purpose is to create an "unchecked" Error instance and require the
caller to explicitly check for the error state.
On Mon, Jun 07, 2021 at 11:27:27AM -0700, Fāng-ruì Sòng wrote:
> On Mon, Jun 7, 2021 at 11:19 AM Peter Zijlstra <[email protected]> wrote:
> >
> > On Sat, Jun 05, 2021 at 06:58:39PM -0700, Fāng-ruì Sòng wrote:
> >
> > > You may use https://github.com/llvm/llvm-project/blob/main/llvm/tools/llvm-objcopy/ELF/Object.cpp#L843
> > > as a reference.
> >
> > BTW, Error::success(), is that a successfull error, or an erroneous
> > success? :-))
>
> A success (no error). Error::success() is a factory member function.
> Its purpose is to create an "unchecked" Error instance and require the
> caller to explicitly check for the error state.
I got that (see the smily face), but it reads really weird when you're
not used to it.
On Mon, Jun 7, 2021 at 2:46 AM Peter Zijlstra <[email protected]> wrote:
>
Thanks, the below diff resolves the linker error reported in
https://github.com/ClangBuiltLinux/linux/issues/1388
Both readelf implementations seem happy with the results, too.
Tested-by: Nick Desaulniers <[email protected]>
Nathan,
Can you please test the below diff and see if that resolves your boot
issue reported in:
https://github.com/ClangBuiltLinux/linux/issues/1384
> ---
> tools/objtool/elf.c | 25 ++++++++++++++++++++++++-
> 1 file changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
> index 743c2e9d0f56..41bca1d13d8e 100644
> --- a/tools/objtool/elf.c
> +++ b/tools/objtool/elf.c
> @@ -717,7 +717,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
>
> struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
> {
> - struct section *symtab;
> + struct section *symtab, *symtab_shndx;
> struct symbol *sym;
> Elf_Data *data;
> Elf_Scn *s;
> @@ -769,6 +769,29 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
> symtab->len += data->d_size;
> symtab->changed = true;
>
> + symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
> + if (symtab_shndx) {
> + s = elf_getscn(elf->elf, symtab_shndx->idx);
> + if (!s) {
> + WARN_ELF("elf_getscn");
> + return NULL;
> + }
> +
> + data = elf_newdata(s);
> + if (!data) {
> + WARN_ELF("elf_newdata");
> + return NULL;
> + }
> +
> + data->d_buf = &sym->sym.st_size; /* conveniently 0 */
> + data->d_size = sizeof(Elf32_Word);
> + data->d_align = 4;
> + data->d_type = ELF_T_WORD;
> +
> + symtab_shndx->len += 4;
> + symtab_shndx->changed = true;
> + }
> +
> sym->sec = find_section_by_index(elf, 0);
>
> elf_add_symbol(elf, sym);
The only thing that's still different is that the `file` command still
prints "no section header."
$ find . -name \*.lto.o | xargs file | rev | cut -d , -f 1 | rev |
sort | uniq -c
1 no section header
8377 not stripped
1 too many section headers (33683)
1 too many section headers (50758)
$ file --version
file-5.39
That's drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o, fs/xfs/xfs.lto.o,
drivers/gpu/drm/i915/i915.lto.o, respectively. I'm not sure that's a
problem, yet, and whether 9bc0bb50727c8ac69fbb33fb937431cf3518ff37 is
even related yet; those might just be huge drivers and figured it was
reporting somewhere in case it ever comes up again. CONFIG_LTO
implies -ffunction-sections -fdata-sections, and
CONFIG_LD_DEAD_CODE_DATA_ELIMINATION explicitly sets those, too.
--
Thanks,
~Nick Desaulniers
On Mon, Jun 07, 2021 at 01:54:37PM -0700, Nick Desaulniers wrote:
> The only thing that's still different is that the `file` command still
> prints "no section header."
>
> $ find . -name \*.lto.o | xargs file | rev | cut -d , -f 1 | rev |
> sort | uniq -c
> 1 no section header
That's not due to objtool, is it?
$ file amdgpu.lto.o.orig
amdgpu.lto.o.orig: ELF 64-bit LSB relocatable, x86-64, version 1 (SYSV), no section header
On 6/7/2021 1:54 PM, 'Nick Desaulniers' via Clang Built Linux wrote:
> On Mon, Jun 7, 2021 at 2:46 AM Peter Zijlstra <[email protected]> wrote:
>>
>
> Thanks, the below diff resolves the linker error reported in
> https://github.com/ClangBuiltLinux/linux/issues/1388
>
> Both readelf implementations seem happy with the results, too.
>
> Tested-by: Nick Desaulniers <[email protected]>
>
> Nathan,
> Can you please test the below diff and see if that resolves your boot
> issue reported in:
> https://github.com/ClangBuiltLinux/linux/issues/1384
Unfortunately, it does not appear to resolve that issue.
$ git log -2 --decorate=no --oneline
eea6a9d6d277 Peter's fix
614124bea77e Linux 5.13-rc5
$ strings /mnt/c/Users/natec/Linux/kernel-investigation | grep microsoft
5.13.0-rc5-microsoft-standard-WSL2-00001-geea6a9d6d277
(nathan@archlinux-ax161) #3 SMP Tue Jun 8 09:46:19 MST 2021
My VM still never makes it to userspace.
>> ---
>> tools/objtool/elf.c | 25 ++++++++++++++++++++++++-
>> 1 file changed, 24 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
>> index 743c2e9d0f56..41bca1d13d8e 100644
>> --- a/tools/objtool/elf.c
>> +++ b/tools/objtool/elf.c
>> @@ -717,7 +717,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
>>
>> struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
>> {
>> - struct section *symtab;
>> + struct section *symtab, *symtab_shndx;
>> struct symbol *sym;
>> Elf_Data *data;
>> Elf_Scn *s;
>> @@ -769,6 +769,29 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
>> symtab->len += data->d_size;
>> symtab->changed = true;
>>
>> + symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
>> + if (symtab_shndx) {
>> + s = elf_getscn(elf->elf, symtab_shndx->idx);
>> + if (!s) {
>> + WARN_ELF("elf_getscn");
>> + return NULL;
>> + }
>> +
>> + data = elf_newdata(s);
>> + if (!data) {
>> + WARN_ELF("elf_newdata");
>> + return NULL;
>> + }
>> +
>> + data->d_buf = &sym->sym.st_size; /* conveniently 0 */
>> + data->d_size = sizeof(Elf32_Word);
>> + data->d_align = 4;
>> + data->d_type = ELF_T_WORD;
>> +
>> + symtab_shndx->len += 4;
>> + symtab_shndx->changed = true;
>> + }
>> +
>> sym->sec = find_section_by_index(elf, 0);
>>
>> elf_add_symbol(elf, sym);
>
>
> The only thing that's still different is that the `file` command still
> prints "no section header."
>
> $ find . -name \*.lto.o | xargs file | rev | cut -d , -f 1 | rev |
> sort | uniq -c
> 1 no section header
> 8377 not stripped
> 1 too many section headers (33683)
> 1 too many section headers (50758)
> $ file --version
> file-5.39
>
> That's drivers/gpu/drm/amd/amdgpu/amdgpu.lto.o, fs/xfs/xfs.lto.o,
> drivers/gpu/drm/i915/i915.lto.o, respectively. I'm not sure that's a
> problem, yet, and whether 9bc0bb50727c8ac69fbb33fb937431cf3518ff37 is
> even related yet; those might just be huge drivers and figured it was
> reporting somewhere in case it ever comes up again. CONFIG_LTO
> implies -ffunction-sections -fdata-sections, and
> CONFIG_LD_DEAD_CODE_DATA_ELIMINATION explicitly sets those, too.
>
On 6/8/2021 10:22 AM, Peter Zijlstra wrote:
> On Tue, Jun 08, 2021 at 09:58:03AM -0700, Nathan Chancellor wrote:
>> On 6/7/2021 1:54 PM, 'Nick Desaulniers' via Clang Built Linux wrote:
>>> On Mon, Jun 7, 2021 at 2:46 AM Peter Zijlstra <[email protected]> wrote:
>>>>
>>>
>>> Thanks, the below diff resolves the linker error reported in
>>> https://github.com/ClangBuiltLinux/linux/issues/1388
>>>
>>> Both readelf implementations seem happy with the results, too.
>>>
>>> Tested-by: Nick Desaulniers <[email protected]>
>>>
>>> Nathan,
>>> Can you please test the below diff and see if that resolves your boot
>>> issue reported in:
>>> https://github.com/ClangBuiltLinux/linux/issues/1384
>>
>> Unfortunately, it does not appear to resolve that issue.
>>
>> $ git log -2 --decorate=no --oneline
>> eea6a9d6d277 Peter's fix
>> 614124bea77e Linux 5.13-rc5
>>
>> $ strings /mnt/c/Users/natec/Linux/kernel-investigation | grep microsoft
>> 5.13.0-rc5-microsoft-standard-WSL2-00001-geea6a9d6d277
>> (nathan@archlinux-ax161) #3 SMP Tue Jun 8 09:46:19 MST 2021
>>
>> My VM still never makes it to userspace.
>
> Since it's a VM, can you use the gdb-stub to ask it where it's stuck?
>
Unfortunately, this is the VM provided by the Windows Subsystem for
Linux so examining it is nigh-impossible :/ I am considering bisecting
the transforms that objtool does to try and figure out the one that
causes the machine to fail to boot or try to reproduce in a different
hypervisor, unless you have any other ideas.
Cheers,
Nathan
On Tue, Jun 08, 2021 at 10:29:56AM -0700, Nathan Chancellor wrote:
> On 6/8/2021 10:22 AM, Peter Zijlstra wrote:
> > Since it's a VM, can you use the gdb-stub to ask it where it's stuck?
> >
>
> Unfortunately, this is the VM provided by the Windows Subsystem for Linux so
> examining it is nigh-impossible :/ I am considering bisecting the transforms
> that objtool does to try and figure out the one that causes the machine to
> fail to boot or try to reproduce in a different hypervisor, unless you have
> any other ideas.
Does breaking Windows earn points similar to breaking the binary
drivers? :-) :-)
The below should kill this latest transform and would quickly confirm if
the that is causing your problem. If that's not it, what was your last
known working version?
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e5947fbb9e7a..d0f231b9c5a1 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1857,10 +1857,10 @@ static int decode_sections(struct objtool_file *file)
* Must be after add_special_section_alts(), since this will emit
* alternatives. Must be after add_{jump,call}_destination(), since
* those create the call insn lists.
- */
ret = arch_rewrite_retpolines(file);
if (ret)
return ret;
+ */
return 0;
}
On Tue, Jun 8, 2021 at 10:30 AM Nathan Chancellor <[email protected]> wrote:
>
> On 6/8/2021 10:22 AM, Peter Zijlstra wrote:
> > On Tue, Jun 08, 2021 at 09:58:03AM -0700, Nathan Chancellor wrote:
> >> On 6/7/2021 1:54 PM, 'Nick Desaulniers' via Clang Built Linux wrote:
> >>> Nathan,
> >>> Can you please test the below diff and see if that resolves your boot
> >>> issue reported in:
> >>> https://github.com/ClangBuiltLinux/linux/issues/1384
> >>
> >> Unfortunately, it does not appear to resolve that issue.
> >>
> >> $ git log -2 --decorate=no --oneline
> >> eea6a9d6d277 Peter's fix
> >> 614124bea77e Linux 5.13-rc5
> >>
> >> $ strings /mnt/c/Users/natec/Linux/kernel-investigation | grep microsoft
> >> 5.13.0-rc5-microsoft-standard-WSL2-00001-geea6a9d6d277
> >> (nathan@archlinux-ax161) #3 SMP Tue Jun 8 09:46:19 MST 2021
> >>
> >> My VM still never makes it to userspace.
> >
> > Since it's a VM, can you use the gdb-stub to ask it where it's stuck?
> >
>
> Unfortunately, this is the VM provided by the Windows Subsystem for
> Linux so examining it is nigh-impossible :/ I am considering bisecting
> the transforms that objtool does to try and figure out the one that
> causes the machine to fail to boot or try to reproduce in a different
> hypervisor, unless you have any other ideas.
Assuming this is an optimization and not required to boot/run; you
could test that quickly by putting a return statement as the first
statement in the list_for_each_entry loop in arch_rewrite_retpolines.
If that works, you could instead use a counter to try to see which
symbol is bad; once you bisect a counter value where things start/stop
booting, you could try to print the corresponding symbol (ie `name`).
(Optimization Fuel) (Sorry if any of that is unclear, let's follow up
off thread if so). Maybe that symbol will give us further clues? I
think that would tell us whether it's a problematic jump vs call, and
via which register.
--
Thanks,
~Nick Desaulniers
On Tue, Jun 08, 2021 at 09:58:03AM -0700, Nathan Chancellor wrote:
> On 6/7/2021 1:54 PM, 'Nick Desaulniers' via Clang Built Linux wrote:
> > On Mon, Jun 7, 2021 at 2:46 AM Peter Zijlstra <[email protected]> wrote:
> > >
> >
> > Thanks, the below diff resolves the linker error reported in
> > https://github.com/ClangBuiltLinux/linux/issues/1388
> >
> > Both readelf implementations seem happy with the results, too.
> >
> > Tested-by: Nick Desaulniers <[email protected]>
> >
> > Nathan,
> > Can you please test the below diff and see if that resolves your boot
> > issue reported in:
> > https://github.com/ClangBuiltLinux/linux/issues/1384
>
> Unfortunately, it does not appear to resolve that issue.
>
> $ git log -2 --decorate=no --oneline
> eea6a9d6d277 Peter's fix
> 614124bea77e Linux 5.13-rc5
>
> $ strings /mnt/c/Users/natec/Linux/kernel-investigation | grep microsoft
> 5.13.0-rc5-microsoft-standard-WSL2-00001-geea6a9d6d277
> (nathan@archlinux-ax161) #3 SMP Tue Jun 8 09:46:19 MST 2021
>
> My VM still never makes it to userspace.
Since it's a VM, can you use the gdb-stub to ask it where it's stuck?
On 6/8/2021 11:17 AM, Peter Zijlstra wrote:
> On Tue, Jun 08, 2021 at 10:29:56AM -0700, Nathan Chancellor wrote:
>> Unfortunately, this is the VM provided by the Windows Subsystem for Linux so
>> examining it is nigh-impossible :/ I am considering bisecting the transforms
>> that objtool does to try and figure out the one that causes the machine to
>> fail to boot or try to reproduce in a different hypervisor, unless you have
>> any other ideas.
>
> Does breaking Windows earn points similar to breaking the binary
> drivers? :-) :-)
:)
> The below should kill this latest transform and would quickly confirm if
> the that is causing your problem. If that's not it, what was your last
> known working version?
Yes, that diff gets me back to booting. I will see if I can figure out
the exact rewrite that blows everything up.
> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
> index e5947fbb9e7a..d0f231b9c5a1 100644
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -1857,10 +1857,10 @@ static int decode_sections(struct objtool_file *file)
> * Must be after add_special_section_alts(), since this will emit
> * alternatives. Must be after add_{jump,call}_destination(), since
> * those create the call insn lists.
> - */
> ret = arch_rewrite_retpolines(file);
> if (ret)
> return ret;
> + */
>
> return 0;
> }
>
Cheers,
Nathan
On 6/9/2021 8:08 AM, Peter Zijlstra wrote:
> On Wed, Jun 09, 2021 at 02:23:28PM +0200, Lukasz Majczak wrote:
>> śr., 9 cze 2021 o 09:20 Peter Zijlstra <[email protected]> napisał(a):
>>>
>>> On Wed, Jun 09, 2021 at 09:11:18AM +0200, Lukasz Majczak wrote:
>>>
>>>> I'm sorry I was on vacation last week - do you still need the requested debugs?
>>>
>>> If the patch here:
>>>
>>> https://lkml.kernel.org/r/YL3q1qFO9QIRL/[email protected]
>>>
>>> does not fix things for you (don't think it actually will), then yes,
>>> please send me the information requested.
>>
>> Ok, it didn't help. Peter, Josh I have sent you a private email with
>> requested information.
>
> OK, I think I've found it. Check this one:
>
> 5d5: 0f 85 00 00 00 00 jne 5db <cpuidle_reflect+0x22> 5d7: R_X86_64_PLT32 __x86_indirect_thunk_r11-0x4
>
>
> +Relocation section '.rela.altinstructions' at offset 0 contains 14 entries:
> + Offset Info Type Symbol's Value Symbol's Name + Addend
>
> +0000000000000018 0000000200000002 R_X86_64_PC32 0000000000000000 .text + 5d5
> +000000000000001c 0000009200000002 R_X86_64_PC32 0000000000000000 __x86_indirect_alt_call_r11 + 0
>
> Apparently we get conditional branches to retpoline thunks and objtool
> completely messes that up. I'm betting this also explains the problems
> Nathan is having.
Yes, the below patch gets my kernel back to booting so it seems the root
cause is the same.
> *groan*,.. not sure what to do about this, except return to having
> objtool generate code, which everybody hated on. For now I'll make it
> skip the conditional branches.
>
> I wonder if the compiler will also generate conditional tail calls, and
> what that does with static_call... now I have to check all that.
>
> ---
Tested-by: Nathan Chancellor <[email protected]>
> diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
> index 24295d39713b..523aa4157f80 100644
> --- a/tools/objtool/arch/x86/decode.c
> +++ b/tools/objtool/arch/x86/decode.c
> @@ -747,6 +747,10 @@ int arch_rewrite_retpolines(struct objtool_file *file)
>
> list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
>
> + if (insn->type != INSN_JUMP_DYNAMIC &&
> + insn->type != INSN_CALL_DYNAMIC)
> + continue;
> +
> if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
> continue;
>
>
wt., 8 cze 2021 o 20:49 Nathan Chancellor <[email protected]> napisał(a):
>
> On 6/8/2021 11:17 AM, Peter Zijlstra wrote:
> > On Tue, Jun 08, 2021 at 10:29:56AM -0700, Nathan Chancellor wrote:
> >> Unfortunately, this is the VM provided by the Windows Subsystem for Linux so
> >> examining it is nigh-impossible :/ I am considering bisecting the transforms
> >> that objtool does to try and figure out the one that causes the machine to
> >> fail to boot or try to reproduce in a different hypervisor, unless you have
> >> any other ideas.
> >
> > Does breaking Windows earn points similar to breaking the binary
> > drivers? :-) :-)
>
> :)
>
> > The below should kill this latest transform and would quickly confirm if
> > the that is causing your problem. If that's not it, what was your last
> > known working version?
>
> Yes, that diff gets me back to booting. I will see if I can figure out
> the exact rewrite that blows everything up.
>
> > diff --git a/tools/objtool/check.c b/tools/objtool/check.c
> > index e5947fbb9e7a..d0f231b9c5a1 100644
> > --- a/tools/objtool/check.c
> > +++ b/tools/objtool/check.c
> > @@ -1857,10 +1857,10 @@ static int decode_sections(struct objtool_file *file)
> > * Must be after add_special_section_alts(), since this will emit
> > * alternatives. Must be after add_{jump,call}_destination(), since
> > * those create the call insn lists.
> > - */
> > ret = arch_rewrite_retpolines(file);
> > if (ret)
> > return ret;
> > + */
> >
> > return 0;
> > }
> >
>
> Cheers,
> Nathan
Hi Peter,
I'm sorry I was on vacation last week - do you still need the requested debugs?
Best regards,
Lukasz
On Wed, Jun 09, 2021 at 09:11:18AM +0200, Lukasz Majczak wrote:
> I'm sorry I was on vacation last week - do you still need the requested debugs?
If the patch here:
https://lkml.kernel.org/r/YL3q1qFO9QIRL/[email protected]
does not fix things for you (don't think it actually will), then yes,
please send me the information requested.
On Wed, Jun 09, 2021 at 02:23:28PM +0200, Lukasz Majczak wrote:
> śr., 9 cze 2021 o 09:20 Peter Zijlstra <[email protected]> napisał(a):
> >
> > On Wed, Jun 09, 2021 at 09:11:18AM +0200, Lukasz Majczak wrote:
> >
> > > I'm sorry I was on vacation last week - do you still need the requested debugs?
> >
> > If the patch here:
> >
> > https://lkml.kernel.org/r/YL3q1qFO9QIRL/[email protected]
> >
> > does not fix things for you (don't think it actually will), then yes,
> > please send me the information requested.
>
> Ok, it didn't help. Peter, Josh I have sent you a private email with
> requested information.
OK, I think I've found it. Check this one:
5d5: 0f 85 00 00 00 00 jne 5db <cpuidle_reflect+0x22> 5d7: R_X86_64_PLT32 __x86_indirect_thunk_r11-0x4
+Relocation section '.rela.altinstructions' at offset 0 contains 14 entries:
+ Offset Info Type Symbol's Value Symbol's Name + Addend
+0000000000000018 0000000200000002 R_X86_64_PC32 0000000000000000 .text + 5d5
+000000000000001c 0000009200000002 R_X86_64_PC32 0000000000000000 __x86_indirect_alt_call_r11 + 0
Apparently we get conditional branches to retpoline thunks and objtool
completely messes that up. I'm betting this also explains the problems
Nathan is having.
*groan*,.. not sure what to do about this, except return to having
objtool generate code, which everybody hated on. For now I'll make it
skip the conditional branches.
I wonder if the compiler will also generate conditional tail calls, and
what that does with static_call... now I have to check all that.
---
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 24295d39713b..523aa4157f80 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -747,6 +747,10 @@ int arch_rewrite_retpolines(struct objtool_file *file)
list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC)
+ continue;
+
if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
continue;
On Wed, Jun 09, 2021 at 05:08:05PM +0200, Peter Zijlstra wrote:
> I wonder if the compiler will also generate conditional tail calls, and
> what that does with static_call... now I have to check all that.
OK.. static call patching infra will give us a nice WARN before it dies.
śr., 9 cze 2021 o 09:20 Peter Zijlstra <[email protected]> napisał(a):
>
> On Wed, Jun 09, 2021 at 09:11:18AM +0200, Lukasz Majczak wrote:
>
> > I'm sorry I was on vacation last week - do you still need the requested debugs?
>
> If the patch here:
>
> https://lkml.kernel.org/r/YL3q1qFO9QIRL/[email protected]
>
> does not fix things for you (don't think it actually will), then yes,
> please send me the information requested.
Ok, it didn't help. Peter, Josh I have sent you a private email with
requested information.
Best regards
Lukasz