2022-02-24 16:14:45

by Peter Zijlstra

[permalink] [raw]
Subject: [PATCH v2 34/39] objtool: Validate IBT assumptions

Intel IBT requires that every indirect JMP/CALL targets an ENDBR
instruction; failing this, a #CP happens and we die. Similarly, all
exception entries should be ENDBR.

Find all code relocations and ensure their targets are either an ENDBR
instruction or annotated with ANNOTATE_NOENDBR. For the exceptions, look
for UNWIND_HINT_IRET_REGS at sym+0 not being ENDBR.

Additionally, look for direct JMP/CALL instructions and warn if they
target an ENDBR instruction. This extra constraint comes from the
desire to poison unused ENDBR instructions.

NOTE: the changes in add_{call,jump}_destinations() add a common
path after setting insn->{jump,call}_dest, with both (source and
destination) instructions available.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
tools/objtool/builtin-check.c | 4
tools/objtool/check.c | 255 +++++++++++++++++++++++++++++---
tools/objtool/include/objtool/builtin.h | 3
tools/objtool/include/objtool/objtool.h | 3
4 files changed, 243 insertions(+), 22 deletions(-)

--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -20,7 +20,8 @@
#include <objtool/objtool.h>

bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
- lto, vmlinux, mcount, noinstr, backup, sls, dryrun;
+ lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
+ ibt;

static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -47,6 +48,7 @@ const struct option check_options[] = {
OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
OPT_BOOLEAN(0, "dry-run", &dryrun, "don't write the modifications"),
+ OPT_BOOLEAN(0, "ibt", &ibt, "validate ENDBR placement"),
OPT_END(),
};

--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -380,6 +380,7 @@ static int decode_instructions(struct ob
memset(insn, 0, sizeof(*insn));
INIT_LIST_HEAD(&insn->alts);
INIT_LIST_HEAD(&insn->stack_ops);
+ INIT_LIST_HEAD(&insn->call_node);

insn->sec = sec;
insn->offset = offset;
@@ -1176,6 +1177,14 @@ static int add_jump_destinations(struct
unsigned long dest_off;

for_each_insn(file, insn) {
+ if (insn->type == INSN_ENDBR && insn->func) {
+ if (insn->offset == insn->func->offset) {
+ file->nr_endbr++;
+ } else {
+ file->nr_endbr_int++;
+ }
+ }
+
if (!is_static_jump(insn))
continue;

@@ -1192,10 +1201,14 @@ static int add_jump_destinations(struct
} else if (insn->func) {
/* internal or external sibling call (with reloc) */
add_call_dest(file, insn, reloc->sym, true);
- continue;
+
+ dest_sec = reloc->sym->sec;
+ dest_off = reloc->sym->offset +
+ arch_dest_reloc_offset(reloc->addend);
+
} else if (reloc->sym->sec->idx) {
dest_sec = reloc->sym->sec;
- dest_off = reloc->sym->sym.st_value +
+ dest_off = reloc->sym->offset +
arch_dest_reloc_offset(reloc->addend);
} else {
/* non-func asm code jumping to another file */
@@ -1205,6 +1218,10 @@ static int add_jump_destinations(struct
insn->jump_dest = find_insn(file, dest_sec, dest_off);
if (!insn->jump_dest) {

+ /* external symbol */
+ if (!vmlinux && insn->func)
+ continue;
+
/*
* This is a special case where an alt instruction
* jumps past the end of the section. These are
@@ -1219,6 +1236,16 @@ static int add_jump_destinations(struct
return -1;
}

+ if (ibt && insn->jump_dest->type == INSN_ENDBR &&
+ insn->jump_dest->func &&
+ insn->jump_dest->offset == insn->jump_dest->func->offset) {
+ if (reloc) {
+ WARN_FUNC("Direct RELOC jump to ENDBR", insn->sec, insn->offset);
+ } else {
+ WARN_FUNC("Direct IMM jump to ENDBR", insn->sec, insn->offset);
+ }
+ }
+
/*
* Cross-function jump.
*/
@@ -1246,7 +1273,8 @@ static int add_jump_destinations(struct
insn->jump_dest->func->pfunc = insn->func;

} else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
- insn->jump_dest->offset == insn->jump_dest->func->offset) {
+ ((insn->jump_dest->offset == insn->jump_dest->func->offset) ||
+ (insn->jump_dest->offset == insn->jump_dest->func->offset + 4))) {
/* internal sibling call (without reloc) */
add_call_dest(file, insn, insn->jump_dest->func, true);
}
@@ -1256,23 +1284,12 @@ static int add_jump_destinations(struct
return 0;
}

-static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
-{
- struct symbol *call_dest;
-
- call_dest = find_func_by_offset(sec, offset);
- if (!call_dest)
- call_dest = find_symbol_by_offset(sec, offset);
-
- return call_dest;
-}
-
/*
* Find the destination instructions for all calls.
*/
static int add_call_destinations(struct objtool_file *file)
{
- struct instruction *insn;
+ struct instruction *insn, *target = NULL;
unsigned long dest_off;
struct symbol *dest;
struct reloc *reloc;
@@ -1284,7 +1301,21 @@ static int add_call_destinations(struct
reloc = insn_reloc(file, insn);
if (!reloc) {
dest_off = arch_jump_destination(insn);
- dest = find_call_destination(insn->sec, dest_off);
+
+ target = find_insn(file, insn->sec, dest_off);
+ if (!target) {
+ WARN_FUNC("direct call to nowhere", insn->sec, insn->offset);
+ return -1;
+ }
+ dest = target->func;
+ if (!dest)
+ dest = find_symbol_containing(insn->sec, dest_off);
+ if (!dest) {
+ WARN_FUNC("IMM can't find call dest symbol at %s+0x%lx",
+ insn->sec, insn->offset,
+ insn->sec->name, dest_off);
+ return -1;
+ }

add_call_dest(file, insn, dest, false);

@@ -1303,10 +1334,22 @@ static int add_call_destinations(struct
}

} else if (reloc->sym->type == STT_SECTION) {
- dest_off = arch_dest_reloc_offset(reloc->addend);
- dest = find_call_destination(reloc->sym->sec, dest_off);
+ struct section *dest_sec;
+
+ dest_sec = reloc->sym->sec;
+ dest_off = reloc->sym->offset +
+ arch_dest_reloc_offset(reloc->addend);
+
+ target = find_insn(file, dest_sec, dest_off);
+ if (!target) {
+ WARN_FUNC("direct call to nowhere", insn->sec, insn->offset);
+ return -1;
+ }
+ dest = target->func;
+ if (!dest)
+ dest = find_symbol_containing(dest_sec, dest_off);
if (!dest) {
- WARN_FUNC("can't find call dest symbol at %s+0x%lx",
+ WARN_FUNC("RELOC can't find call dest symbol at %s+0x%lx",
insn->sec, insn->offset,
reloc->sym->sec->name,
dest_off);
@@ -1317,9 +1360,27 @@ static int add_call_destinations(struct

} else if (reloc->sym->retpoline_thunk) {
add_retpoline_call(file, insn);
+ continue;
+
+ } else {
+ struct section *dest_sec;
+
+ dest_sec = reloc->sym->sec;
+ dest_off = reloc->sym->offset +
+ arch_dest_reloc_offset(reloc->addend);
+
+ target = find_insn(file, dest_sec, dest_off);

- } else
add_call_dest(file, insn, reloc->sym, false);
+ }
+
+ if (ibt && target && target->type == INSN_ENDBR) {
+ if (reloc) {
+ WARN_FUNC("Direct RELOC call to ENDBR", insn->sec, insn->offset);
+ } else {
+ WARN_FUNC("Direct IMM call to ENDBR", insn->sec, insn->offset);
+ }
+ }
}

return 0;
@@ -3053,6 +3114,8 @@ static struct instruction *next_insn_to_
return next_insn_same_sec(file, insn);
}

+static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn);
+
/*
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
@@ -3101,6 +3164,17 @@ static int validate_branch(struct objtoo

if (insn->hint) {
state.cfi = *insn->cfi;
+ if (ibt) {
+ struct symbol *sym;
+
+ if (insn->cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL &&
+ (sym = find_symbol_by_offset(insn->sec, insn->offset)) &&
+ insn->type != INSN_ENDBR && !insn->noendbr) {
+ WARN_FUNC("IRET_REGS hint without ENDBR: %s",
+ insn->sec, insn->offset,
+ sym->name);
+ }
+ }
} else {
/* XXX track if we actually changed state.cfi */

@@ -3260,7 +3334,12 @@ static int validate_branch(struct objtoo
state.df = false;
break;

+ case INSN_NOP:
+ break;
+
default:
+ if (ibt)
+ validate_ibt_insn(file, insn);
break;
}

@@ -3506,6 +3585,130 @@ static int validate_functions(struct obj
return warnings;
}

+static struct instruction *
+validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
+{
+ struct instruction *dest;
+ struct section *sec;
+ unsigned long off;
+
+ sec = reloc->sym->sec;
+ off = reloc->sym->offset + reloc->addend;
+
+ dest = find_insn(file, sec, off);
+ if (!dest)
+ return NULL;
+
+ if (dest->type == INSN_ENDBR)
+ return NULL;
+
+ if (reloc->sym->static_call_tramp)
+ return NULL;
+
+ return dest;
+}
+
+static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset,
+ struct instruction *target)
+{
+ WARN_FUNC("%srelocation to !ENDBR: %s+0x%lx", sec, offset, msg,
+ target->func ? target->func->name : target->sec->name,
+ target->func ? target->offset - target->func->offset : target->offset);
+}
+
+static void validate_ibt_target(struct objtool_file *file, struct instruction *insn,
+ struct instruction *target)
+{
+ if (target->func && target->func == insn->func) {
+ /*
+ * Anything from->to self is either _THIS_IP_ or IRET-to-self.
+ *
+ * There is no sane way to annotate _THIS_IP_ since the compiler treats the
+ * relocation as a constant and is happy to fold in offsets, skewing any
+ * annotation we do, leading to vast amounts of false-positives.
+ *
+ * There's also compiler generated _THIS_IP_ through KCOV and
+ * such which we have no hope of annotating.
+ *
+ * As such, blanked accept self-references without issue.
+ */
+ return;
+ }
+
+ /*
+ * Annotated non-control flow target.
+ */
+ if (target->noendbr)
+ return;
+
+ warn_noendbr("", insn->sec, insn->offset, target);
+}
+
+static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
+{
+ struct reloc *reloc = insn_reloc(file, insn);
+ struct instruction *target;
+
+ for (;;) {
+ if (!reloc)
+ return;
+
+ target = validate_ibt_reloc(file, reloc);
+ if (target)
+ validate_ibt_target(file, insn, target);
+
+ reloc = find_reloc_by_dest_range(file->elf, insn->sec, reloc->offset + 1,
+ (insn->offset + insn->len) - (reloc->offset + 1));
+ }
+}
+
+static int validate_ibt(struct objtool_file *file)
+{
+ struct section *sec;
+ struct reloc *reloc;
+
+ for_each_sec(file, sec) {
+ bool is_data;
+
+ /* already done in validate_branch() */
+ if (sec->sh.sh_flags & SHF_EXECINSTR)
+ continue;
+
+ if (!sec->reloc)
+ continue;
+
+ if (!strncmp(sec->name, ".orc", 4))
+ continue;
+
+ if (!strncmp(sec->name, ".discard", 8))
+ continue;
+
+ if (!strncmp(sec->name, ".debug", 6))
+ continue;
+
+ if (!strcmp(sec->name, "_error_injection_whitelist"))
+ continue;
+
+ if (!strcmp(sec->name, "_kprobe_blacklist"))
+ continue;
+
+ is_data = strstr(sec->name, ".data") || strstr(sec->name, ".rodata");
+
+ list_for_each_entry(reloc, &sec->reloc->reloc_list, list) {
+ struct instruction *target;
+
+ target = validate_ibt_reloc(file, reloc);
+ if (is_data && target && !target->noendbr) {
+ warn_noendbr("data ", reloc->sym->sec,
+ reloc->sym->offset + reloc->addend,
+ target);
+ }
+ }
+ }
+
+ return 0;
+}
+
static int validate_reachable_instructions(struct objtool_file *file)
{
struct instruction *insn;
@@ -3533,6 +3736,11 @@ int check(struct objtool_file *file)
return 1;
}

+ if (ibt && !lto) {
+ fprintf(stderr, "--ibt requires: --lto\n");
+ return 1;
+ }
+
arch_initial_func_cfi_state(&initial_func_cfi);
init_cfi_state(&init_cfi);
init_cfi_state(&func_cfi);
@@ -3579,6 +3787,13 @@ int check(struct objtool_file *file)
goto out;
warnings += ret;

+ if (ibt) {
+ ret = validate_ibt(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
if (!warnings) {
ret = validate_reachable_instructions(file);
if (ret < 0)
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -9,7 +9,8 @@

extern const struct option check_options[];
extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
- lto, vmlinux, mcount, noinstr, backup, sls, dryrun;
+ lto, vmlinux, mcount, noinstr, backup, sls, dryrun,
+ ibt;

extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);

--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -28,6 +28,9 @@ struct objtool_file {
struct list_head mcount_loc_list;
bool ignore_unreachables, c_file, hints, rodata;

+ unsigned int nr_endbr;
+ unsigned int nr_endbr_int;
+
unsigned long jl_short, jl_long;
unsigned long jl_nop_short, jl_nop_long;




2022-02-27 06:37:57

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 34/39] objtool: Validate IBT assumptions

On Thu, Feb 24, 2022 at 03:52:12PM +0100, Peter Zijlstra wrote:
> +++ b/tools/objtool/check.c
> @@ -380,6 +380,7 @@ static int decode_instructions(struct ob
> memset(insn, 0, sizeof(*insn));
> INIT_LIST_HEAD(&insn->alts);
> INIT_LIST_HEAD(&insn->stack_ops);
> + INIT_LIST_HEAD(&insn->call_node);

Is this needed? 'call_node' isn't actually a list head, otherwise this
would presumably be fixing a major bug.

> insn->sec = sec;
> insn->offset = offset;
> @@ -1176,6 +1177,14 @@ static int add_jump_destinations(struct
> unsigned long dest_off;
>
> for_each_insn(file, insn) {
> + if (insn->type == INSN_ENDBR && insn->func) {
> + if (insn->offset == insn->func->offset) {
> + file->nr_endbr++;
> + } else {
> + file->nr_endbr_int++;
> + }
> + }
> +

This doesn't have much to do with adding jump destinations. I'm
thinking this would fit better in decode_instructions() in the
sym_for_each_insn() loop.

> if (!is_static_jump(insn))
> continue;
>
> @@ -1192,10 +1201,14 @@ static int add_jump_destinations(struct
> } else if (insn->func) {
> /* internal or external sibling call (with reloc) */
> add_call_dest(file, insn, reloc->sym, true);
> - continue;
> +
> + dest_sec = reloc->sym->sec;
> + dest_off = reloc->sym->offset +
> + arch_dest_reloc_offset(reloc->addend);
> +
> } else if (reloc->sym->sec->idx) {
> dest_sec = reloc->sym->sec;
> - dest_off = reloc->sym->sym.st_value +
> + dest_off = reloc->sym->offset +
> arch_dest_reloc_offset(reloc->addend);
> } else {
> /* non-func asm code jumping to another file */
> @@ -1205,6 +1218,10 @@ static int add_jump_destinations(struct
> insn->jump_dest = find_insn(file, dest_sec, dest_off);
> if (!insn->jump_dest) {
>
> + /* external symbol */
> + if (!vmlinux && insn->func)
> + continue;
> +
> /*
> * This is a special case where an alt instruction
> * jumps past the end of the section. These are
> @@ -1219,6 +1236,16 @@ static int add_jump_destinations(struct
> return -1;
> }
>
> + if (ibt && insn->jump_dest->type == INSN_ENDBR &&
> + insn->jump_dest->func &&
> + insn->jump_dest->offset == insn->jump_dest->func->offset) {
> + if (reloc) {
> + WARN_FUNC("Direct RELOC jump to ENDBR", insn->sec, insn->offset);
> + } else {
> + WARN_FUNC("Direct IMM jump to ENDBR", insn->sec, insn->offset);
> + }
> + }
> +

I have several concerns about all the above (and corresponding changes
elsewhere), but it looks like this was moved to separate patches, for
ease of NACKing :-)

> /*
> * Cross-function jump.
> */
> @@ -1246,7 +1273,8 @@ static int add_jump_destinations(struct
> insn->jump_dest->func->pfunc = insn->func;
>
> } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
> - insn->jump_dest->offset == insn->jump_dest->func->offset) {
> + ((insn->jump_dest->offset == insn->jump_dest->func->offset) ||
> + (insn->jump_dest->offset == insn->jump_dest->func->offset + 4))) {
> /* internal sibling call (without reloc) */
> add_call_dest(file, insn, insn->jump_dest->func, true);

How about something more precise/readable/portable:

static bool same_func(struct instruction *insn1, struct instruction *insn2)
{
return insn1->func->pfunc == insn2->func->pfunc;
}

static bool is_first_func_insn(struct instruction *insn)
{
return insn->offset == insn->func->offset ||
(insn->type == INSN_ENDBR &&
insn->offset == insn->func->offset + insn->len);
}

...

} else if (!same_func(insn, insn->jump_dest) &&
is_first_func_insn(insn->jump_dest))


> +static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn);

I'd rather avoid forward declares and stay with the existing convention.

> +
> /*
> * Follow the branch starting at the given instruction, and recursively follow
> * any other branches (jumps). Meanwhile, track the frame pointer state at
> @@ -3101,6 +3164,17 @@ static int validate_branch(struct objtoo
>
> if (insn->hint) {
> state.cfi = *insn->cfi;
> + if (ibt) {
> + struct symbol *sym;
> +
> + if (insn->cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL &&
> + (sym = find_symbol_by_offset(insn->sec, insn->offset)) &&
> + insn->type != INSN_ENDBR && !insn->noendbr) {
> + WARN_FUNC("IRET_REGS hint without ENDBR: %s",
> + insn->sec, insn->offset,
> + sym->name);
> + }

No need to print sym->name here, WARN_FUNC() already does it?

> + }
> } else {
> /* XXX track if we actually changed state.cfi */
>
> @@ -3260,7 +3334,12 @@ static int validate_branch(struct objtoo
> state.df = false;
> break;
>
> + case INSN_NOP:
> + break;
> +
> default:
> + if (ibt)
> + validate_ibt_insn(file, insn);

This is kind of subtle. It would be more robust/clear to move this call
out of the switch statement and check explicitly for the exclusion of
jump/call instructions from within validate_ibt_insn().

> break;
> }
>
> @@ -3506,6 +3585,130 @@ static int validate_functions(struct obj
> return warnings;
> }
>
> +static struct instruction *
> +validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
> +{
> + struct instruction *dest;
> + struct section *sec;
> + unsigned long off;
> +
> + sec = reloc->sym->sec;
> + off = reloc->sym->offset + reloc->addend;

This math assumes non-PC-relative. If it's R_X86_64_PC32 or
R_X86_64_PLT32 then it needs +4 added.

There are actually a few cases of this in startup_64(). Those are
harmless, but there might conceivably be other code which isn't?

> +
> + dest = find_insn(file, sec, off);
> + if (!dest)
> + return NULL;
> +
> + if (dest->type == INSN_ENDBR)
> + return NULL;
> +
> + if (reloc->sym->static_call_tramp)
> + return NULL;
> +
> + return dest;
> +}
> +
> +static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset,
> + struct instruction *target)
> +{
> + WARN_FUNC("%srelocation to !ENDBR: %s+0x%lx", sec, offset, msg,
> + target->func ? target->func->name : target->sec->name,
> + target->func ? target->offset - target->func->offset : target->offset);
> +}
> +
> +static void validate_ibt_target(struct objtool_file *file, struct instruction *insn,
> + struct instruction *target)
> +{
> + if (target->func && target->func == insn->func) {

(Here and elsewhere) Instead of 'target' can we call it 'dest' for
consistency with existing code?

> + /*
> + * Anything from->to self is either _THIS_IP_ or IRET-to-self.
> + *
> + * There is no sane way to annotate _THIS_IP_ since the compiler treats the
> + * relocation as a constant and is happy to fold in offsets, skewing any
> + * annotation we do, leading to vast amounts of false-positives.
> + *
> + * There's also compiler generated _THIS_IP_ through KCOV and
> + * such which we have no hope of annotating.
> + *
> + * As such, blanked accept self-references without issue.

"blanket"

> + */
> + return;
> + }
> +
> + /*
> + * Annotated non-control flow target.
> + */
> + if (target->noendbr)
> + return;

I don't think the comment really adds anything. What's a "non-control
flow target" anyway...

> +
> + warn_noendbr("", insn->sec, insn->offset, target);
> +}
> +
> +static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
> +{
> + struct reloc *reloc = insn_reloc(file, insn);
> + struct instruction *target;
> +
> + for (;;) {
> + if (!reloc)
> + return;
> +
> + target = validate_ibt_reloc(file, reloc);
> + if (target)
> + validate_ibt_target(file, insn, target);
> +
> + reloc = find_reloc_by_dest_range(file->elf, insn->sec, reloc->offset + 1,
> + (insn->offset + insn->len) - (reloc->offset + 1));
> + }

I'm confused about what this loop is trying to do. Why would an
instruction have more than one reloc? It at least needs a comment.

Also a proper for() loop would be easier to follow:

for (reloc = insn_reloc(file, insn);
reloc;
reloc = find_reloc_by_dest_range(file->elf, insn->sec,
reloc->offset + 1,
(insn->offset + insn->len) - (reloc->offset + 1)) {

> +}
> +
> +static int validate_ibt(struct objtool_file *file)
> +{
> + struct section *sec;
> + struct reloc *reloc;
> +
> + for_each_sec(file, sec) {
> + bool is_data;
> +
> + /* already done in validate_branch() */
> + if (sec->sh.sh_flags & SHF_EXECINSTR)
> + continue;
> +
> + if (!sec->reloc)
> + continue;
> +
> + if (!strncmp(sec->name, ".orc", 4))
> + continue;
> +
> + if (!strncmp(sec->name, ".discard", 8))
> + continue;
> +
> + if (!strncmp(sec->name, ".debug", 6))
> + continue;
> +
> + if (!strcmp(sec->name, "_error_injection_whitelist"))
> + continue;
> +
> + if (!strcmp(sec->name, "_kprobe_blacklist"))
> + continue;
> +
> + is_data = strstr(sec->name, ".data") || strstr(sec->name, ".rodata");
> +
> + list_for_each_entry(reloc, &sec->reloc->reloc_list, list) {
> + struct instruction *target;
> +
> + target = validate_ibt_reloc(file, reloc);
> + if (is_data && target && !target->noendbr) {
> + warn_noendbr("data ", reloc->sym->sec,
> + reloc->sym->offset + reloc->addend,

Another case where the addend math would be wrong if it were
pc-relative. Not sure if that's possible here or not.

--
Josh

2022-02-27 18:36:08

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 34/39] objtool: Validate IBT assumptions

On Sat, Feb 26, 2022 at 07:13:48PM -0800, Josh Poimboeuf wrote:
> On Thu, Feb 24, 2022 at 03:52:12PM +0100, Peter Zijlstra wrote:
> > +++ b/tools/objtool/check.c
> > @@ -380,6 +380,7 @@ static int decode_instructions(struct ob
> > memset(insn, 0, sizeof(*insn));
> > INIT_LIST_HEAD(&insn->alts);
> > INIT_LIST_HEAD(&insn->stack_ops);
> > + INIT_LIST_HEAD(&insn->call_node);
>
> Is this needed? 'call_node' isn't actually a list head, otherwise this
> would presumably be fixing a major bug.

Somewhere there's an unconditional list_del_init() on call_node; it could
be that it moved to another patch and now this doesn't make immediate
sense. I'll move them together again.

> > insn->sec = sec;
> > insn->offset = offset;
> > @@ -1176,6 +1177,14 @@ static int add_jump_destinations(struct
> > unsigned long dest_off;
> >
> > for_each_insn(file, insn) {
> > + if (insn->type == INSN_ENDBR && insn->func) {
> > + if (insn->offset == insn->func->offset) {
> > + file->nr_endbr++;
> > + } else {
> > + file->nr_endbr_int++;
> > + }
> > + }
> > +
>
> This doesn't have much to do with adding jump destinations. I'm
> thinking this would fit better in decode_instructions() in the
> sym_for_each_insn() loop.

Fair enough I suppose. I'm not quite sure how it ended up where it did.

> > @@ -1219,6 +1236,16 @@ static int add_jump_destinations(struct
> > return -1;
> > }
> >
> > + if (ibt && insn->jump_dest->type == INSN_ENDBR &&
> > + insn->jump_dest->func &&
> > + insn->jump_dest->offset == insn->jump_dest->func->offset) {
> > + if (reloc) {
> > + WARN_FUNC("Direct RELOC jump to ENDBR", insn->sec, insn->offset);
> > + } else {
> > + WARN_FUNC("Direct IMM jump to ENDBR", insn->sec, insn->offset);
> > + }
> > + }
> > +
>
> I have several concerns about all the above (and corresponding changes
> elsewhere), but it looks like this was moved to separate patches, for
> ease of NACKing :-)

Right, we talked about that, I'll move the whole UD1 poisoning to the
end and use NOP4 instead, which removes the need for this.

> > /*
> > * Cross-function jump.
> > */
> > @@ -1246,7 +1273,8 @@ static int add_jump_destinations(struct
> > insn->jump_dest->func->pfunc = insn->func;
> >
> > } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
> > - insn->jump_dest->offset == insn->jump_dest->func->offset) {
> > + ((insn->jump_dest->offset == insn->jump_dest->func->offset) ||
> > + (insn->jump_dest->offset == insn->jump_dest->func->offset + 4))) {
> > /* internal sibling call (without reloc) */
> > add_call_dest(file, insn, insn->jump_dest->func, true);
>
> How about something more precise/readable/portable:
>
> static bool same_func(struct instruction *insn1, struct instruction *insn2)
> {
> return insn1->func->pfunc == insn2->func->pfunc;
> }
>
> static bool is_first_func_insn(struct instruction *insn)
> {
> return insn->offset == insn->func->offset ||
> (insn->type == INSN_ENDBR &&
> insn->offset == insn->func->offset + insn->len);
> }
>
> ...
>
> } else if (!same_func(insn, insn->jump_dest) &&
> is_first_func_insn(insn->jump_dest))
>

Done.

> > +static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn);
>
> I'd rather avoid forward declares and stay with the existing convention.
>
> > +
> > /*
> > * Follow the branch starting at the given instruction, and recursively follow
> > * any other branches (jumps). Meanwhile, track the frame pointer state at
> > @@ -3101,6 +3164,17 @@ static int validate_branch(struct objtoo
> >
> > if (insn->hint) {
> > state.cfi = *insn->cfi;
> > + if (ibt) {
> > + struct symbol *sym;
> > +
> > + if (insn->cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL &&
> > + (sym = find_symbol_by_offset(insn->sec, insn->offset)) &&
> > + insn->type != INSN_ENDBR && !insn->noendbr) {
> > + WARN_FUNC("IRET_REGS hint without ENDBR: %s",
> > + insn->sec, insn->offset,
> > + sym->name);
> > + }
>
> No need to print sym->name here, WARN_FUNC() already does it?

Almost; perhaps the change to make is to either introduce WARN_SYM or
make WARN_FUNC also print !STT_FUNC symbols ?

> > @@ -3260,7 +3334,12 @@ static int validate_branch(struct objtoo
> > state.df = false;
> > break;
> >
> > + case INSN_NOP:
> > + break;
> > +
> > default:
> > + if (ibt)
> > + validate_ibt_insn(file, insn);
>
> This is kind of subtle. It would be more robust/clear to move this call
> out of the switch statement and check explicitly for the exclusion of
> jump/call instructions from within validate_ibt_insn().

Can do I suppose.

> > break;
> > }
> >
> > @@ -3506,6 +3585,130 @@ static int validate_functions(struct obj
> > return warnings;
> > }
> >
> > +static struct instruction *
> > +validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
> > +{
> > + struct instruction *dest;
> > + struct section *sec;
> > + unsigned long off;
> > +
> > + sec = reloc->sym->sec;
> > + off = reloc->sym->offset + reloc->addend;
>
> This math assumes non-PC-relative. If it's R_X86_64_PC32 or
> R_X86_64_PLT32 then it needs +4 added.

Right; so I actually had that PC32 thing in there for a while, but ran
into other trouble. I'll go try and figure it out.


> > +static void validate_ibt_target(struct objtool_file *file, struct instruction *insn,
> > + struct instruction *target)
> > +{
> > + if (target->func && target->func == insn->func) {
>
> (Here and elsewhere) Instead of 'target' can we call it 'dest' for
> consistency with existing code?

Done.

> > + /*
> > + * Anything from->to self is either _THIS_IP_ or IRET-to-self.
> > + *
> > + * There is no sane way to annotate _THIS_IP_ since the compiler treats the
> > + * relocation as a constant and is happy to fold in offsets, skewing any
> > + * annotation we do, leading to vast amounts of false-positives.
> > + *
> > + * There's also compiler generated _THIS_IP_ through KCOV and
> > + * such which we have no hope of annotating.
> > + *
> > + * As such, blanked accept self-references without issue.
>
> "blanket"

Duh.

> > +static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
> > +{
> > + struct reloc *reloc = insn_reloc(file, insn);
> > + struct instruction *target;
> > +
> > + for (;;) {
> > + if (!reloc)
> > + return;
> > +
> > + target = validate_ibt_reloc(file, reloc);
> > + if (target)
> > + validate_ibt_target(file, insn, target);
> > +
> > + reloc = find_reloc_by_dest_range(file->elf, insn->sec, reloc->offset + 1,
> > + (insn->offset + insn->len) - (reloc->offset + 1));
> > + }
>
> I'm confused about what this loop is trying to do. Why would an
> instruction have more than one reloc? It at least needs a comment.

Because there are some :/ 'mov' can have an immediate and a
displacement, both needing a relocation.

> Also a proper for() loop would be easier to follow:
>
> for (reloc = insn_reloc(file, insn);
> reloc;
> reloc = find_reloc_by_dest_range(file->elf, insn->sec,
> reloc->offset + 1,
> (insn->offset + insn->len) - (reloc->offset + 1)) {

Sure.

> > +}
> > +
> > +static int validate_ibt(struct objtool_file *file)
> > +{
> > + struct section *sec;
> > + struct reloc *reloc;
> > +
> > + for_each_sec(file, sec) {
> > + bool is_data;
> > +
> > + /* already done in validate_branch() */
> > + if (sec->sh.sh_flags & SHF_EXECINSTR)
> > + continue;
> > +
> > + if (!sec->reloc)
> > + continue;
> > +
> > + if (!strncmp(sec->name, ".orc", 4))
> > + continue;
> > +
> > + if (!strncmp(sec->name, ".discard", 8))
> > + continue;
> > +
> > + if (!strncmp(sec->name, ".debug", 6))
> > + continue;
> > +
> > + if (!strcmp(sec->name, "_error_injection_whitelist"))
> > + continue;
> > +
> > + if (!strcmp(sec->name, "_kprobe_blacklist"))
> > + continue;
> > +
> > + is_data = strstr(sec->name, ".data") || strstr(sec->name, ".rodata");
> > +
> > + list_for_each_entry(reloc, &sec->reloc->reloc_list, list) {
> > + struct instruction *target;
> > +
> > + target = validate_ibt_reloc(file, reloc);
> > + if (is_data && target && !target->noendbr) {
> > + warn_noendbr("data ", reloc->sym->sec,
> > + reloc->sym->offset + reloc->addend,
>
> Another case where the addend math would be wrong if it were
> pc-relative. Not sure if that's possible here or not.

I'll check.

2022-02-28 00:46:18

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 34/39] objtool: Validate IBT assumptions

On Sun, Feb 27, 2022 at 06:00:03PM +0100, Peter Zijlstra wrote:
> > > @@ -3101,6 +3164,17 @@ static int validate_branch(struct objtoo
> > >
> > > if (insn->hint) {
> > > state.cfi = *insn->cfi;
> > > + if (ibt) {
> > > + struct symbol *sym;
> > > +
> > > + if (insn->cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL &&
> > > + (sym = find_symbol_by_offset(insn->sec, insn->offset)) &&
> > > + insn->type != INSN_ENDBR && !insn->noendbr) {
> > > + WARN_FUNC("IRET_REGS hint without ENDBR: %s",
> > > + insn->sec, insn->offset,
> > > + sym->name);
> > > + }
> >
> > No need to print sym->name here, WARN_FUNC() already does it?
>
> Almost; perhaps the change to make is to either introduce WARN_SYM or
> make WARN_FUNC also print !STT_FUNC symbols ?

In the case of no function, WARN_FUNC() falls back to printing sec+off.
Is that not good enough?

> > > +static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
> > > +{
> > > + struct reloc *reloc = insn_reloc(file, insn);
> > > + struct instruction *target;
> > > +
> > > + for (;;) {
> > > + if (!reloc)
> > > + return;
> > > +
> > > + target = validate_ibt_reloc(file, reloc);
> > > + if (target)
> > > + validate_ibt_target(file, insn, target);
> > > +
> > > + reloc = find_reloc_by_dest_range(file->elf, insn->sec, reloc->offset + 1,
> > > + (insn->offset + insn->len) - (reloc->offset + 1));
> > > + }
> >
> > I'm confused about what this loop is trying to do. Why would an
> > instruction have more than one reloc? It at least needs a comment.
>
> Because there are some :/ 'mov' can have an immediate and a
> displacement, both needing a relocation.

<boom> mind blown. How did I not know this?

--
Josh

2022-02-28 09:58:46

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 34/39] objtool: Validate IBT assumptions

On Sun, Feb 27, 2022 at 06:00:03PM +0100, Peter Zijlstra wrote:
> On Sat, Feb 26, 2022 at 07:13:48PM -0800, Josh Poimboeuf wrote:
> > > +static struct instruction *
> > > +validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
> > > +{
> > > + struct instruction *dest;
> > > + struct section *sec;
> > > + unsigned long off;
> > > +
> > > + sec = reloc->sym->sec;
> > > + off = reloc->sym->offset + reloc->addend;
> >
> > This math assumes non-PC-relative. If it's R_X86_64_PC32 or
> > R_X86_64_PLT32 then it needs +4 added.
>
> Right; so I actually had that PC32 thing in there for a while, but ran
> into other trouble. I'll go try and figure it out.

Things like .rela.initcall*.init use PC32 but don't need the +4. If we
get that wrong it'll seal all the initcalls and boot doesn't get very
far at all :-)

How do you feel about something like:

	sec = reloc->sym->sec;
	off = reloc->sym->offset;

	if ((reloc->sec->base->sh.sh_flags & SHF_EXECINSTR) &&
	    (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32))
		off += arch_dest_reloc_offset(reloc->addend);
	else
		off += reloc->addend;


hmm ?

2022-02-28 10:56:29

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 34/39] objtool: Validate IBT assumptions

On Sun, Feb 27, 2022 at 02:20:55PM -0800, Josh Poimboeuf wrote:
> On Sun, Feb 27, 2022 at 06:00:03PM +0100, Peter Zijlstra wrote:
> > > > @@ -3101,6 +3164,17 @@ static int validate_branch(struct objtoo
> > > >
> > > > if (insn->hint) {
> > > > state.cfi = *insn->cfi;
> > > > + if (ibt) {
> > > > + struct symbol *sym;
> > > > +
> > > > + if (insn->cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL &&
> > > > + (sym = find_symbol_by_offset(insn->sec, insn->offset)) &&
> > > > + insn->type != INSN_ENDBR && !insn->noendbr) {
> > > > + WARN_FUNC("IRET_REGS hint without ENDBR: %s",
> > > > + insn->sec, insn->offset,
> > > > + sym->name);
> > > > + }
> > >
> > > No need to print sym->name here, WARN_FUNC() already does it?
> >
> > Almost; perhaps the change to make is to either introduce WARN_SYM or
> > make WARN_FUNC also print !STT_FUNC symbols ?
>
> In the case of no function, WARN_FUNC() falls back to printing sec+off.
> Is that not good enough?

I got really tired of doing the manual symbol lookup... I don't suppose
it matters too much now that I've more or less completed the triage, but
it was useful.

2022-02-28 19:19:45

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 34/39] objtool: Validate IBT assumptions

On Mon, Feb 28, 2022 at 10:47:55AM +0100, Peter Zijlstra wrote:
> On Sun, Feb 27, 2022 at 02:20:55PM -0800, Josh Poimboeuf wrote:
> > On Sun, Feb 27, 2022 at 06:00:03PM +0100, Peter Zijlstra wrote:
> > > > > @@ -3101,6 +3164,17 @@ static int validate_branch(struct objtoo
> > > > >
> > > > > if (insn->hint) {
> > > > > state.cfi = *insn->cfi;
> > > > > + if (ibt) {
> > > > > + struct symbol *sym;
> > > > > +
> > > > > + if (insn->cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL &&
> > > > > + (sym = find_symbol_by_offset(insn->sec, insn->offset)) &&
> > > > > + insn->type != INSN_ENDBR && !insn->noendbr) {
> > > > > + WARN_FUNC("IRET_REGS hint without ENDBR: %s",
> > > > > + insn->sec, insn->offset,
> > > > > + sym->name);
> > > > > + }
> > > >
> > > > No need to print sym->name here, WARN_FUNC() already does it?
> > >
> > > Almost; perhaps the change to make is to either introduce WARN_SYM or
> > > make WARN_FUNC also print !STT_FUNC symbols ?
> >
> > In the case of no function, WARN_FUNC() falls back to printing sec+off.
> > Is that not good enough?
>
> I got really tired of doing the manual symbol lookup... I don't suppose
> it matters too much now that I've more or less completed the triage, but
> it was useful.

Maybe it would be reasonable to change WARN_FUNC to do that? i.e. fall
back from func+off to sym+off to sec+off.

--
Josh

2022-02-28 19:51:05

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH v2 34/39] objtool: Validate IBT assumptions

On Mon, Feb 28, 2022 at 10:26:07AM +0100, Peter Zijlstra wrote:
> On Sun, Feb 27, 2022 at 06:00:03PM +0100, Peter Zijlstra wrote:
> > On Sat, Feb 26, 2022 at 07:13:48PM -0800, Josh Poimboeuf wrote:
> > > > +static struct instruction *
> > > > +validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc)
> > > > +{
> > > > + struct instruction *dest;
> > > > + struct section *sec;
> > > > + unsigned long off;
> > > > +
> > > > + sec = reloc->sym->sec;
> > > > + off = reloc->sym->offset + reloc->addend;
> > >
> > > This math assumes non-PC-relative. If it's R_X86_64_PC32 or
> > > R_X86_64_PLT32 then it needs +4 added.
> >
> > Right; so I actually had that PC32 thing in there for a while, but ran
> > into other trouble. I'll go try and figure it out.
>
> Things like .rela.initcall*.init use PC32 but don't need the +4. If we
> get that wrong it'll seal all the initcalls and boot doesn't get very
> far at all :-)

Ah...

> How do you feel about something like:
>
> 	sec = reloc->sym->sec;
> 	off = reloc->sym->offset;
>
> 	if ((reloc->sec->base->sh.sh_flags & SHF_EXECINSTR) &&
> 	    (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32))
> 		off += arch_dest_reloc_offset(reloc->addend);
> 	else
> 		off += reloc->addend;
>
>
> hmm ?

Looks good to me.

--
Josh

2022-02-28 20:47:57

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v2 34/39] objtool: Validate IBT assumptions

On Mon, Feb 28, 2022 at 10:36:55AM -0800, Josh Poimboeuf wrote:
> On Mon, Feb 28, 2022 at 10:47:55AM +0100, Peter Zijlstra wrote:
> > On Sun, Feb 27, 2022 at 02:20:55PM -0800, Josh Poimboeuf wrote:
> > > On Sun, Feb 27, 2022 at 06:00:03PM +0100, Peter Zijlstra wrote:
> > > > > > @@ -3101,6 +3164,17 @@ static int validate_branch(struct objtoo
> > > > > >
> > > > > > if (insn->hint) {
> > > > > > state.cfi = *insn->cfi;
> > > > > > + if (ibt) {
> > > > > > + struct symbol *sym;
> > > > > > +
> > > > > > + if (insn->cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL &&
> > > > > > + (sym = find_symbol_by_offset(insn->sec, insn->offset)) &&
> > > > > > + insn->type != INSN_ENDBR && !insn->noendbr) {
> > > > > > + WARN_FUNC("IRET_REGS hint without ENDBR: %s",
> > > > > > + insn->sec, insn->offset,
> > > > > > + sym->name);
> > > > > > + }
> > > > >
> > > > > No need to print sym->name here, WARN_FUNC() already does it?
> > > >
> > > > Almost; perhaps the change to make is to either introduce WARN_SYM or
> > > > make WARN_FUNC also print !STT_FUNC symbols ?
> > >
> > > In the case of no function, WARN_FUNC() falls back to printing sec+off.
> > > Is that not good enough?
> >
> > I got really tired of doing the manual symbol lookup... I don't suppose
> > it matters too much now that I've more or less completed the triage, but
> > it was useful.
>
> Maybe it would be reasonable to change WARN_FUNC to do that? i.e. fall
> back from func+off to sym+off to sec+off.

I'll make it happen.