2023-02-01 14:10:50

by Peter Zijlstra

[permalink] [raw]
Subject: [RFC] x86/alternative: Support relocations in alternatives


(sorry for the repost, forgot lkml)

A little while ago someone (Kirill) ran into the whole 'alternatives don't
do relocations nonsense' again and I got annoyed enough to actually look
at the code.

Since the whole alternative machinery already fully decodes the
instructions it is simple enough to adjust immediates and displacement
when needed. Specifically, the immediates for IP modifying instructions
(JMP, CALL, Jcc) and the displacement for RIP-relative instructions.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/kernel/alternative.c | 199 ++++++++++++++++++++++-----------------
tools/objtool/arch/x86/special.c | 8 +-
2 files changed, 112 insertions(+), 95 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 7d8c3cbde368..f9a579fe47b0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -122,71 +122,6 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len);

-/*
- * Are we looking at a near JMP with a 1 or 4-byte displacement.
- */
-static inline bool is_jmp(const u8 opcode)
-{
- return opcode == 0xeb || opcode == 0xe9;
-}
-
-static void __init_or_module
-recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
-{
- u8 *next_rip, *tgt_rip;
- s32 n_dspl, o_dspl;
- int repl_len;
-
- if (a->replacementlen != 5)
- return;
-
- o_dspl = *(s32 *)(insn_buff + 1);
-
- /* next_rip of the replacement JMP */
- next_rip = repl_insn + a->replacementlen;
- /* target rip of the replacement JMP */
- tgt_rip = next_rip + o_dspl;
- n_dspl = tgt_rip - orig_insn;
-
- DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);
-
- if (tgt_rip - orig_insn >= 0) {
- if (n_dspl - 2 <= 127)
- goto two_byte_jmp;
- else
- goto five_byte_jmp;
- /* negative offset */
- } else {
- if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
- goto two_byte_jmp;
- else
- goto five_byte_jmp;
- }
-
-two_byte_jmp:
- n_dspl -= 2;
-
- insn_buff[0] = 0xeb;
- insn_buff[1] = (s8)n_dspl;
- add_nops(insn_buff + 2, 3);
-
- repl_len = 2;
- goto done;
-
-five_byte_jmp:
- n_dspl -= 5;
-
- insn_buff[0] = 0xe9;
- *(s32 *)&insn_buff[1] = n_dspl;
-
- repl_len = 5;
-
-done:
-
- DPRINTK("final displ: 0x%08x, JMP 0x%lx",
- n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
-}
-
/*
* optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
*
@@ -253,6 +188,78 @@ static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
}
}

+#define apply_reloc_n(n_, p_, d_) \
+ do { \
+ s32 v = *(s##n_ *)(p_); \
+ v += (d_); \
+ BUG_ON((v >> 31) != (v >> (n_-1))); \
+ *(s##n_ *)(p_) = (s##n_)v; \
+ } while (0)
+
+
+static void __always_inline
+apply_reloc(int n, void *ptr, uintptr_t diff)
+{
+ switch (n) {
+ case 1: apply_reloc_n(8, ptr, diff); break;
+ case 2: apply_reloc_n(16, ptr, diff); break;
+ case 4: apply_reloc_n(32, ptr, diff); break;
+ default: BUG();
+ }
+}
+
+static void __init_or_module noinline
+apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)
+{
+ struct insn insn;
+ int i = 0;
+
+ for (;;) {
+ if (insn_decode_kernel(&insn, &instr[i]))
+ return;
+
+ switch (insn.opcode.bytes[0]) {
+ case 0x0f:
+ if (insn.opcode.bytes[1] < 0x80 ||
+ insn.opcode.bytes[1] > 0x8f)
+ break;
+
+ fallthrough; /* Jcc.d32 */
+ case 0x70 ... 0x7f: /* Jcc.d8 */
+ case JMP8_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case CALL_INSN_OPCODE:
+ u8 *target = src + i + insn.length + insn.immediate.value;
+ if (target < src || target > src + src_len) {
+ apply_reloc(insn.immediate.nbytes,
+ instr + i + insn_offset_immediate(&insn),
+ src - dest);
+ }
+ }
+
+ if (insn_rip_relative(&insn)) {
+ u8 *target = src + i + insn.length + insn.displacement.value;
+ if (target < src || target > src + src_len) {
+ apply_reloc(insn.displacement.nbytes,
+ instr + i + insn_offset_displacement(&insn),
+ src - dest);
+ }
+ }
+
+ /*
+ * See if this and any potentially following NOPs can be
+ * optimized.
+ */
+ if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
+ i += optimize_nops_range(instr, len, i);
+ else
+ i += insn.length;
+
+ if (i >= len)
+ return;
+ }
+}
+
/*
* Replace instructions with better alternatives for this CPU type. This runs
* before SMP is initialized to avoid SMP problems with self modifying code.
@@ -296,8 +303,10 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
* - feature not present but ALTINSTR_FLAG_INV is set to mean,
* patch if feature is *NOT* present.
*/
- if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV))
- goto next;
+ if (!boot_cpu_has(feature) == !(a->cpuid & ALTINSTR_FLAG_INV)) {
+ optimize_nops(instr, a->instrlen);
+ continue;
+ }

DPRINTK("feat: %s%d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d)",
(a->cpuid & ALTINSTR_FLAG_INV) ? "!" : "",
@@ -306,37 +315,20 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
instr, instr, a->instrlen,
replacement, a->replacementlen);

- DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
- DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);

memcpy(insn_buff, replacement, a->replacementlen);
insn_buff_sz = a->replacementlen;

- /*
- * 0xe8 is a relative jump; fix the offset.
- *
- * Instruction length is checked before the opcode to avoid
- * accessing uninitialized bytes for zero-length replacements.
- */
- if (a->replacementlen == 5 && *insn_buff == 0xe8) {
- *(s32 *)(insn_buff + 1) += replacement - instr;
- DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
- *(s32 *)(insn_buff + 1),
- (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
- }
-
- if (a->replacementlen && is_jmp(replacement[0]))
- recompute_jump(a, instr, replacement, insn_buff);
-
for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
insn_buff[insn_buff_sz] = 0x90;

+ apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen);
+
+ DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
+ DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);

text_poke_early(instr, insn_buff, insn_buff_sz);
-
-next:
- optimize_nops(instr, a->instrlen);
}
}

@@ -1334,6 +1326,35 @@ static noinline void __init int3_selftest(void)
unregister_die_notifier(&int3_exception_nb);
}

+static __initdata int __alt_reloc_selftest_addr;
+
+__visible noinline void __init __alt_reloc_selftest(void *arg)
+{
+ WARN_ON(arg != &__alt_reloc_selftest_addr);
+}
+
+static noinline void __init alt_reloc_selftest(void)
+{
+ /*
+ * Tests apply_relocation().
+ *
+ * This has a relative immediate (CALL) in a place other than the first
+ * instruction and additionally on x86_64 we get a RIP-relative LEA:
+ *
+ * lea 0x0(%rip),%rdi # 5d0: R_X86_64_PC32 .init.data+0x5566c
+ * call +0 # 5d5: R_X86_64_PLT32 __alt_reloc_selftest-0x4
+ *
+ * Getting this wrong will either crash and burn or tickle the WARN
+ * above.
+ */
+ asm_inline volatile (
+ ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS)
+ : /* output */
+ : [mem] "m" (__alt_reloc_selftest_addr)
+ : _ASM_ARG1
+ );
+}
+
void __init alternative_instructions(void)
{
int3_selftest();
@@ -1421,6 +1442,8 @@ void __init alternative_instructions(void)

restart_nmi();
alternatives_patched = 1;
+
+ alt_reloc_selftest();
}

/**
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 7c97b7391279..799ad6bb72e5 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -42,13 +42,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
struct instruction *insn,
struct reloc *reloc)
{
- /*
- * The x86 alternatives code adjusts the offsets only when it
- * encounters a branch instruction at the very beginning of the
- * replacement group.
- */
- return insn->offset == special_alt->new_off &&
- (insn->type == INSN_CALL || is_jump(insn));
+ return true;
}

/*


2023-02-01 17:02:46

by Kirill A. Shutemov

[permalink] [raw]
Subject: Re: [RFC] x86/alternative: Support relocations in alternatives

On Wed, Feb 01, 2023 at 03:10:33PM +0100, Peter Zijlstra wrote:
> +static void __init_or_module noinline
> +apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)
> +{
> + struct insn insn;
> + int i = 0;
> +
> + for (;;) {
> + if (insn_decode_kernel(&insn, &instr[i]))
> + return;
> +
> + switch (insn.opcode.bytes[0]) {
> + case 0x0f:
> + if (insn.opcode.bytes[1] < 0x80 ||
> + insn.opcode.bytes[1] > 0x8f)
> + break;
> +
> + fallthrough; /* Jcc.d32 */
> + case 0x70 ... 0x7f: /* Jcc.d8 */
> + case JMP8_INSN_OPCODE:
> + case JMP32_INSN_OPCODE:
> + case CALL_INSN_OPCODE:
> + u8 *target = src + i + insn.length + insn.immediate.value;

LLVM doesn't like this declaration of 'target'. I've moved to the top of
the function.

I've tested the patch with both GCC and LLVM (after 'target' fix) with my
LAM case. See the patch below.

Works fine. Thanks.

Tested-by: Kirill A. Shutemov <[email protected]>

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 6450a2723bcd..ec7613141db6 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -35,17 +35,12 @@ static inline unsigned long __untagged_addr(unsigned long addr)
{
long sign;

- /*
- * Refer tlbstate_untag_mask directly to avoid RIP-relative relocation
- * in alternative instructions. The relocation gets wrong when gets
- * copied to the target place.
- */
asm (ALTERNATIVE("",
"sar $63, %[sign]\n\t" /* user_ptr ? 0 : -1UL */
- "or %%gs:tlbstate_untag_mask, %[sign]\n\t"
+ "or " __percpu_arg([mask]) ", %[sign]\n\t"
"and %[sign], %[addr]\n\t", X86_FEATURE_LAM)
: [addr] "+r" (addr), [sign] "=r" (sign)
- : "m" (tlbstate_untag_mask), "[sign]" (addr));
+ : [mask] "m" (tlbstate_untag_mask), "[sign]" (addr));

return addr;
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 50b88aa3ebee..bb1437ac2f5a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -212,6 +212,7 @@ static void __init_or_module noinline
apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)
{
struct insn insn;
+ u8 *target;
int i = 0;

for (;;) {
@@ -229,7 +230,7 @@ apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)
case JMP8_INSN_OPCODE:
case JMP32_INSN_OPCODE:
case CALL_INSN_OPCODE:
- u8 *target = src + i + insn.length + insn.immediate.value;
+ target = src + i + insn.length + insn.immediate.value;
if (target < src || target > src + src_len) {
apply_reloc(insn.immediate.nbytes,
instr + i + insn_offset_immediate(&insn),
--
Kiryl Shutsemau / Kirill A. Shutemov

2023-02-03 13:13:25

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC] x86/alternative: Support relocations in alternatives

On Wed, Feb 01, 2023 at 03:10:33PM +0100, Peter Zijlstra wrote:
> (sorry for the repost, forgot lkml)

Btw, you need to rediff this against tip/master. There's alt_instr.flags
change in there which conflicts with yours.

> +static void __always_inline
> +apply_reloc(int n, void *ptr, uintptr_t diff)
> +{
> + switch (n) {
> + case 1: apply_reloc_n(8, ptr, diff); break;
> + case 2: apply_reloc_n(16, ptr, diff); break;
> + case 4: apply_reloc_n(32, ptr, diff); break;
> + default: BUG();
> + }
> +}
> +
> +static void __init_or_module noinline
> +apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)

First param is instr, third is dest.

at the call site you have

apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen);

and instr is third param. Let's call the function params:

static void __init_or_module noinline
apply_relocation(u8 *insn_buff, size_t len, u8 *instr, u8 *repl, size_t repl_len)

for less confusion pls.

> +{
> + struct insn insn;
> + int i = 0;
> +
> + for (;;) {
> + if (insn_decode_kernel(&insn, &instr[i]))

I guess say a warning here so that we catch when it goes into the fields early.

> + return;
> +
> + switch (insn.opcode.bytes[0]) {
> + case 0x0f:
> + if (insn.opcode.bytes[1] < 0x80 ||
> + insn.opcode.bytes[1] > 0x8f)
> + break;
> +
> + fallthrough; /* Jcc.d32 */
> + case 0x70 ... 0x7f: /* Jcc.d8 */
> + case JMP8_INSN_OPCODE:
> + case JMP32_INSN_OPCODE:
> + case CALL_INSN_OPCODE:
> + u8 *target = src + i + insn.length + insn.immediate.value;
> + if (target < src || target > src + src_len) {
> + apply_reloc(insn.immediate.nbytes,
> + instr + i + insn_offset_immediate(&insn),
> + src - dest);
> + }

Uff, it took me a while to parse this. So this can be simpler. The basic
math is:

new_offset = next_rip - target_address;

where
next_rip = instr + insn.length;

and I admit that my function was a bit clumsy but I think yours can be
made simpler while keeping it cleaner.

Also, calling this all "reloc" here is kinda confusing because this is not a
relocation but the offset from the next RIP to the target of the
JMP/CALL.

Lemme think about it a bit more.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2023-02-03 15:05:38

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC] x86/alternative: Support relocations in alternatives

On Fri, Feb 03, 2023 at 02:13:13PM +0100, Borislav Petkov wrote:
> On Wed, Feb 01, 2023 at 03:10:33PM +0100, Peter Zijlstra wrote:
> > (sorry for the repost, forgot lkml)
>
> Btw, you need to rediff this against tip/master. There's alt_instr.flags
> change in there which conflicts with yours.
>
> > +static void __always_inline
> > +apply_reloc(int n, void *ptr, uintptr_t diff)
> > +{
> > + switch (n) {
> > + case 1: apply_reloc_n(8, ptr, diff); break;
> > + case 2: apply_reloc_n(16, ptr, diff); break;
> > + case 4: apply_reloc_n(32, ptr, diff); break;
> > + default: BUG();
> > + }
> > +}
> > +
> > +static void __init_or_module noinline
> > +apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)
>
> First param is instr, third is dest.
>
> at the call site you have
>
> apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen);
>
> and instr is third param. Let's call the function params:
>
> static void __init_or_module noinline
> apply_relocation(u8 *insn_buff, size_t len, u8 *instr, u8 *repl, size_t repl_len)

How about:

apply_relocation(u8 *buf, size_t len, u8 *dst, u8 *src, size_t src_len)

Because I get horribly confused by the whole instr and repl thing.

> > +{
> > + struct insn insn;
> > + int i = 0;
> > +
> > + for (;;) {
> > + if (insn_decode_kernel(&insn, &instr[i]))
>
> I guess say a warning here so that we catch when it goes into the fields early.

Sure..

> > + return;
> > +
> > + switch (insn.opcode.bytes[0]) {
> > + case 0x0f:
> > + if (insn.opcode.bytes[1] < 0x80 ||
> > + insn.opcode.bytes[1] > 0x8f)
> > + break;
> > +
> > + fallthrough; /* Jcc.d32 */
> > + case 0x70 ... 0x7f: /* Jcc.d8 */
> > + case JMP8_INSN_OPCODE:
> > + case JMP32_INSN_OPCODE:
> > + case CALL_INSN_OPCODE:
> > + u8 *target = src + i + insn.length + insn.immediate.value;
> > + if (target < src || target > src + src_len) {
> > + apply_reloc(insn.immediate.nbytes,
> > + instr + i + insn_offset_immediate(&insn),
> > + src - dest);
> > + }
>
> Uff, it took me a while to parse this. So this can be simpler. The basic
> math is:
>
> new_offset = next_rip - target_address;
>
> where
> next_rip = instr + insn.length;
>
> and I admit that my function was a bit clumsy but I think yours can be
> made simpler while keeping it cleaner.

I'm not sure what you're saying here... so let me walk through the whole
thing (specifically the immediate case, since that's what you quote). So
what we need is:

src_imm = target - src_next_ip (1)

what we want is:

dst_imm = target - dst_next_ip (2)

so what we do is rework (1) as an expression for target like:

target = src_imm + src_next_ip (1a)

and substitute in (2) to get:

dst_imm = (src_imm + src_next_ip) - dst_next_ip (3)

Now, since the instruction stream is 'identical' at src and dest
we can state that:

src_next_ip = src + ip_offset
dst_next_ip = dst + ip_offset (4)

Substitute (4) in (3) and observe ip_offset being cancelled out to
obtain:

dst_imm = src_imm + (src + ip_offset) - (dst + ip_offset)
= src_imm + src - dst + ip_offset - ip_offset
= src_imm + src - dst (5)

The very thing I did.

IOW, we can correct the displacement without caring about which actual
instruction in the stream we're correcting since the relative offset is
given by 'src - dst'. IOW, we don't give a crap about insn.length in
this case ;-)

> Also, calling this all "reloc" here is kinda confusing because this is not a
> relocation but the offset from the next RIP to the target of the
> JMP/CALL.

Well, we do relocate the instructions... They go from one place to
another.

2023-02-03 15:17:19

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC] x86/alternative: Support relocations in alternatives

On Fri, Feb 03, 2023 at 04:04:55PM +0100, Peter Zijlstra wrote:

> > > + switch (insn.opcode.bytes[0]) {
> > > + case 0x0f:
> > > + if (insn.opcode.bytes[1] < 0x80 ||
> > > + insn.opcode.bytes[1] > 0x8f)
> > > + break;
> > > +
> > > + fallthrough; /* Jcc.d32 */
> > > + case 0x70 ... 0x7f: /* Jcc.d8 */
> > > + case JMP8_INSN_OPCODE:
> > > + case JMP32_INSN_OPCODE:
> > > + case CALL_INSN_OPCODE:
> > > + u8 *target = src + i + insn.length + insn.immediate.value;
> > > + if (target < src || target > src + src_len) {
> > > + apply_reloc(insn.immediate.nbytes,
> > > + instr + i + insn_offset_immediate(&insn),
> > > + src - dest);
> > > + }
> >
> > Uff, it took me a while to parse this. So this can be simpler. The basic
> > math is:
> >
> > new_offset = next_rip - target_address;
> >
> > where
> > next_rip = instr + insn.length;
> >
> > and I admit that my function was a bit clumsy but I think yours can be
> > made simpler while keeping it cleaner.
>
> I'm not sure what you're saying here... so let me walk through the whole
> thing (specifically the immediate case, since that's what you quote). So
> what we need is:
>
> src_imm = target - src_next_ip (1)
>
> what we want is:
>
> dst_imm = target - dst_next_ip (2)
>
> so what we do is rework (1) as an expression for target like:
>
> target = src_imm + src_next_ip (1a)
>
> and substitute in (2) to get:
>
> dst_imm = (src_imm + src_next_ip) - dst_next_ip (3)
>
> Now, since the instruction stream is 'identical' at src and dest
> we can state that:
>
> src_next_ip = src + ip_offset
> dst_next_ip = dst + ip_offset (4)
>
> Substitute (4) in (3) and observe ip_offset being cancelled out to
> obtain:
>
> dst_imm = src_imm + (src + ip_offset) - (dst + ip_offset)
> = src_imm + src - dst + ip_offset - ip_offset
> = src_imm + src - dst (5)
>
> The very thing I did.
>
> IOW, we can correct the displacement without caring about which actual
> instruction in the stream we're correcting since the relative offset is
> given by 'src - dst'. IOW, we don't give a crap about insn.length in
> this case ;-)

Specifically, in the above case ip_offset would be given by:

ip_offset = i + insn.length

where i is the offset in the buffer to the current instruction and
insn.length is well, the length of the current instruction ;-)

2023-02-03 16:04:47

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC] x86/alternative: Support relocations in alternatives

On Fri, Feb 03, 2023 at 04:04:55PM +0100, Peter Zijlstra wrote:
> How about:
>
> apply_relocation(u8 *buf, size_t len, u8 *dst, u8 *src, size_t src_len)
>
> Because I get horribly confused by the whole instr and repl thing.

And I get confused by the @src thing. :-)

When it says repl, I know it is the replacement instruction and its
length. You could add kernel-doc as a help I guess...

Rest on IRC. :)

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2023-02-03 16:46:57

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC] x86/alternative: Support relocations in alternatives

On Fri, Feb 03, 2023 at 05:04:35PM +0100, Borislav Petkov wrote:
> Rest on IRC. :)

Here's what I'm thinking. It still fails somewhere while booting so it
is not good yet but the idea is to show what I mean:

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 0eff4033eea4..2ede814de089 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -208,14 +208,23 @@ apply_reloc(int n, void *ptr, uintptr_t diff)
}
}

+static void __always_inline apply_new_offset(int immed_size, void *ptr, s32 new_offset)
+{
+ switch (immed_size) {
+ case 1: *(s8 *)ptr = (s8) new_offset; break;
+ case 2: *(s16 *)ptr = (s16)new_offset; break;
+ case 4: *(s32 *)ptr = new_offset; break;
+ default: BUG();
+ }
+}
static void __init_or_module noinline
-apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)
+apply_relocation(u8 *insn_buff, size_t len, u8 *instr, u8 *repl, size_t repl_len)
{
struct insn insn;
int i = 0;

for (;;) {
- if (insn_decode_kernel(&insn, &instr[i]))
+ if (insn_decode_kernel(&insn, &insn_buff[i]))
return;

switch (insn.opcode.bytes[0]) {
@@ -229,20 +238,27 @@ apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)
case JMP8_INSN_OPCODE:
case JMP32_INSN_OPCODE:
case CALL_INSN_OPCODE:
- u8 *target = src + i + insn.length + insn.immediate.value;
- if (target < src || target > src + src_len) {
- apply_reloc(insn.immediate.nbytes,
- instr + i + insn_offset_immediate(&insn),
- src - dest);
- }
+ u8 *target = repl + i + insn.length + insn.immediate.value;
+ u8 *next_rip = instr + insn.length;
+ s32 new_offset;
+
+ if (instr < target)
+ new_offset = target - next_rip;
+ else
+ new_offset = next_rip - target;
+
+ apply_new_offset(insn.immediate.nbytes,
+ insn_buff + i + insn_offset_immediate(&insn),
+ new_offset);
+
}

if (insn_rip_relative(&insn)) {

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2023-02-03 23:26:01

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC] x86/alternative: Support relocations in alternatives

On Fri, Feb 03, 2023 at 05:46:47PM +0100, Borislav Petkov wrote:
> On Fri, Feb 03, 2023 at 05:04:35PM +0100, Borislav Petkov wrote:
> > Rest on IRC. :)
>
> Here's what I'm thinking. It still fails somewhere while booting so it
> is not good yet but the idea is to show what I mean:

Yeah, forget it. I need both next_rip at the place we're patching and
next_rip in the .altinstr_replacement section. And by the time I do
that, it won't get any prettier.

And I think yours solves that more elegantly but please document the
math transformation to compute the new offset.

Also, pls do this:

/*
* Do not recompute the offset if the target is within the
* patched insn block.
*/
if (target < repl || target > repl + repl_len)

to hint that you don't have to replace the offsets which are already
correct when a whole set of insns is being patched in.

FILL_RETURN_BUFFER was one example. :)

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2023-02-06 14:29:20

by Borislav Petkov

[permalink] [raw]
Subject: Re: [RFC] x86/alternative: Support relocations in alternatives

On Wed, Feb 01, 2023 at 03:10:33PM +0100, Peter Zijlstra wrote:
> +apply_relocation(u8 *instr, size_t len, u8 *dest, u8 *src, size_t src_len)
> +{
> + struct insn insn;
> + int i = 0;
> +
> + for (;;) {
> + if (insn_decode_kernel(&insn, &instr[i]))
> + return;
> +
> + switch (insn.opcode.bytes[0]) {
> + case 0x0f:
> + if (insn.opcode.bytes[1] < 0x80 ||
> + insn.opcode.bytes[1] > 0x8f)
> + break;
> +
> + fallthrough; /* Jcc.d32 */
> + case 0x70 ... 0x7f: /* Jcc.d8 */
> + case JMP8_INSN_OPCODE:
> + case JMP32_INSN_OPCODE:
> + case CALL_INSN_OPCODE:
> + u8 *target = src + i + insn.length + insn.immediate.value;
> + if (target < src || target > src + src_len) {
> + apply_reloc(insn.immediate.nbytes,
> + instr + i + insn_offset_immediate(&insn),
> + src - dest);

Ok, here's an addition to convert to 2-byte JMPs. I'll do a proper diff
after you refresh yours ontop but this is how it looks like. It
basically does one more pass on the instr[] array bytes after the
relocation and only for JMP rel32off insns, i.e., opcode 0xe9.

Example would be:

[ 1.059455] SMP alternatives: feat: 3*32+21, old: (fpu_clone+0x115/0x290 (ffffffff8102e2e5) len: 5), repl: (ffffffff898ac1b6, len: 5)
[ 1.060747] SMP alternatives: apply_reloc: n: 4, ptr: 0xffffffff82203d93, diff: 0x887ded1
^^^^^^^^^^

That's the diff you pass to apply_reloc_n()

[ 1.062692] SMP alternatives: ffffffff8102e2e5: old_insn: e9 dd b8 6a 08
[ 1.063452] SMP alternatives: ffffffff898ac1b6: rpl_insn: e9 41 21 78 f7

But the end offset is simply 0x15. (0x12 with the 5-byte JMP).

So we can just as well do JMP rel8off which takes a signed byte as an
offset. And slap a 3-byte NOP after that:

[ 1.064211] SMP alternatives: ffffffff8102e2e5: final_insn: eb 15 0f 1f 00

...
case JMP32_INSN_OPCODE:
case CALL_INSN_OPCODE:
u8 *target = src + i + insn.length + insn.immediate.value;
u8 opcode = instr[i];

if (target < src || target > src + src_len) {
apply_reloc(insn.immediate.nbytes,
instr + i + insn_offset_immediate(&insn),
src - dest);

#define JMP_SIZE_DIFF JMP32_INSN_SIZE - JMP8_INSN_SIZE
if (opcode == JMP32_INSN_OPCODE) {
s32 jmp_off;

jmp_off = *(s32 *)(instr + i + insn_offset_immediate(&insn));
jmp_off += JMP_SIZE_DIFF;

/* Turn it into a 2-byte JMP if new offset allows. */
if (jmp_off >= -128 && jmp_off <= 127) {
instr[i] = JMP8_INSN_OPCODE;
instr[i + 1] = (s8)jmp_off;
add_nops(instr + i + 2, JMP_SIZE_DIFF);
}
}
}
}

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette