2023-11-02 11:29:17

by Uros Bizjak

[permalink] [raw]
Subject: [PATCH 2/3] x86/callthunks: Handle %rip-relative relocations in call thunk template

Contrary to alternatives, relocations are currently not supported in
call thunk templates. Implement minimal support for relocations
when copying template from its storage location to handle %rip-relative
addresses.

Support for relocations will be needed when PER_CPU_VAR macro switches
to %rip-relative addressing.

The patch allows unification of ASM_INCREMENT_CALL_DEPTH, which already
uses PER_CPU_VAR macro, with INCREMENT_CALL_DEPTH, used in call thunk
template, which is currently limited to use absolute address.

Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Uros Bizjak <[email protected]>
---
arch/x86/kernel/callthunks.c | 63 ++++++++++++++++++++++++++++++++----
1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index d0922cf94c90..bda09d82bff7 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -24,6 +24,8 @@

static int __initdata_or_module debug_callthunks;

+#define MAX_PATCH_LEN (255-1)
+
#define prdbg(fmt, args...) \
do { \
if (debug_callthunks) \
@@ -166,13 +168,51 @@ static const u8 nops[] = {
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
};

+#define apply_reloc_n(n_, p_, d_) \
+ do { \
+ s32 v = *(s##n_ *)(p_); \
+ v += (d_); \
+ BUG_ON((v >> 31) != (v >> (n_-1))); \
+ *(s##n_ *)(p_) = (s##n_)v; \
+ } while (0)
+
+static __always_inline
+void apply_reloc(int n, void *ptr, uintptr_t diff)
+{
+ switch (n) {
+ case 4: apply_reloc_n(32, ptr, diff); break;
+ default: BUG();
+ }
+}
+
+static void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src)
+{
+ for (int next, i = 0; i < len; i = next) {
+ struct insn insn;
+
+ if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
+ return;
+
+ next = i + insn.length;
+
+ if (insn_rip_relative(&insn))
+ apply_reloc(insn.displacement.nbytes,
+ buf + i + insn_offset_displacement(&insn),
+ src - dest);
+ }
+}
+
static void *patch_dest(void *dest, bool direct)
{
unsigned int tsize = SKL_TMPL_SIZE;
+ u8 insn_buff[MAX_PATCH_LEN];
u8 *pad = dest - tsize;

+ memcpy(insn_buff, skl_call_thunk_template, tsize);
+ apply_relocation(insn_buff, tsize, pad, skl_call_thunk_template);
+
/* Already patched? */
- if (!bcmp(pad, skl_call_thunk_template, tsize))
+ if (!bcmp(pad, insn_buff, tsize))
return pad;

/* Ensure there are nops */
@@ -182,9 +222,9 @@ static void *patch_dest(void *dest, bool direct)
}

if (direct)
- memcpy(pad, skl_call_thunk_template, tsize);
+ memcpy(pad, insn_buff, tsize);
else
- text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
+ text_poke_copy_locked(pad, insn_buff, tsize, true);
return pad;
}

@@ -281,20 +321,26 @@ void *callthunks_translate_call_dest(void *dest)
static bool is_callthunk(void *addr)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
- void *tmpl = skl_call_thunk_template;
+ u8 insn_buff[MAX_PATCH_LEN];
unsigned long dest;
+ u8 *pad;

dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
if (!thunks_initialized || skip_addr((void *)dest))
return false;

- return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
+ *pad = dest - tmpl_size;
+
+ memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+ apply_relocation(insn_buff, tmpl_size, pad, skl_call_thunk_template);
+
+ return !bcmp(pad, insn_buff, tmpl_size);
}

int x86_call_depth_emit_accounting(u8 **pprog, void *func)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
- void *tmpl = skl_call_thunk_template;
+ u8 insn_buff[MAX_PATCH_LEN];

if (!thunks_initialized)
return 0;
@@ -303,7 +349,10 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
if (func && is_callthunk(func))
return 0;

- memcpy(*pprog, tmpl, tmpl_size);
+ memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+ apply_relocation(insn_buff, tmpl_size, *pprog, skl_call_thunk_template);
+
+ memcpy(*pprog, insn_buff, tmpl_size);
*pprog += tmpl_size;
return tmpl_size;
}
--
2.41.0


2023-11-02 11:48:14

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/callthunks: Handle %rip-relative relocations in call thunk template

On Thu, Nov 02, 2023 at 12:25:47PM +0100, Uros Bizjak wrote:

> @@ -166,13 +168,51 @@ static const u8 nops[] = {
> 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
> };
>
> +#define apply_reloc_n(n_, p_, d_) \
> + do { \
> + s32 v = *(s##n_ *)(p_); \
> + v += (d_); \
> + BUG_ON((v >> 31) != (v >> (n_-1))); \
> + *(s##n_ *)(p_) = (s##n_)v; \
> + } while (0)
> +
> +static __always_inline
> +void apply_reloc(int n, void *ptr, uintptr_t diff)
> +{
> + switch (n) {
> + case 4: apply_reloc_n(32, ptr, diff); break;
> + default: BUG();
> + }
> +}
> +
> +static void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src)
> +{
> + for (int next, i = 0; i < len; i = next) {
> + struct insn insn;
> +
> + if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
> + return;
> +
> + next = i + insn.length;
> +
> + if (insn_rip_relative(&insn))
> + apply_reloc(insn.displacement.nbytes,
> + buf + i + insn_offset_displacement(&insn),
> + src - dest);
> + }
> +}

Isn't it simpler to use apply_relocation() from alternative.c?

Remove static, add decl, stuff like that?

2023-11-02 11:50:49

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/callthunks: Handle %rip-relative relocations in call thunk template

On Thu, Nov 2, 2023 at 12:44 PM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Nov 02, 2023 at 12:25:47PM +0100, Uros Bizjak wrote:
>
> > @@ -166,13 +168,51 @@ static const u8 nops[] = {
> > 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
> > };
> >
> > +#define apply_reloc_n(n_, p_, d_) \
> > + do { \
> > + s32 v = *(s##n_ *)(p_); \
> > + v += (d_); \
> > + BUG_ON((v >> 31) != (v >> (n_-1))); \
> > + *(s##n_ *)(p_) = (s##n_)v; \
> > + } while (0)
> > +
> > +static __always_inline
> > +void apply_reloc(int n, void *ptr, uintptr_t diff)
> > +{
> > + switch (n) {
> > + case 4: apply_reloc_n(32, ptr, diff); break;
> > + default: BUG();
> > + }
> > +}
> > +
> > +static void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src)
> > +{
> > + for (int next, i = 0; i < len; i = next) {
> > + struct insn insn;
> > +
> > + if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
> > + return;
> > +
> > + next = i + insn.length;
> > +
> > + if (insn_rip_relative(&insn))
> > + apply_reloc(insn.displacement.nbytes,
> > + buf + i + insn_offset_displacement(&insn),
> > + src - dest);
> > + }
> > +}
>
> Isn't it simpler to use apply_relocation() from alternative.c?

Yes, I was looking at that function, but somehow thought that it is a
bit overkill here, since we just need a %rip-relative reloc.

> Remove static, add decl, stuff like that?

On second thought, you are right. Should I move the above function
somewhere (reloc.c?) , or can I just use it from alternative.c and add
decl (where?) ?

Thanks,
Uros.

2023-11-02 11:57:25

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/callthunks: Handle %rip-relative relocations in call thunk template

On Thu, Nov 02, 2023 at 12:50:01PM +0100, Uros Bizjak wrote:

> > Remove static, add decl, stuff like that?
>
> On second thought, you are right. Should I move the above function
> somewhere (reloc.c?) , or can I just use it from alternative.c and add
> decl (where?) ?

Yeah, leave it there for now, perhaps asm/text-patching.h ?

2023-11-02 12:37:55

by Uros Bizjak

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86/callthunks: Handle %rip-relative relocations in call thunk template

On Thu, Nov 2, 2023 at 12:56 PM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Nov 02, 2023 at 12:50:01PM +0100, Uros Bizjak wrote:
>
> > > Remove static, add decl, stuff like that?
> >
> > On second thought, you are right. Should I move the above function
> > somewhere (reloc.c?) , or can I just use it from alternative.c and add
> > decl (where?) ?
>
> Yeah, leave it there for now, perhaps asm/text-patching.h ?

The new version boots OK and looks like the attached patch.

Uros.


Attachments:
reloc.diff.txt (3.50 kB)