2024-03-16 23:21:24

by Joan Bruguera Micó

Subject: [PATCH 0/2] x86/bpf: Fixes for the BPF JIT with retbleed=stuff

Fixes two issues that cause kernel panics when using the BPF JIT with
the call depth tracking / stuffing mitigation for Skylake processors
(`retbleed=stuff`). Both issues can be triggered by running simple
BPF programs (e.g. running the test suite should trigger both).
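For reference, a minimal reproducer sketch (assumptions: kernel booted
with `retbleed=stuff` and the JIT enabled, raw bpf(2) load without
libbpf, error handling elided):

  /* Loads a trivial socket filter that calls bpf_ktime_get_ns(), which
   * makes the JIT emit call depth accounting before the helper CALL.
   * On an affected kernel the mis-assembled program should fault once
   * it is attached and run; the load itself already goes through the
   * JIT. */
  #include <linux/bpf.h>
  #include <string.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  int main(void)
  {
          struct bpf_insn insns[] = {
                  { .code = BPF_JMP | BPF_CALL, .imm = BPF_FUNC_ktime_get_ns },
                  { .code = BPF_JMP | BPF_EXIT },  /* exit, r0 = helper result */
          };
          union bpf_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
          attr.insn_cnt  = 2;
          attr.insns     = (unsigned long)insns;
          attr.license   = (unsigned long)"GPL";

          return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)) < 0;
  }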

The first (a resubmit) fixes a trivial error in calculating the
destination IP for call instructions with call depth tracking.

The second fixes the IP used for relocations, broken by the recently
introduced %rip-relative addressing for PER_CPU_VAR.

Joan Bruguera Micó (2):
x86/bpf: Fix IP after emitting call depth accounting
x86/bpf: Fix IP for relocating call depth accounting

arch/x86/include/asm/alternative.h | 4 ++--
arch/x86/kernel/callthunks.c | 4 ++--
arch/x86/net/bpf_jit_comp.c | 22 ++++++++++------------
3 files changed, 14 insertions(+), 16 deletions(-)

--
2.44.0



2024-03-16 23:21:35

by Joan Bruguera Micó

Subject: [PATCH 1/2] x86/bpf: Fix IP after emitting call depth accounting

Adjust the IP passed to `emit_patch` so it calculates the correct offset
for the CALL instruction when `x86_call_depth_emit_accounting` emits code.
Otherwise the CALL target is off by the size of the accounting code, so
execution skips the first instructions of the callee and most likely crashes.
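To spell out the offset arithmetic (a sketch; N stands for the number of
bytes the accounting thunk occupies, not an identifier in the code):

  /* emit_patch() encodes the 5-byte E8 CALL as:
   *
   *     rel32 = func - (ip + 5)
   *
   * x86_call_depth_emit_accounting() first writes N thunk bytes, so the
   * E8 opcode actually sits at ip + N and the CPU resolves:
   *
   *     target = (ip + N + 5) + rel32 = func + N
   *
   * landing N bytes into the callee. Passing adjusted_ip = ip + N
   * restores rel32 = func - (ip + N + 5), i.e. target = func.
   */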

Fixes: b2e9dfe54be4 ("x86/bpf: Emit call depth accounting if required")
Link: https://lore.kernel.org/lkml/[email protected]/
Signed-off-by: Joan Bruguera Micó <[email protected]>
---
arch/x86/net/bpf_jit_comp.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a7ba8e178645..09f7dc9d4d65 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -479,9 +479,10 @@ static int emit_call(u8 **pprog, void *func, void *ip)

static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
+ void *adjusted_ip;
OPTIMIZER_HIDE_VAR(func);
- x86_call_depth_emit_accounting(pprog, func);
- return emit_patch(pprog, func, ip, 0xE8);
+ adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func);
+ return emit_patch(pprog, func, adjusted_ip, 0xE8);
}

static int emit_jump(u8 **pprog, void *func, void *ip)
--
2.44.0


2024-03-16 23:21:46

by Joan Bruguera Micó

Subject: [PATCH 2/2] x86/bpf: Fix IP for relocating call depth accounting

The recently introduced support for %rip-relative relocations in the
call thunk template assumes that the code is being patched in-place,
so the destination of the relocation matches the address of the code.
This is not true for the call depth accounting emitted by the BPF JIT,
so the calculated address is wrong and usually causes a page fault.

Pass the destination IP when the BPF JIT emits call depth accounting.
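Sketch of the displacement fixup involved (T = the template's address,
D = the address the copy will execute from, S = the per-CPU symbol):

  /* A %rip-relative instruction in the template at T encodes:
   *
   *     disp  = S - (T + insn_end)
   *
   * For a copy that runs at D, the correct encoding is:
   *
   *     disp' = S - (D + insn_end) = disp + (T - D)
   *
   * so apply_relocation() needs D. The BPF JIT emits into a temporary
   * buffer (*pprog) but the code executes from the final image address
   * (ip); using *pprog for D shifts every resolved S by (*pprog - ip).
   */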

Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template")
Signed-off-by: Joan Bruguera Micó <[email protected]>
---
arch/x86/include/asm/alternative.h | 4 ++--
arch/x86/kernel/callthunks.c | 4 ++--
arch/x86/net/bpf_jit_comp.c | 19 ++++++++-----------
3 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index fcd20c6dc7f9..67b68d0d17d1 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
struct module *mod);
extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
#else
static __always_inline void callthunks_patch_builtin_calls(void) {}
static __always_inline void
@@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
return dest;
}
static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
- void *func)
+ void *func, void *ip)
{
return 0;
}
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 30335182b6b0..e92ff0c11db8 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
return !bcmp(pad, insn_buff, tmpl_size);
}

-int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
u8 insn_buff[MAX_PATCH_LEN];
@@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
return 0;

memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
- apply_relocation(insn_buff, tmpl_size, *pprog,
+ apply_relocation(insn_buff, tmpl_size, ip,
skl_call_thunk_template, tmpl_size);

memcpy(*pprog, insn_buff, tmpl_size);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 09f7dc9d4d65..f2e8769f5eee 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -481,7 +481,7 @@ static int emit_rsb_call(u8 **pprog, void *func, void *ip)
{
void *adjusted_ip;
OPTIMIZER_HIDE_VAR(func);
- adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func);
+ adjusted_ip = ip + x86_call_depth_emit_accounting(pprog, func, ip);
return emit_patch(pprog, func, adjusted_ip, 0xE8);
}

@@ -1973,20 +1973,17 @@ st: if (is_imm8(insn->off))

/* call */
case BPF_JMP | BPF_CALL: {
- int offs;
+ u8 *ip = image + addrs[i - 1];

func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
- if (!imm32)
- return -EINVAL;
- offs = 7 + x86_call_depth_emit_accounting(&prog, func);
- } else {
- if (!imm32)
- return -EINVAL;
- offs = x86_call_depth_emit_accounting(&prog, func);
+ ip += 7;
}
- if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+ if (!imm32)
+ return -EINVAL;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+ if (emit_call(&prog, func, ip))
return -EINVAL;
break;
}
@@ -2836,7 +2833,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* Direct-call fentry stub, as such it needs accounting for the
* __fentry__ call.
*/
- x86_call_depth_emit_accounting(&prog, NULL);
+ x86_call_depth_emit_accounting(&prog, NULL, image);
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
--
2.44.0


2024-03-18 07:12:35

by Uros Bizjak

Subject: Re: [PATCH 2/2] x86/bpf: Fix IP for relocating call depth accounting

On Sun, Mar 17, 2024 at 12:21 AM Joan Bruguera Micó
<[email protected]> wrote:
>
> The recently introduced support for %rip-relative relocations in the
> call thunk template assumes that the code is being patched in-place,
> so the destination of the relocation matches the address of the code.
> This is not true for the call depth accounting emitted by the BPF JIT,
> so the calculated address is wrong and usually causes a page fault.
>
> Pass the destination IP when the BPF JIT emits call depth accounting.
>
> Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template")
> Signed-off-by: Joan Bruguera Micó <[email protected]>

Reviewed-by: Uros Bizjak <[email protected]>


2024-03-21 09:05:21

by Ingo Molnar

Subject: Re: [PATCH 2/2] x86/bpf: Fix IP for relocating call depth accounting


* Joan Bruguera Micó <[email protected]> wrote:

> The recently introduced support for %rip-relative relocations in the
> call thunk template assumes that the code is being patched in-place,
> so the destination of the relocation matches the address of the code.
> This is not true for the call depth accounting emitted by the BPF JIT,
> so the calculated address is wrong and usually causes a page fault.
>
> Pass the destination IP when the BPF JIT emits call depth accounting.
>
> Fixes: 17bce3b2ae2d ("x86/callthunks: Handle %rip-relative relocations in call thunk template")
> Signed-off-by: Joan Bruguera Micó <[email protected]>
> ---
> arch/x86/include/asm/alternative.h | 4 ++--
> arch/x86/kernel/callthunks.c | 4 ++--

For the generic x86 changes - I guess you want to push this upstream via
the networking tree:

Acked-by: Ingo Molnar <[email protected]>

Thanks,

Ingo