2021-10-20 11:07:46

by Peter Zijlstra

Subject: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

Current BPF codegen doesn't respect X86_FEATURE_RETPOLINE* flags and
unconditionally emits a thunk call; this is sub-optimal and doesn't
match the regular, compiler-generated code.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/include/asm/nospec-branch.h | 59 -----------------------------
arch/x86/net/bpf_jit_comp.c | 71 ++++++++++++++++++++---------------
arch/x86/net/bpf_jit_comp32.c | 22 ++++++++--
3 files changed, 59 insertions(+), 93 deletions(-)

--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -303,63 +303,4 @@ static inline void mds_idle_clear_cpu_bu

#endif /* __ASSEMBLY__ */

-/*
- * Below is used in the eBPF JIT compiler and emits the byte sequence
- * for the following assembly:
- *
- * With retpolines configured:
- *
- * callq do_rop
- * spec_trap:
- * pause
- * lfence
- * jmp spec_trap
- * do_rop:
- * mov %rcx,(%rsp) for x86_64
- * mov %edx,(%esp) for x86_32
- * retq
- *
- * Without retpolines configured:
- *
- * jmp *%rcx for x86_64
- * jmp *%edx for x86_32
- */
-#ifdef CONFIG_RETPOLINE
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RCX_BPF_JIT_SIZE 17
-# define RETPOLINE_RCX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* callq do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
- EMIT1(0xC3); /* retq */ \
-} while (0)
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* call do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
- EMIT1(0xC3); /* ret */ \
-} while (0)
-# endif
-#else /* !CONFIG_RETPOLINE */
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RCX_BPF_JIT_SIZE 2
-# define RETPOLINE_RCX_BPF_JIT() \
- EMIT2(0xFF, 0xE1); /* jmp *%rcx */
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
- EMIT2(0xFF, 0xE2) /* jmp *%edx */
-# endif
-#endif
-
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -396,6 +396,37 @@ static int get_pop_bytes(bool *callee_re
return bytes;
}

+#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
+
+#ifdef CONFIG_RETPOLINE
+#define INDIRECT_SIZE (2 + 3*cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+#else
+#define INDIRECT_SIZE (2)
+#endif
+
+static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
+{
+ u8 *prog = *pprog;
+
+#ifdef CONFIG_RETPOLINE
+ static void * const reg_thunk[] = {
+#define GEN(reg) __x86_indirect_thunk_ ## reg,
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+ };
+
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ EMIT_LFENCE();
+ EMIT2(0xFF, 0xE0 + reg);
+ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+ emit_jump(&prog, reg_thunk[reg], ip);
+ } else
+#endif
+ EMIT2(0xFF, 0xE0 + reg);
+
+ *pprog = prog;
+}
+
/*
* Generate the following code:
*
@@ -411,10 +442,10 @@ static int get_pop_bytes(bool *callee_re
* out:
*/
static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
- u32 stack_depth)
+ u32 stack_depth, u8 *ip)
{
int tcc_off = -4 - round_up(stack_depth, 8);
- u8 *prog = *pprog;
+ u8 *prog = *pprog, *start = *pprog;
int pop_bytes = 0;
int off1 = 42;
int off2 = 31;
@@ -448,7 +479,7 @@ static void emit_bpf_tail_call_indirect(
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
+#define OFFSET1 (off1 + INDIRECT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */

/*
@@ -457,7 +488,7 @@ static void emit_bpf_tail_call_indirect(
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
+#define OFFSET2 (off2 + INDIRECT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
@@ -471,7 +502,7 @@ static void emit_bpf_tail_call_indirect(
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
+#define OFFSET3 (off3 + INDIRECT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */

*pprog = prog;
@@ -493,7 +524,7 @@ static void emit_bpf_tail_call_indirect(
* rdi == ctx (1st arg)
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
- RETPOLINE_RCX_BPF_JIT();
+ emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));

/* out: */
*pprog = prog;
@@ -1220,8 +1251,7 @@ static int do_jit(struct bpf_prog *bpf_p
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
if (boot_cpu_has(X86_FEATURE_XMM2))
- /* Emit 'lfence' */
- EMIT3(0x0F, 0xAE, 0xE8);
+ EMIT_LFENCE();
break;

/* ST: *(u8*)(dst_reg + off) = imm */
@@ -1411,7 +1441,8 @@ st: if (is_imm8(insn->off))
else
emit_bpf_tail_call_indirect(&prog,
callee_regs_used,
- bpf_prog->aux->stack_depth);
+ bpf_prog->aux->stack_depth,
+ image + addrs[i - 1]);
break;

/* cond jump */
@@ -2117,24 +2148,6 @@ int arch_prepare_bpf_trampoline(struct b
return ret;
}

-static int emit_fallback_jump(u8 **pprog)
-{
- u8 *prog = *pprog;
- int err = 0;
-
-#ifdef CONFIG_RETPOLINE
- /* Note that this assumes the the compiler uses external
- * thunks for indirect calls. Both clang and GCC use the same
- * naming convention for external thunks.
- */
- err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
-#else
- EMIT2(0xFF, 0xE2); /* jmp rdx */
-#endif
- *pprog = prog;
- return err;
-}
-
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
u8 *jg_reloc, *prog = *pprog;
@@ -2156,9 +2169,7 @@ static int emit_bpf_dispatcher(u8 **ppro
if (err)
return err;

- err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
- if (err)
- return err;
+ emit_indirect_jump(&prog, 2 /* rdx */, prog);

*pprog = prog;
return 0;
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -15,6 +15,7 @@
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
+#include <asm/asm-prototypes.h>
#include <linux/bpf.h>

/*
@@ -1267,6 +1268,19 @@ static void emit_epilogue(u8 **pprog, u3
*pprog = prog;
}

+static void emit_jmp_edx(u8 **pprog, u8 *ip)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+#ifdef CONFIG_RETPOLINE
+ EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
+#else
+ EMIT2(0xFF, 0xE2);
+#endif
+ *pprog = prog;
+}
+
/*
* Generate the following code:
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
@@ -1280,9 +1294,9 @@ static void emit_epilogue(u8 **pprog, u3
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call(u8 **pprog)
+static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
{
- u8 *prog = *pprog;
+ u8 *prog = *pprog, *start = *pprog;
int cnt = 0;
const u8 *r1 = bpf2ia32[BPF_REG_1];
const u8 *r2 = bpf2ia32[BPF_REG_2];
@@ -1362,7 +1376,7 @@ static void emit_bpf_tail_call(u8 **ppro
* eax == ctx (1st arg)
* edx == prog->bpf_func + prologue_size
*/
- RETPOLINE_EDX_BPF_JIT();
+ emit_jmp_edx(&prog, ip + (prog - start));

if (jmp_label1 == -1)
jmp_label1 = cnt;
@@ -2122,7 +2136,7 @@ static int do_jit(struct bpf_prog *bpf_p
break;
}
case BPF_JMP | BPF_TAIL_CALL:
- emit_bpf_tail_call(&prog);
+ emit_bpf_tail_call(&prog, image + addrs[i - 1]);
break;

/* cond jump */
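
As a rough reference for the emit_indirect_jump() hunk above (an illustrative sketch reconstructed from the patch, not extra patch content): for a jump through %rcx (reg == 1) the helper boils down to one of three sequences, whose sizes are exactly what the INDIRECT_SIZE macro accounts for:

	/* X86_FEATURE_RETPOLINE_AMD: inline lfence + indirect jmp, 3 + 2 bytes */
	EMIT_LFENCE();					/* 0f ae e8   lfence    */
	EMIT2(0xFF, 0xE1);				/* ff e1      jmp *%rcx */

	/* X86_FEATURE_RETPOLINE: direct jmp to the per-register thunk, 5 bytes */
	emit_jump(&prog, __x86_indirect_thunk_rcx, ip);	/* e9 <rel32> */

	/* neither: plain indirect jmp, 2 bytes */
	EMIT2(0xFF, 0xE1);				/* ff e1      jmp *%rcx */

Either way the tail-call displacement bookkeeping only needs INDIRECT_SIZE: 2 bytes without retpolines, 5 with.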



2021-10-20 11:11:17

by Peter Zijlstra

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Wed, Oct 20, 2021 at 12:44:56PM +0200, Peter Zijlstra wrote:
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c

> +#ifdef CONFIG_RETPOLINE
> +#define INDIRECT_SIZE (2 + 3*cpu_feature_enabled(X86_FEATURE_RETPOLINE))
> +#else
> +#define INDIRECT_SIZE (2)
> +#endif

> @@ -411,10 +442,10 @@ static int get_pop_bytes(bool *callee_re
> * out:
> */
> static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
> - u32 stack_depth)
> + u32 stack_depth, u8 *ip)
> {
> int tcc_off = -4 - round_up(stack_depth, 8);
> - u8 *prog = *pprog;
> + u8 *prog = *pprog, *start = *pprog;
> int pop_bytes = 0;
> int off1 = 42;
> int off2 = 31;
> @@ -448,7 +479,7 @@ static void emit_bpf_tail_call_indirect(
> EMIT2(0x89, 0xD2); /* mov edx, edx */
> EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
> offsetof(struct bpf_array, map.max_entries));
> -#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
> +#define OFFSET1 (off1 + INDIRECT_SIZE) /* Number of bytes to jump */
> EMIT2(X86_JBE, OFFSET1); /* jbe out */
>
> /*
> @@ -457,7 +488,7 @@ static void emit_bpf_tail_call_indirect(
> */
> EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
> EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
> -#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
> +#define OFFSET2 (off2 + INDIRECT_SIZE)
> EMIT2(X86_JA, OFFSET2); /* ja out */
> EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
> EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
> @@ -471,7 +502,7 @@ static void emit_bpf_tail_call_indirect(
> * goto out;
> */
> EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
> -#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
> +#define OFFSET3 (off3 + INDIRECT_SIZE)
> EMIT2(X86_JE, OFFSET3); /* je out */
>
> *pprog = prog;
> @@ -493,7 +524,7 @@ static void emit_bpf_tail_call_indirect(
> * rdi == ctx (1st arg)
> * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
> */
> - RETPOLINE_RCX_BPF_JIT();
> + emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
>
> /* out: */
> *pprog = prog;

Alexei; could the above not be further improved with something like the
below?

Despite several hours trying and Song helping, I can't seem to run
anything bpf, that stuff is cursed. So I've no idea if the below
actually works, but it seems reasonable.

---

--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -398,12 +398,6 @@ static int get_pop_bytes(bool *callee_re

#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)

-#ifdef CONFIG_RETPOLINE
-#define INDIRECT_SIZE (2 + 3*cpu_feature_enabled(X86_FEATURE_RETPOLINE))
-#else
-#define INDIRECT_SIZE (2)
-#endif
-
static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
u8 *prog = *pprog;
@@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
{
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog, *start = *pprog;
- int pop_bytes = 0;
- int off1 = 42;
- int off2 = 31;
- int off3 = 9;
-
- /* count the additional bytes used for popping callee regs from stack
- * that need to be taken into account for each of the offsets that
- * are used for bailing out of the tail call
- */
- pop_bytes = get_pop_bytes(callee_regs_used);
- off1 += pop_bytes;
- off2 += pop_bytes;
- off3 += pop_bytes;
-
- if (stack_depth) {
- off1 += 7;
- off2 += 7;
- off3 += 7;
- }
+ static int out_label = -1;
+ int offset;

/*
* rdi - pointer to ctx
@@ -479,8 +456,9 @@ static void emit_bpf_tail_call_indirect(
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + INDIRECT_SIZE) /* Number of bytes to jump */
- EMIT2(X86_JBE, OFFSET1); /* jbe out */
+
+ offset = out_label - (prog - start) + 2;
+ EMIT2(X86_JBE, offset); /* jbe out */

/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -488,8 +466,9 @@ static void emit_bpf_tail_call_indirect(
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + INDIRECT_SIZE)
- EMIT2(X86_JA, OFFSET2); /* ja out */
+
+ offset = out_label - (prog - start) + 2;
+ EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */

@@ -502,8 +481,9 @@ static void emit_bpf_tail_call_indirect(
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
-#define OFFSET3 (off3 + INDIRECT_SIZE)
- EMIT2(X86_JE, OFFSET3); /* je out */
+
+ offset = out_label - (prog - start) + 2;
+ EMIT2(X86_JE, offset); /* je out */

*pprog = prog;
pop_callee_regs(pprog, callee_regs_used);
@@ -527,6 +507,8 @@ static void emit_bpf_tail_call_indirect(
emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));

/* out: */
+ out_label = (prog - start);
+
*pprog = prog;
}

2021-10-20 16:59:14

by Josh Poimboeuf

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
> Alexei; could the above not be further improved with something like the
> below?
>
> Despite several hours trying and Song helping, I can't seem to run
> anything bpf, that stuff is cursed. So I've no idea if the below
> actually works, but it seems reasonable.

The below fix gets it to run with test_verifier.

I do like it, it does seem less fiddly and more robust against future
changes, though it probably needs a comment for 'out_label' clarifying
it only works because this function is always called at least twice for
a given bpf_tail_call emission.

---

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 08f32c9fceaa..c9230c5bbca5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -457,7 +457,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));

- offset = out_label - (prog - start) + 2;
+ offset = out_label - (prog - start + 2);
EMIT2(X86_JBE, offset); /* jbe out */

/*
@@ -467,7 +467,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */

- offset = out_label - (prog - start) + 2;
+ offset = out_label - (prog - start + 2);
EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
@@ -482,7 +482,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */

- offset = out_label - (prog - start) + 2;
+ offset = out_label - (prog - start + 2);
EMIT2(X86_JE, offset); /* je out */

*pprog = prog;
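
For completeness, the arithmetic the fix restores (a sketch using the names from the hunk above): a Jcc rel8 displacement is relative to the end of the two-byte instruction, so with out_label recorded on an earlier pass the intended value is

	offset = out_label - ((prog - start) + 2);	/* target - end of the jcc */

whereas the original expression added the 2 to the distance rather than to the current position, landing 4 bytes past the label.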

2021-10-20 19:26:06

by Peter Zijlstra

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Wed, Oct 20, 2021 at 09:56:55AM -0700, Josh Poimboeuf wrote:
> On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
> > Alexei; could the above not be further improved with something like the
> > below?
> >
> > Despite several hours trying and Song helping, I can't seem to run
> > anything bpf, that stuff is cursed. So I've no idea if the below
> > actually works, but it seems reasonable.
>
> The below fix gets it to run with test_verifier.

Argh, I'm fast running out of brown paper bags at this rate :/

>
> I do like it, it does seem less fiddly and more robust against future
> changes, though it probably needs a comment for 'out_label' clarifying
> it only works because this function is always called at least twice for
> a given bpf_tail_call emission.

Right.

2021-10-21 00:09:59

by Alexei Starovoitov

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
> > - RETPOLINE_RCX_BPF_JIT();
> > + emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
> >
> > /* out: */
> > *pprog = prog;
>
> Alexei; could the above not be further improved with something like the
> below?

Sorry for the delay. I was traveling last week
and Daniel is on PTO this week.

> Despite several hours trying and Song helping, I can't seem to run
> anything bpf, that stuff is cursed. So I've no idea if the below
> actually works, but it seems reasonable.

It's certainly delicate.

> @@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
> {
> int tcc_off = -4 - round_up(stack_depth, 8);
> u8 *prog = *pprog, *start = *pprog;
> - int pop_bytes = 0;
> - int off1 = 42;
> - int off2 = 31;
> - int off3 = 9;
> -
> - /* count the additional bytes used for popping callee regs from stack
> - * that need to be taken into account for each of the offsets that
> - * are used for bailing out of the tail call
> - */
> - pop_bytes = get_pop_bytes(callee_regs_used);
> - off1 += pop_bytes;
> - off2 += pop_bytes;
> - off3 += pop_bytes;
> -
> - if (stack_depth) {
> - off1 += 7;
> - off2 += 7;
> - off3 += 7;
> - }
> + static int out_label = -1;

Interesting idea!
All insn emits trying to do the right thing from the start.
Here the logic assumes that there will be at least two passes over image.
I think that is correct, but we never had such assumption.
A comment is certainly a must-have.
The race is possible too. Not sure whether READ_ONCE/WRITE_ONCE
are really warranted though. Might be overkill.

Nice that Josh's test_verifier is passing, but it doesn't provide
a ton of coverage. test_progs has a lot more.
Once you have a git branch with all the changes I can give it a go.
Also you can rely on our BPF CI.
Just cc your patchset to bpf@vger and add [PATCH bpf-next] to a subject.
In patchwork there will be "bpf/vmtest-bpf-next" link that
builds kernel, selftests and runs everything.
It's pretty much the same as selftests/bpf/vmtest.sh, but with the latest
clang nightly and other deps like pahole.

2021-10-21 00:12:17

by Alexei Starovoitov

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Wed, Oct 20, 2021 at 12:44:56PM +0200, Peter Zijlstra wrote:
> +
> + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
> + EMIT_LFENCE();
> + EMIT2(0xFF, 0xE0 + reg);
> + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
> + emit_jump(&prog, reg_thunk[reg], ip);
> + } else

One more question.
What's the deal with AMD? I thought retpoline was effective on it as well.
Is lfence an optimization, or did retpoline turn out to be not enough
in some cases?

2021-10-21 00:22:33

by Josh Poimboeuf

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Wed, Oct 20, 2021 at 05:07:53PM -0700, Alexei Starovoitov wrote:
> On Wed, Oct 20, 2021 at 12:44:56PM +0200, Peter Zijlstra wrote:
> > +
> > + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
> > + EMIT_LFENCE();
> > + EMIT2(0xFF, 0xE0 + reg);
> > + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
> > + emit_jump(&prog, reg_thunk[reg], ip);
> > + } else
>
> One more question.
> What's the deal with AMD? I thought retpoline was effective on it as well.
> Is lfence an optimization, or did retpoline turn out to be not enough
> in some cases?

Yes, it's basically an optimization. AMD recommends it presumably
because it's quite a bit faster than a retpoline.

According to AMD it shrinks the speculative execution window enough so
that Spectre v2 isn't a threat.

--
Josh
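
To put the two options side by side (a sketch restating the sequence this patch removes from nospec-branch.h, not new material), the generic thunk body for %rcx is the classic retpoline:

	call do_rop
spec_trap:
	pause
	lfence
	jmp spec_trap
do_rop:
	mov %rcx, (%rsp)
	ret

which traps the speculative indirect branch in the pause/lfence loop and sends the real one through the overwritten return address, while the AMD-recommended form is just:

	lfence
	jmp *%rcx

relying on lfence alone to shrink the speculation window, hence the speed difference.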

2021-10-21 08:49:04

by Peter Zijlstra

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Wed, Oct 20, 2021 at 05:05:02PM -0700, Alexei Starovoitov wrote:
> On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:

> > @@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
> > {
> > int tcc_off = -4 - round_up(stack_depth, 8);
> > u8 *prog = *pprog, *start = *pprog;
> > - int pop_bytes = 0;
> > - int off1 = 42;
> > - int off2 = 31;
> > - int off3 = 9;
> > -
> > - /* count the additional bytes used for popping callee regs from stack
> > - * that need to be taken into account for each of the offsets that
> > - * are used for bailing out of the tail call
> > - */
> > - pop_bytes = get_pop_bytes(callee_regs_used);
> > - off1 += pop_bytes;
> > - off2 += pop_bytes;
> > - off3 += pop_bytes;
> > -
> > - if (stack_depth) {
> > - off1 += 7;
> > - off2 += 7;
> > - off3 += 7;
> > - }
> > + static int out_label = -1;
>
> Interesting idea!

I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
lot more robust than the 64bit one and I couldn't figure out why the
difference.

> All insn emits trying to do the right thing from the start.
> Here the logic assumes that there will be at least two passes over image.
> I think that is correct, but we never had such assumption.

That's not exactly true; I think image is NULL on every first run, so
all insn that depend on it will be wrong to start with. Equally there's
a number of insn that seem to depend on addrs[i], that also requires at
least two passes.
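
Schematically, the convergence both points rely on looks something like this (an illustrative sketch, not the exact loop in bpf_int_jit_compile(); MAX_PASSES and alloc_image() are placeholder names):

	for (pass = 0; pass < MAX_PASSES; pass++) {
		proglen = do_jit(prog, addrs, image, ...);	/* image == NULL on early passes     */
		if (image)
			break;					/* final pass emitted the real bytes */
		if (proglen == oldproglen)
			image = alloc_image(proglen);		/* sizes converged; do one more pass */
		oldproglen = proglen;
	}

so anything recorded during one pass (addrs[], image-relative addresses, or an out-label position) is only guaranteed correct once a later pass re-emits with it.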

> A comment is certainly a must-have.

I can certainly add one, although I think we'll disagree on the comment
style :-)

> The race is possible too. Not sure whether READ_ONCE/WRITE_ONCE
> are really warranted though. Might be overkill.

Is there concurrency on the jit?

> Once you have a git branch with all the changes I can give it a go.

Ok, I'll go polish this thing and stick it in the tree mentioned in the
cover letter.

> Also you can rely on our BPF CI.
> Just cc your patchset to bpf@vger and add [PATCH bpf-next] to a subject.
> In patchwork there will be "bpf/vmtest-bpf-next" link that
> builds kernel, selftests and runs everything.

What's a patchwork and where do I find it?

> It's pretty much the same as selftests/bpf/vmtest.sh, but with the latest
> clang nightly and other deps like pahole.

nice.

2021-10-21 09:01:44

by Peter Zijlstra

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Wed, Oct 20, 2021 at 05:18:08PM -0700, Josh Poimboeuf wrote:
> On Wed, Oct 20, 2021 at 05:07:53PM -0700, Alexei Starovoitov wrote:
> > On Wed, Oct 20, 2021 at 12:44:56PM +0200, Peter Zijlstra wrote:
> > > +
> > > + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
> > > + EMIT_LFENCE();
> > > + EMIT2(0xFF, 0xE0 + reg);
> > > + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
> > > + emit_jump(&prog, reg_thunk[reg], ip);
> > > + } else
> >
> > One more question.
> > What's the deal with AMD? I thought retpoline was effective on it as well.
> > Is lfence an optimization, or did retpoline turn out to be not enough
> > in some cases?
>
> Yes, it's basically an optimization. AMD recommends it presumably
> because it's quite a bit faster than a retpoline.
>
> According to AMD it shrinks the speculative execution window enough so
> that Spectre v2 isn't a threat.

Right, also note that we've been using alternatives to patch the thunk
to lfence;jmp for AMD pretty much forever.

Inlining it is better tho; just a shame clang seems to insist on r11,
which means we cannot fit it in the thunk call site for them :/
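
Rough byte math behind that (illustrative, not taken from the series):

	e8/e9 <rel32>		call/jmp __x86_indirect_thunk_<reg>	5 bytes (the patched site)
	0f ae e8		lfence					3 bytes
	ff d0+r / ff e0+r	call/jmp *%reg, legacy register		2 bytes -> 3 + 2 = 5, fits
	41 ff d3 / 41 ff e3	call/jmp *%r11				3 bytes -> 3 + 3 = 6, does not fit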

2021-10-21 18:07:03

by Alexei Starovoitov

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Thu, Oct 21, 2021 at 1:47 AM Peter Zijlstra <[email protected]> wrote:
>
> On Wed, Oct 20, 2021 at 05:05:02PM -0700, Alexei Starovoitov wrote:
> > On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
>
> > > @@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
> > > {
> > > int tcc_off = -4 - round_up(stack_depth, 8);
> > > u8 *prog = *pprog, *start = *pprog;
> > > - int pop_bytes = 0;
> > > - int off1 = 42;
> > > - int off2 = 31;
> > > - int off3 = 9;
> > > -
> > > - /* count the additional bytes used for popping callee regs from stack
> > > - * that need to be taken into account for each of the offsets that
> > > - * are used for bailing out of the tail call
> > > - */
> > > - pop_bytes = get_pop_bytes(callee_regs_used);
> > > - off1 += pop_bytes;
> > > - off2 += pop_bytes;
> > > - off3 += pop_bytes;
> > > -
> > > - if (stack_depth) {
> > > - off1 += 7;
> > > - off2 += 7;
> > > - off3 += 7;
> > > - }
> > > + static int out_label = -1;
> >
> > Interesting idea!
>
> I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> lot more robust than the 64bit one and I couldn't figure out why the
> difference.

Interesting. Daniel will recognize that trick then :)

> > All insn emits trying to do the right thing from the start.
> > Here the logic assumes that there will be at least two passes over image.
> > I think that is correct, but we never had such assumption.
>
> That's not exactly true; I think image is NULL on every first run, so
> all insn that depend on it will be wrong to start with. Equally there's
> a number of insn that seem to depend on addrs[i], that also requires at
> least two passes.

Right. The image will be allocated after size converges and
addrs[] is inited with 64.
So there is certainly more than one pass.
I was saying that emit* helpers didn't have that assumption.
Looks like 32-bit JIT had.

> > A comment is certainly a must-have.
>
> I can certainly add one, although I think we'll disagree on the comment
> style :-)

I think we're on the same page actually.

> > The race is possible too. Not sure whether READ_ONCE/WRITE_ONCE
> > are really warranted though. Might be overkill.
>
> Is there concurrency on the jit?

The JIT of different progs can happen in parallel.

> > Once you have a git branch with all the changes I can give it a go.
>
> Ok, I'll go polish this thing and stick it in the tree mentioned in the
> cover letter.
>
> > Also you can rely on our BPF CI.
> > Just cc your patchset to bpf@vger and add [PATCH bpf-next] to a subject.
> > In patchwork there will be "bpf/vmtest-bpf-next" link that
> > builds kernel, selftests and runs everything.
>
> What's a patchwork and where do I find it?

https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
Click on any patch, search for 'bpf/vmtest-bpf-next' and follow the
'VM_Test' link.
The summary of the test run is available without logging in into github.
To see detailed logs you need to be logged in with your github account.
It's a silly limitation they have.
They even have a button 'Sign in to view logs'. Oh well.

> > It's pretty much the same as selftests/bpf/vmtest.sh, but with the latest
> > clang nightly and other deps like pahole.
>
> nice.

One more thing. There is test_bpf.ko.
Just insmod it and it will run a ton of JIT tests that we cannot do
from user space.
Please use the bpf-next tree though. A few weeks ago Johan Almbladh added
a lot more tests to it.

2021-10-21 22:43:46

by Peter Zijlstra

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Thu, Oct 21, 2021 at 11:03:33AM -0700, Alexei Starovoitov wrote:

> > I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> > lot more robust than the 64bit one and I couldn't figure out why the
> > difference.
>
> Interesting. Daniel will recognize that trick then :)

> > Is there concurrency on the jit?
>
> The JIT of different progs can happen in parallel.

In that case I don't think the patch is safe. I'll see if I can find a
variant that doesn't use static storage.

2021-10-21 23:25:50

by Alexei Starovoitov

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Thu, Oct 21, 2021 at 3:40 PM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Oct 21, 2021 at 11:03:33AM -0700, Alexei Starovoitov wrote:
>
> > > I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> > > lot more robust than the 64bit one and I couldn't figure out why the
> > > difference.
> >
> > Interesting. Daniel will recognize that trick then :)
>
> > > Is there concurrency on the jit?
> >
> > The JIT of different progs can happen in parallel.
>
> In that case I don't think the patch is safe. I'll see if I can find a
> variant that doesn't use static storage.

The variable can only change from one fixed value to another fixed value.
Different threads will compute the same value. So I think it's safe
as-is. READ_ONCE/WRITE_ONCE won't hurt though.

2021-10-21 23:41:36

by Josh Poimboeuf

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Thu, Oct 21, 2021 at 04:24:33PM -0700, Alexei Starovoitov wrote:
> On Thu, Oct 21, 2021 at 3:40 PM Peter Zijlstra <[email protected]> wrote:
> >
> > On Thu, Oct 21, 2021 at 11:03:33AM -0700, Alexei Starovoitov wrote:
> >
> > > > I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> > > > lot more robust than the 64bit one and I couldn't figure out why the
> > > > difference.
> > >
> > > Interesting. Daniel will recognize that trick then :)
> >
> > > > Is there concurrency on the jit?
> > >
> > > The JIT of different progs can happen in parallel.
> >
> > In that case I don't think the patch is safe. I'll see if I can find a
> > variant that doesn't use static storage.
>
> The variable can only change from one fixed value to another fixed value.
> Different threads will compute the same value. So I think it's safe
> as-is. READ_ONCE/WRITE_ONCE won't hurt though.

But the size of the generated code differs based on the
emit_bpf_tail_call_indirect() args: 'callee_regs_used' and
'stack_depth'. So the fixed value can change.

--
Josh

2021-10-21 23:45:16

by Alexei Starovoitov

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Thu, Oct 21, 2021 at 4:38 PM Josh Poimboeuf <[email protected]> wrote:
>
> On Thu, Oct 21, 2021 at 04:24:33PM -0700, Alexei Starovoitov wrote:
> > On Thu, Oct 21, 2021 at 3:40 PM Peter Zijlstra <[email protected]> wrote:
> > >
> > > On Thu, Oct 21, 2021 at 11:03:33AM -0700, Alexei Starovoitov wrote:
> > >
> > > > > I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> > > > > lot more robust than the 64bit one and I couldn't figure out why the
> > > > > difference.
> > > >
> > > > Interesting. Daniel will recognize that trick then :)
> > >
> > > > > Is there concurrency on the jit?
> > > >
> > > > The JIT of different progs can happen in parallel.
> > >
> > > In that case I don't think the patch is safe. I'll see if I can find a
> > > variant that doesn't use static storage.
> >
> > The variable can only change from one fixed value to another fixed value.
> > Different threads will compute the same value. So I think it's safe
> > as-is. READ_ONCE/WRITE_ONCE won't hurt though.
>
> But the size of the generated code differs based on the
> emit_bpf_tail_call_indirect() args: 'callee_regs_used' and
> 'stack_depth'. So the fixed value can change.

Ahh. Right. It's potentially a different offset for every prog.
Let's put it into struct jit_context then.

2021-10-21 23:52:31

by Zvi Effron

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Thu, Oct 21, 2021 at 1:47 AM Peter Zijlstra <[email protected]> wrote:
>
> On Wed, Oct 20, 2021 at 05:05:02PM -0700, Alexei Starovoitov wrote:
> > On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
>
> > > @@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
> > > {
> > > int tcc_off = -4 - round_up(stack_depth, 8);
> > > u8 *prog = *pprog, *start = *pprog;
> > > - int pop_bytes = 0;
> > > - int off1 = 42;
> > > - int off2 = 31;
> > > - int off3 = 9;
> > > -
> > > - /* count the additional bytes used for popping callee regs from stack
> > > - * that need to be taken into account for each of the offsets that
> > > - * are used for bailing out of the tail call
> > > - */
> > > - pop_bytes = get_pop_bytes(callee_regs_used);
> > > - off1 += pop_bytes;
> > > - off2 += pop_bytes;
> > > - off3 += pop_bytes;
> > > -
> > > - if (stack_depth) {
> > > - off1 += 7;
> > > - off2 += 7;
> > > - off3 += 7;
> > > - }
> > > + static int out_label = -1;
> >
> > Interesting idea!
>
> I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> lot more robust than the 64bit one and I couldn't figure out why the
> difference.
>
> > All insn emits trying to do the right thing from the start.
> > Here the logic assumes that there will be at least two passes over image.
> > I think that is correct, but we never had such assumption.
>
> That's not exactly true; I think image is NULL on every first run, so
> all insn that depend on it will be wrong to start with. Equally there's
> a number of insn that seem to depend on addrs[i], that also requires at
> least two passes.
>
> > A comment is certainly a must-have.
>
> I can certainly add one, although I think we'll disagree on the comment
> style :-)
>
> > The race is possible too. Not sure whether READ_ONCE/WRITE_ONCE
> > are really warranted though. Might be overkill.
>
> Is there concurrency on the jit?
>
> > Once you have a git branch with all the changes I can give it a go.
>
> Ok, I'll go polish this thing and stick it in the tree mentioned in the
> cover letter.
>
> > Also you can rely on our BPF CI.
> > Just cc your patchset to bpf@vger and add [PATCH bpf-next] to a subject.
> > In patchwork there will be "bpf/vmtest-bpf-next" link that
> > builds kernel, selftests and runs everything.
>
> What's a patchwork and where do I find it?
>

Patchwork[0] tracks the status of patches from submission through to merge (and
beyond?).

[0]: https://patchwork.kernel.org/

> > It's pretty much the same as selftests/bpf/vmtest.sh, but with the latest
> > clang nightly and other deps like pahole.
>
> nice.

2021-10-22 08:36:03

by Peter Zijlstra

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Thu, Oct 21, 2021 at 04:51:08PM -0700, Zvi Effron wrote:

> > What's a patchwork and where do I find it?
> >
>
> Patchwork[0] tracks the status of patches from submission through to merge (and
> beyond?).

Yeah, I sorta know that :-) even though I loathe the things, because
web-browser. But the second part of the question was genuine: there are a
*lot* of patchwork instances around, and not everyone uses the new k.org
based one.

2021-10-22 11:36:49

by Peter Zijlstra

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Thu, Oct 21, 2021 at 04:42:12PM -0700, Alexei Starovoitov wrote:

> Ahh. Right. It's potentially a different offset for every prog.
> Let's put it into struct jit_context then.

Something like this...

---
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -225,6 +225,14 @@ static void jit_fill_hole(void *area, un

struct jit_context {
int cleanup_addr; /* Epilogue code offset */
+
+ /*
+ * Program specific offsets of labels in the code; these rely on the
+ * JIT doing at least 2 passes, recording the position on the first
+ * pass, only to generate the correct offset on the second pass.
+ */
+ int tail_call_direct_label;
+ int tail_call_indirect_label;
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
@@ -380,22 +388,6 @@ int bpf_arch_text_poke(void *ip, enum bp
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}

-static int get_pop_bytes(bool *callee_regs_used)
-{
- int bytes = 0;
-
- if (callee_regs_used[3])
- bytes += 2;
- if (callee_regs_used[2])
- bytes += 2;
- if (callee_regs_used[1])
- bytes += 2;
- if (callee_regs_used[0])
- bytes += 1;
-
- return bytes;
-}
-
/*
* Generate the following code:
*
@@ -411,29 +403,12 @@ static int get_pop_bytes(bool *callee_re
* out:
*/
static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
- u32 stack_depth)
+ u32 stack_depth, u8 *ip,
+ struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
- u8 *prog = *pprog;
- int pop_bytes = 0;
- int off1 = 42;
- int off2 = 31;
- int off3 = 9;
-
- /* count the additional bytes used for popping callee regs from stack
- * that need to be taken into account for each of the offsets that
- * are used for bailing out of the tail call
- */
- pop_bytes = get_pop_bytes(callee_regs_used);
- off1 += pop_bytes;
- off2 += pop_bytes;
- off3 += pop_bytes;
-
- if (stack_depth) {
- off1 += 7;
- off2 += 7;
- off3 += 7;
- }
+ u8 *prog = *pprog, *start = *pprog;
+ int offset;

/*
* rdi - pointer to ctx
@@ -448,8 +423,9 @@ static void emit_bpf_tail_call_indirect(
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
- EMIT2(X86_JBE, OFFSET1); /* jbe out */
+
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JBE, offset); /* jbe out */

/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -457,8 +433,9 @@ static void emit_bpf_tail_call_indirect(
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
- EMIT2(X86_JA, OFFSET2); /* ja out */
+
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */

@@ -471,12 +448,11 @@ static void emit_bpf_tail_call_indirect(
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
- EMIT2(X86_JE, OFFSET3); /* je out */

- *pprog = prog;
- pop_callee_regs(pprog, callee_regs_used);
- prog = *pprog;
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JE, offset); /* je out */
+
+ pop_callee_regs(&prog, callee_regs_used);

EMIT1(0x58); /* pop rax */
if (stack_depth)
@@ -496,38 +472,18 @@ static void emit_bpf_tail_call_indirect(
RETPOLINE_RCX_BPF_JIT();

/* out: */
+ ctx->tail_call_indirect_label = prog - start;
*pprog = prog;
}

static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
- u8 **pprog, int addr, u8 *image,
- bool *callee_regs_used, u32 stack_depth)
+ u8 **pprog, u8 *ip,
+ bool *callee_regs_used, u32 stack_depth,
+ struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
- u8 *prog = *pprog;
- int pop_bytes = 0;
- int off1 = 20;
- int poke_off;
-
- /* count the additional bytes used for popping callee regs to stack
- * that need to be taken into account for jump offset that is used for
- * bailing out from of the tail call when limit is reached
- */
- pop_bytes = get_pop_bytes(callee_regs_used);
- off1 += pop_bytes;
-
- /*
- * total bytes for:
- * - nop5/ jmpq $off
- * - pop callee regs
- * - sub rsp, $val if depth > 0
- * - pop rax
- */
- poke_off = X86_PATCH_SIZE + pop_bytes + 1;
- if (stack_depth) {
- poke_off += 7;
- off1 += 7;
- }
+ u8 *prog = *pprog, *start = *pprog;
+ int offset;

/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -535,28 +491,30 @@ static void emit_bpf_tail_call_direct(st
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
- EMIT2(X86_JA, off1); /* ja out */
+
+ offset = ctx->tail_call_direct_label - (prog + 2 - start);
+ EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */

- poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
+ poke->tailcall_bypass = ip + (prog - start);
poke->adj_off = X86_TAIL_CALL_OFFSET;
- poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
+ poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;

emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
poke->tailcall_bypass);

- *pprog = prog;
- pop_callee_regs(pprog, callee_regs_used);
- prog = *pprog;
+ pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));

memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
+
/* out: */
+ ctx->tail_call_direct_label = prog - start;

*pprog = prog;
}
@@ -1405,13 +1363,16 @@ st: if (is_imm8(insn->off))
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
- &prog, addrs[i], image,
+ &prog, image + addrs[i - 1],
callee_regs_used,
- bpf_prog->aux->stack_depth);
+ bpf_prog->aux->stack_depth,
+ ctx);
else
emit_bpf_tail_call_indirect(&prog,
callee_regs_used,
- bpf_prog->aux->stack_depth);
+ bpf_prog->aux->stack_depth,
+ image + addrs[i - 1],
+ ctx);
break;

/* cond jump */

2021-10-22 15:27:21

by Alexei Starovoitov

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Fri, Oct 22, 2021 at 4:33 AM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Oct 21, 2021 at 04:42:12PM -0700, Alexei Starovoitov wrote:
>
> > Ahh. Right. It's potentially a different offset for every prog.
> > Let's put it into struct jit_context then.
>
> Something like this...

Yep. Looks nice and clean to me.

> - poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
> + poke->tailcall_bypass = ip + (prog - start);
> poke->adj_off = X86_TAIL_CALL_OFFSET;
> - poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
> + poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;

This part looks correct too, but this is Daniel's magic.
He'll probably take a look next week when he comes back from PTO.
I don't recall which test exercises this tailcall poking logic.
It's only used with dynamic updates to prog_array.
insmod test_bpf.ko and test_verifier won't go down this path.

2021-10-22 21:09:03

by Zvi Effron

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Fri, Oct 22, 2021 at 1:33 AM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Oct 21, 2021 at 04:51:08PM -0700, Zvi Effron wrote:
>
> > > What's a patchwork and where do I find it?
> > >
> >
> > Patchwork[0] tracks the status of patches from submission through to merge (and
> > beyond?).
>
> Yeah, I sorta know that :-) even though I loathe the things, because
> web-browser. But the second part of the question was genuine: there are a
> *lot* of patchwork instances around, and not everyone uses the new k.org
> based one.
>

BPF and NetDev share one: https://patchwork.kernel.org/project/netdevbpf/list/

--Zvi

2021-10-25 11:53:38

by Maciej Fijalkowski

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Fri, Oct 22, 2021 at 08:22:35AM -0700, Alexei Starovoitov wrote:
> On Fri, Oct 22, 2021 at 4:33 AM Peter Zijlstra <[email protected]> wrote:
> >
> > On Thu, Oct 21, 2021 at 04:42:12PM -0700, Alexei Starovoitov wrote:
> >
> > > Ahh. Right. It's potentially a different offset for every prog.
> > > Let's put it into struct jit_context then.
> >
> > Something like this...
>
> Yep. Looks nice and clean to me.
>
> > - poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
> > + poke->tailcall_bypass = ip + (prog - start);
> > poke->adj_off = X86_TAIL_CALL_OFFSET;
> > - poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
> > + poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
>
> This part looks correct too, but this is Daniel's magic.
> He'll probably take a look next week when he comes back from PTO.
> I don't recall which test exercises this tailcall poking logic.
> It's only used with dynamic updates to prog_array.
> insmod test_bpf.ko and test_verifier won't go down this path.

Please run ./test_progs -t tailcalls from tools/testing/selftests/bpf and
make sure that all of the tests are passing in there, especially the
tailcall_bpf2bpf* subset.

Thanks!

2021-10-25 12:49:37

by Peter Zijlstra

Subject: Re: [PATCH v2 14/14] bpf,x86: Respect X86_FEATURE_RETPOLINE*

On Mon, Oct 25, 2021 at 03:44:24PM +0200, Maciej Fijalkowski wrote:
> On Fri, Oct 22, 2021 at 08:22:35AM -0700, Alexei Starovoitov wrote:
> > On Fri, Oct 22, 2021 at 4:33 AM Peter Zijlstra <[email protected]> wrote:
> > >
> > > On Thu, Oct 21, 2021 at 04:42:12PM -0700, Alexei Starovoitov wrote:
> > >
> > > > Ahh. Right. It's potentially a different offset for every prog.
> > > > Let's put it into struct jit_context then.
> > >
> > > Something like this...
> >
> > Yep. Looks nice and clean to me.
> >
> > > - poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
> > > + poke->tailcall_bypass = ip + (prog - start);
> > > poke->adj_off = X86_TAIL_CALL_OFFSET;
> > > - poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
> > > + poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
> >
> > This part looks correct too, but this is Daniel's magic.
> > He'll probably take a look next week when he comes back from PTO.
> > I don't recall which test exercises this tailcall poking logic.
> > It's only used with dynamic updates to prog_array.
> > insmod test_bpf.ko and test_verifier won't go down this path.
>
> Please run ./test_progs -t tailcalls from tools/testing/selftests/bpf and
> make sure that all of the tests are passing in there, especially the
> tailcall_bpf2bpf* subset.

Yeah, so nothing from that selftests crud wants to work for me; also I
*really* dislike how vmtest.sh as found there tramples all over my
source dir without asking.

Note that even when eventually supplied with O=builddir (confusingly in
front of it), it doesn't want to work and bails with lots of -ENOSPC
warnings (I double checked, my disks are nowhere near full). (and this
is after installing some horrendous python rst crap because clearly
running a test needs to build documentation :/)

I've spent hours on that; I'm not sinking more time into it. If you want
me to run that crap, fix it first.