Current BPF codegen doesn't respect X86_FEATURE_RETPOLINE* flags and
unconditionally emits a thunk call; this is sub-optimal and doesn't
match the regular, compiler-generated code.
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
arch/x86/include/asm/nospec-branch.h | 59 -----------------------------
arch/x86/net/bpf_jit_comp.c | 71 ++++++++++++++++++++---------------
arch/x86/net/bpf_jit_comp32.c | 22 ++++++++--
3 files changed, 59 insertions(+), 93 deletions(-)
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -303,63 +303,4 @@ static inline void mds_idle_clear_cpu_bu
#endif /* __ASSEMBLY__ */
-/*
- * Below is used in the eBPF JIT compiler and emits the byte sequence
- * for the following assembly:
- *
- * With retpolines configured:
- *
- * callq do_rop
- * spec_trap:
- * pause
- * lfence
- * jmp spec_trap
- * do_rop:
- * mov %rcx,(%rsp) for x86_64
- * mov %edx,(%esp) for x86_32
- * retq
- *
- * Without retpolines configured:
- *
- * jmp *%rcx for x86_64
- * jmp *%edx for x86_32
- */
-#ifdef CONFIG_RETPOLINE
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RCX_BPF_JIT_SIZE 17
-# define RETPOLINE_RCX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* callq do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT4(0x48, 0x89, 0x0C, 0x24); /* mov %rcx,(%rsp) */ \
- EMIT1(0xC3); /* retq */ \
-} while (0)
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
-do { \
- EMIT1_off32(0xE8, 7); /* call do_rop */ \
- /* spec_trap: */ \
- EMIT2(0xF3, 0x90); /* pause */ \
- EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \
- EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \
- /* do_rop: */ \
- EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
- EMIT1(0xC3); /* ret */ \
-} while (0)
-# endif
-#else /* !CONFIG_RETPOLINE */
-# ifdef CONFIG_X86_64
-# define RETPOLINE_RCX_BPF_JIT_SIZE 2
-# define RETPOLINE_RCX_BPF_JIT() \
- EMIT2(0xFF, 0xE1); /* jmp *%rcx */
-# else /* !CONFIG_X86_64 */
-# define RETPOLINE_EDX_BPF_JIT() \
- EMIT2(0xFF, 0xE2) /* jmp *%edx */
-# endif
-#endif
-
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -396,6 +396,37 @@ static int get_pop_bytes(bool *callee_re
return bytes;
}
+#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
+
+#ifdef CONFIG_RETPOLINE
+#define INDIRECT_SIZE (2 + 3*cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+#else
+#define INDIRECT_SIZE (2)
+#endif
+
+static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
+{
+ u8 *prog = *pprog;
+
+#ifdef CONFIG_RETPOLINE
+ static void * const reg_thunk[] = {
+#define GEN(reg) __x86_indirect_thunk_ ## reg,
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+ };
+
+ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+ EMIT_LFENCE();
+ EMIT2(0xFF, 0xE0 + reg);
+ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+ emit_jump(&prog, reg_thunk[reg], ip);
+ } else
+#endif
+ EMIT2(0xFF, 0xE0 + reg);
+
+ *pprog = prog;
+}
+
/*
* Generate the following code:
*
@@ -411,10 +442,10 @@ static int get_pop_bytes(bool *callee_re
* out:
*/
static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
- u32 stack_depth)
+ u32 stack_depth, u8 *ip)
{
int tcc_off = -4 - round_up(stack_depth, 8);
- u8 *prog = *pprog;
+ u8 *prog = *pprog, *start = *pprog;
int pop_bytes = 0;
int off1 = 42;
int off2 = 31;
@@ -448,7 +479,7 @@ static void emit_bpf_tail_call_indirect(
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
+#define OFFSET1 (off1 + INDIRECT_SIZE) /* Number of bytes to jump */
EMIT2(X86_JBE, OFFSET1); /* jbe out */
/*
@@ -457,7 +488,7 @@ static void emit_bpf_tail_call_indirect(
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
+#define OFFSET2 (off2 + INDIRECT_SIZE)
EMIT2(X86_JA, OFFSET2); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
@@ -471,7 +502,7 @@ static void emit_bpf_tail_call_indirect(
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
+#define OFFSET3 (off3 + INDIRECT_SIZE)
EMIT2(X86_JE, OFFSET3); /* je out */
*pprog = prog;
@@ -493,7 +524,7 @@ static void emit_bpf_tail_call_indirect(
* rdi == ctx (1st arg)
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
- RETPOLINE_RCX_BPF_JIT();
+ emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
/* out: */
*pprog = prog;
@@ -1220,8 +1251,7 @@ static int do_jit(struct bpf_prog *bpf_p
/* speculation barrier */
case BPF_ST | BPF_NOSPEC:
if (boot_cpu_has(X86_FEATURE_XMM2))
- /* Emit 'lfence' */
- EMIT3(0x0F, 0xAE, 0xE8);
+ EMIT_LFENCE();
break;
/* ST: *(u8*)(dst_reg + off) = imm */
@@ -1411,7 +1441,8 @@ st: if (is_imm8(insn->off))
else
emit_bpf_tail_call_indirect(&prog,
callee_regs_used,
- bpf_prog->aux->stack_depth);
+ bpf_prog->aux->stack_depth,
+ image + addrs[i - 1]);
break;
/* cond jump */
@@ -2117,24 +2148,6 @@ int arch_prepare_bpf_trampoline(struct b
return ret;
}
-static int emit_fallback_jump(u8 **pprog)
-{
- u8 *prog = *pprog;
- int err = 0;
-
-#ifdef CONFIG_RETPOLINE
- /* Note that this assumes the the compiler uses external
- * thunks for indirect calls. Both clang and GCC use the same
- * naming convention for external thunks.
- */
- err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
-#else
- EMIT2(0xFF, 0xE2); /* jmp rdx */
-#endif
- *pprog = prog;
- return err;
-}
-
static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
{
u8 *jg_reloc, *prog = *pprog;
@@ -2156,9 +2169,7 @@ static int emit_bpf_dispatcher(u8 **ppro
if (err)
return err;
- err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
- if (err)
- return err;
+ emit_indirect_jump(&prog, 2 /* rdx */, prog);
*pprog = prog;
return 0;
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -15,6 +15,7 @@
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
+#include <asm/asm-prototypes.h>
#include <linux/bpf.h>
/*
@@ -1267,6 +1268,19 @@ static void emit_epilogue(u8 **pprog, u3
*pprog = prog;
}
+static void emit_jmp_edx(u8 **pprog, u8 *ip)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+#ifdef CONFIG_RETPOLINE
+ EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
+#else
+ EMIT2(0xFF, 0xE2);
+#endif
+ *pprog = prog;
+}
+
/*
* Generate the following code:
* ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
@@ -1280,9 +1294,9 @@ static void emit_epilogue(u8 **pprog, u3
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call(u8 **pprog)
+static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
{
- u8 *prog = *pprog;
+ u8 *prog = *pprog, *start = *pprog;
int cnt = 0;
const u8 *r1 = bpf2ia32[BPF_REG_1];
const u8 *r2 = bpf2ia32[BPF_REG_2];
@@ -1362,7 +1376,7 @@ static void emit_bpf_tail_call(u8 **ppro
* eax == ctx (1st arg)
* edx == prog->bpf_func + prologue_size
*/
- RETPOLINE_EDX_BPF_JIT();
+ emit_jmp_edx(&prog, ip + (prog - start));
if (jmp_label1 == -1)
jmp_label1 = cnt;
@@ -2122,7 +2136,7 @@ static int do_jit(struct bpf_prog *bpf_p
break;
}
case BPF_JMP | BPF_TAIL_CALL:
- emit_bpf_tail_call(&prog);
+ emit_bpf_tail_call(&prog, image + addrs[i - 1]);
break;
/* cond jump */
On Wed, Oct 20, 2021 at 12:44:56PM +0200, Peter Zijlstra wrote:
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> +#ifdef CONFIG_RETPOLINE
> +#define INDIRECT_SIZE (2 + 3*cpu_feature_enabled(X86_FEATURE_RETPOLINE))
> +#else
> +#define INDIRECT_SIZE (2)
> +#endif
> @@ -411,10 +442,10 @@ static int get_pop_bytes(bool *callee_re
> * out:
> */
> static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
> - u32 stack_depth)
> + u32 stack_depth, u8 *ip)
> {
> int tcc_off = -4 - round_up(stack_depth, 8);
> - u8 *prog = *pprog;
> + u8 *prog = *pprog, *start = *pprog;
> int pop_bytes = 0;
> int off1 = 42;
> int off2 = 31;
> @@ -448,7 +479,7 @@ static void emit_bpf_tail_call_indirect(
> EMIT2(0x89, 0xD2); /* mov edx, edx */
> EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
> offsetof(struct bpf_array, map.max_entries));
> -#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
> +#define OFFSET1 (off1 + INDIRECT_SIZE) /* Number of bytes to jump */
> EMIT2(X86_JBE, OFFSET1); /* jbe out */
>
> /*
> @@ -457,7 +488,7 @@ static void emit_bpf_tail_call_indirect(
> */
> EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
> EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
> -#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
> +#define OFFSET2 (off2 + INDIRECT_SIZE)
> EMIT2(X86_JA, OFFSET2); /* ja out */
> EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
> EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
> @@ -471,7 +502,7 @@ static void emit_bpf_tail_call_indirect(
> * goto out;
> */
> EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
> -#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
> +#define OFFSET3 (off3 + INDIRECT_SIZE)
> EMIT2(X86_JE, OFFSET3); /* je out */
>
> *pprog = prog;
> @@ -493,7 +524,7 @@ static void emit_bpf_tail_call_indirect(
> * rdi == ctx (1st arg)
> * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
> */
> - RETPOLINE_RCX_BPF_JIT();
> + emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
>
> /* out: */
> *pprog = prog;
Alexei; could the above not be further improved with something like the
below?
Despite several hours trying and Song helping, I can't seem to run
anything bpf, that stuff is cursed. So I've no idea if the below
actually works, but it seems reasonable.
---
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -398,12 +398,6 @@ static int get_pop_bytes(bool *callee_re
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
-#ifdef CONFIG_RETPOLINE
-#define INDIRECT_SIZE (2 + 3*cpu_feature_enabled(X86_FEATURE_RETPOLINE))
-#else
-#define INDIRECT_SIZE (2)
-#endif
-
static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
{
u8 *prog = *pprog;
@@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
{
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog, *start = *pprog;
- int pop_bytes = 0;
- int off1 = 42;
- int off2 = 31;
- int off3 = 9;
-
- /* count the additional bytes used for popping callee regs from stack
- * that need to be taken into account for each of the offsets that
- * are used for bailing out of the tail call
- */
- pop_bytes = get_pop_bytes(callee_regs_used);
- off1 += pop_bytes;
- off2 += pop_bytes;
- off3 += pop_bytes;
-
- if (stack_depth) {
- off1 += 7;
- off2 += 7;
- off3 += 7;
- }
+ static int out_label = -1;
+ int offset;
/*
* rdi - pointer to ctx
@@ -479,8 +456,9 @@ static void emit_bpf_tail_call_indirect(
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + INDIRECT_SIZE) /* Number of bytes to jump */
- EMIT2(X86_JBE, OFFSET1); /* jbe out */
+
+ offset = out_label - (prog - start) + 2;
+ EMIT2(X86_JBE, offset); /* jbe out */
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -488,8 +466,9 @@ static void emit_bpf_tail_call_indirect(
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + INDIRECT_SIZE)
- EMIT2(X86_JA, OFFSET2); /* ja out */
+
+ offset = out_label - (prog - start) + 2;
+ EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
@@ -502,8 +481,9 @@ static void emit_bpf_tail_call_indirect(
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
-#define OFFSET3 (off3 + INDIRECT_SIZE)
- EMIT2(X86_JE, OFFSET3); /* je out */
+
+ offset = out_label - (prog - start) + 2;
+ EMIT2(X86_JE, offset); /* je out */
*pprog = prog;
pop_callee_regs(pprog, callee_regs_used);
@@ -527,6 +507,8 @@ static void emit_bpf_tail_call_indirect(
emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
/* out: */
+ out_label = (prog - start);
+
*pprog = prog;
}
On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
> Alexei; could the above not be further improved with something like the
> below?
>
> Despite several hours trying and Song helping, I can't seem to run
> anything bpf, that stuff is cursed. So I've no idea if the below
> actually works, but it seems reasonable.
The below fix gets it to run with test_verifier.
I do like it, it does seem less fiddly and more robust against future
changes, though it probably needs a comment for 'out_label' clarifying
it only works because this function is always called at least twice for
a given bpf_tail_call emission.
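Maybe something like this above the declaration (wording is just a
suggestion):

/*
 * Position of the 'out:' label, relative to the start of the emitted
 * tail call sequence.  On the first JIT pass this is still -1 and the
 * jump offsets computed from it are garbage; it only gets a sane value
 * at the end of that pass.  That's fine because the JIT always does at
 * least two passes before the image is finalized, so the offsets are
 * correct where it matters.
 */
static int out_label = -1;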
---
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 08f32c9fceaa..c9230c5bbca5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -457,7 +457,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
- offset = out_label - (prog - start) + 2;
+ offset = out_label - (prog - start + 2);
EMIT2(X86_JBE, offset); /* jbe out */
/*
@@ -467,7 +467,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
- offset = out_label - (prog - start) + 2;
+ offset = out_label - (prog - start + 2);
EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
@@ -482,7 +482,7 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
- offset = out_label - (prog - start) + 2;
+ offset = out_label - (prog - start + 2);
EMIT2(X86_JE, offset); /* je out */
*pprog = prog;
On Wed, Oct 20, 2021 at 09:56:55AM -0700, Josh Poimboeuf wrote:
> On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
> > Alexei; could the above not be further improved with something like the
> > below?
> >
> > Despite several hours trying and Song helping, I can't seem to run
> > anything bpf, that stuff is cursed. So I've no idea if the below
> > actually works, but it seems reasonable.
>
> The below fix gets it to run with test_verifier.
Argh, I'm fast running out of brown paper bags at this rate :/
>
> I do like it, it does seem less fiddly and more robust against future
> changes, though it probably needs a comment for 'out_label' clarifying
> it only works because this function is always called at least twice for
> a given bpf_tail_call emission.
Right.
On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
> > - RETPOLINE_RCX_BPF_JIT();
> > + emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
> >
> > /* out: */
> > *pprog = prog;
>
> Alexei; could the above not be further improved with something like the
> below?
Sorry for the delay. I was traveling last week
and Daniel is on PTO this week.
> Despite several hours trying and Song helping, I can't seem to run
> anything bpf, that stuff is cursed. So I've no idea if the below
> actually works, but it seems reasonable.
It's certainly delicate.
> @@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
> {
> int tcc_off = -4 - round_up(stack_depth, 8);
> u8 *prog = *pprog, *start = *pprog;
> - int pop_bytes = 0;
> - int off1 = 42;
> - int off2 = 31;
> - int off3 = 9;
> -
> - /* count the additional bytes used for popping callee regs from stack
> - * that need to be taken into account for each of the offsets that
> - * are used for bailing out of the tail call
> - */
> - pop_bytes = get_pop_bytes(callee_regs_used);
> - off1 += pop_bytes;
> - off2 += pop_bytes;
> - off3 += pop_bytes;
> -
> - if (stack_depth) {
> - off1 += 7;
> - off2 += 7;
> - off3 += 7;
> - }
> + static int out_label = -1;
Interesting idea!
All insn emits try to do the right thing from the start.
Here the logic assumes that there will be at least two passes over the image.
I think that is correct, but we never had such an assumption.
A comment is certainly a must-have.
The race is possible too. Not sure whether READ_ONCE/WRITE_ONCE
are really warranted though. Might be overkill.
Nice that Josh's test_verifier is passing, but it doesn't provide
a ton of coverage. test_progs has a lot more.
Once you have a git branch with all the changes I can give it a go.
Also you can rely on our BPF CI.
Just cc your patchset to bpf@vger and add [PATCH bpf-next] to a subject.
In patchwork there will be "bpf/vmtest-bpf-next" link that
builds kernel, selftests and runs everything.
It's pretty much the same as selftests/bpf/vmtest.sh, but with the latest
clang nightly and other deps like pahole.
On Wed, Oct 20, 2021 at 12:44:56PM +0200, Peter Zijlstra wrote:
> +
> + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
> + EMIT_LFENCE();
> + EMIT2(0xFF, 0xE0 + reg);
> + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
> + emit_jump(&prog, reg_thunk[reg], ip);
> + } else
One more question.
What's the deal with AMD? I thought retpoline is effective on it as well.
Is lfence an optimization, or did retpoline turn out to be not enough
in some cases?
On Wed, Oct 20, 2021 at 05:07:53PM -0700, Alexei Starovoitov wrote:
> On Wed, Oct 20, 2021 at 12:44:56PM +0200, Peter Zijlstra wrote:
> > +
> > + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
> > + EMIT_LFENCE();
> > + EMIT2(0xFF, 0xE0 + reg);
> > + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
> > + emit_jump(&prog, reg_thunk[reg], ip);
> > + } else
>
> One more question.
> What's the deal with AMD? I thought retpoline is effective on it as well.
> Is lfence an optimization, or did retpoline turn out to be not enough
> in some cases?
Yes, it's basically an optimization. AMD recommends it presumably
because it's quite a bit faster than a retpoline.
According to AMD it shrinks the speculative execution window enough so
that Spectre v2 isn't a threat.
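For reference, this is what the patch above inlines for e.g. %rcx (byte
values taken from the emit helpers in the patch; the array names here are
made up, purely for illustration):

static const u8 retpoline_amd_rcx[] = {
	0x0f, 0xae, 0xe8,	/* lfence */
	0xff, 0xe1,		/* jmp *%rcx */
};

static const u8 plain_rcx[] = {
	0xff, 0xe1,		/* jmp *%rcx */
};

/* X86_FEATURE_RETPOLINE (!AMD) instead emits a 5 byte relative jmp
 * (0xe9 + rel32) to __x86_indirect_thunk_rcx. */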
--
Josh
On Wed, Oct 20, 2021 at 05:05:02PM -0700, Alexei Starovoitov wrote:
> On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
> > @@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
> > {
> > int tcc_off = -4 - round_up(stack_depth, 8);
> > u8 *prog = *pprog, *start = *pprog;
> > - int pop_bytes = 0;
> > - int off1 = 42;
> > - int off2 = 31;
> > - int off3 = 9;
> > -
> > - /* count the additional bytes used for popping callee regs from stack
> > - * that need to be taken into account for each of the offsets that
> > - * are used for bailing out of the tail call
> > - */
> > - pop_bytes = get_pop_bytes(callee_regs_used);
> > - off1 += pop_bytes;
> > - off2 += pop_bytes;
> > - off3 += pop_bytes;
> > -
> > - if (stack_depth) {
> > - off1 += 7;
> > - off2 += 7;
> > - off3 += 7;
> > - }
> > + static int out_label = -1;
>
> Interesting idea!
I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
lot more robust than the 64bit one and I couldn't figure out why the
difference.
> All insn emits try to do the right thing from the start.
> Here the logic assumes that there will be at least two passes over the image.
> I think that is correct, but we never had such an assumption.
That's not exactly true; I think image is NULL on every first run, so
all insn that depend on it will be wrong to start with. Equally there's
a number of insn that seem to depend on addrs[i], that also requires at
least two passes.
> A comment is certainly a must-have.
I can certainly add one, although I think we'll disagree on the comment
style :-)
> The race is possible too. Not sure whether READ_ONCE/WRITE_ONCE
> are really warranted though. Might be overkill.
Is there concurrency on the jit?
> Once you have a git branch with all the changes I can give it a go.
Ok, I'll go polish this thing and stick it in the tree mentioned in the
cover letter.
> Also you can rely on our BPF CI.
> Just cc your patchset to bpf@vger and add [PATCH bpf-next] to a subject.
> In patchwork there will be "bpf/vmtest-bpf-next" link that
> builds kernel, selftests and runs everything.
What's a patchwork and where do I find it?
> It's pretty much the same as selftests/bpf/vmtest.sh, but with the latest
> clang nightly and other deps like pahole.
nice.
On Wed, Oct 20, 2021 at 05:18:08PM -0700, Josh Poimboeuf wrote:
> On Wed, Oct 20, 2021 at 05:07:53PM -0700, Alexei Starovoitov wrote:
> > On Wed, Oct 20, 2021 at 12:44:56PM +0200, Peter Zijlstra wrote:
> > > +
> > > + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
> > > + EMIT_LFENCE();
> > > + EMIT2(0xFF, 0xE0 + reg);
> > > + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
> > > + emit_jump(&prog, reg_thunk[reg], ip);
> > > + } else
> >
> > One more question.
> > What's the deal with AMD? I thought retpoline is effective on it as well.
> > Is lfence an optimization, or did retpoline turn out to be not enough
> > in some cases?
>
> Yes, it's basically an optimization. AMD recommends it presumably
> because it's quite a bit faster than a retpoline.
>
> According to AMD it shrinks the speculative execution window enough so
> that Spectre v2 isn't a threat.
Right, also note that we've been using alternatives to patch the thunk
to lfence;jmp for AMD pretty much forever.
Inlining it is better tho; just a shame clang seems to insist on r11,
which means we cannot fit it in the thunk call site for them :/
On Thu, Oct 21, 2021 at 1:47 AM Peter Zijlstra <[email protected]> wrote:
>
> On Wed, Oct 20, 2021 at 05:05:02PM -0700, Alexei Starovoitov wrote:
> > On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
>
> > > @@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
> > > {
> > > int tcc_off = -4 - round_up(stack_depth, 8);
> > > u8 *prog = *pprog, *start = *pprog;
> > > - int pop_bytes = 0;
> > > - int off1 = 42;
> > > - int off2 = 31;
> > > - int off3 = 9;
> > > -
> > > - /* count the additional bytes used for popping callee regs from stack
> > > - * that need to be taken into account for each of the offsets that
> > > - * are used for bailing out of the tail call
> > > - */
> > > - pop_bytes = get_pop_bytes(callee_regs_used);
> > > - off1 += pop_bytes;
> > > - off2 += pop_bytes;
> > > - off3 += pop_bytes;
> > > -
> > > - if (stack_depth) {
> > > - off1 += 7;
> > > - off2 += 7;
> > > - off3 += 7;
> > > - }
> > > + static int out_label = -1;
> >
> > Interesting idea!
>
> I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> lot more robust than the 64bit one and I couldn't figure out why the
> difference.
Interesting. Daniel will recognize that trick then :)
> > All insn emits try to do the right thing from the start.
> > Here the logic assumes that there will be at least two passes over the image.
> > I think that is correct, but we never had such an assumption.
>
> That's not exactly true; I think image is NULL on every first run, so
> all insn that depend on it will be wrong to start with. Equally there's
> a number of insn that seem to depend on addrs[i], that also requires at
> least two passes.
Right. The image will be allocated after the size converges, and
addrs[] is initialized assuming 64 bytes per insn.
So there is certainly more than one pass.
I was saying that the emit* helpers didn't have that assumption.
Looks like the 32-bit JIT did.
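Roughly this shape, paraphrasing bpf_int_jit_compile() (error handling and
most arguments dropped; jit_alloc_image() is a stand-in for
bpf_jit_binary_alloc()):

	/* rough size estimate: each insn translates to < 64 bytes */
	for (proglen = 0, i = 0; i <= prog->len; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}

	for (pass = 0; pass < MAX_PASSES || image; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
		if (image)
			break;			/* final pass over the real image */
		if (proglen == oldproglen)	/* size converged */
			image = jit_alloc_image(proglen);
		oldproglen = proglen;
	}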
> > A comment is certainly a must-have.
>
> I can certainly add one, although I think we'll disagree on the comment
> style :-)
I think we're on the same page actually.
> > The race is possible too. Not sure whether READ_ONCE/WRITE_ONCE
> > are really warranted though. Might be overkill.
>
> Is there concurrency on the jit?
The JIT of different progs can happen in parallel.
> > Once you have a git branch with all the changes I can give it a go.
>
> Ok, I'll go polish this thing and stick it in the tree mentioned in the
> cover letter.
>
> > Also you can rely on our BPF CI.
> > Just cc your patchset to bpf@vger and add [PATCH bpf-next] to a subject.
> > In patchwork there will be "bpf/vmtest-bpf-next" link that
> > builds kernel, selftests and runs everything.
>
> What's a patchwork and where do I find it?
https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
Click on any patch, search for 'bpf/vmtest-bpf-next' and follow the
'VM_Test' link.
The summary of the test run is available without logging in to GitHub.
To see detailed logs you need to be logged in with your GitHub account.
It's a silly limitation they have.
They even have a button 'Sign in to view logs'. Oh well.
> > It's pretty much the same as selftests/bpf/vmtest.sh, but with the latest
> > clang nightly and other deps like pahole.
>
> nice.
One more thing. There is test_bpf.ko.
Just insmod it and it will run a ton of JIT tests that we cannot do
from user space.
Please use the bpf-next tree though. A few weeks ago Johan Almbladh added
a lot more tests to it.
On Thu, Oct 21, 2021 at 11:03:33AM -0700, Alexei Starovoitov wrote:
> > I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> > lot more robust than the 64bit one and I couldn't figure out why the
> > difference.
>
> Interesting. Daniel will recognize that trick then :)
> > Is there concurrency on the jit?
>
> The JIT of different progs can happen in parallel.
In that case I don't think the patch is safe. I'll see if I can find a
variant that doesn't use static storage.
On Thu, Oct 21, 2021 at 3:40 PM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Oct 21, 2021 at 11:03:33AM -0700, Alexei Starovoitov wrote:
>
> > > I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> > > lot more robust than the 64bit one and I couldn't figure out why the
> > > difference.
> >
> > Interesting. Daniel will recognize that trick then :)
>
> > > Is there concurrency on the jit?
> >
> > The JIT of different progs can happen in parallel.
>
> In that case I don't think the patch is safe. I'll see if I can find a
> variant that doesn't use static storage.
The variable can only change from one fixed value to another fixed value.
Different threads will compute the same value. So I think it's safe
as-is. READ_ONCE/WRITE_ONCE won't hurt though.
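Something like this (sketch only):

	offset = READ_ONCE(out_label) - (prog - start + 2);
	EMIT2(X86_JBE, offset);		/* jbe out */

	/* ... */

	/* out: */
	WRITE_ONCE(out_label, prog - start);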
On Thu, Oct 21, 2021 at 04:24:33PM -0700, Alexei Starovoitov wrote:
> On Thu, Oct 21, 2021 at 3:40 PM Peter Zijlstra <[email protected]> wrote:
> >
> > On Thu, Oct 21, 2021 at 11:03:33AM -0700, Alexei Starovoitov wrote:
> >
> > > > I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> > > > lot more robust than the 64bit one and I couldn't figure out why the
> > > > difference.
> > >
> > > Interesting. Daniel will recognize that trick then :)
> >
> > > > Is there concurrency on the jit?
> > >
> > > The JIT of different progs can happen in parallel.
> >
> > In that case I don't think the patch is safe. I'll see if I can find a
> > variant that doesn't use static storage.
>
> The variable can only change from one fixed value to another fixed value.
> Different threads will compute the same value. So I think it's safe
> as-is. READ_ONCE/WRITE_ONCE won't hurt though.
But the size of the generated code differs based on the
emit_bpf_tail_call_indirect() args: 'callee_regs_used' and
'stack_depth'. So the fixed value can change.
--
Josh
On Thu, Oct 21, 2021 at 4:38 PM Josh Poimboeuf <[email protected]> wrote:
>
> On Thu, Oct 21, 2021 at 04:24:33PM -0700, Alexei Starovoitov wrote:
> > On Thu, Oct 21, 2021 at 3:40 PM Peter Zijlstra <[email protected]> wrote:
> > >
> > > On Thu, Oct 21, 2021 at 11:03:33AM -0700, Alexei Starovoitov wrote:
> > >
> > > > > I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> > > > > lot more robust than the 64bit one and I couldn't figure out why the
> > > > > difference.
> > > >
> > > > Interesting. Daniel will recognize that trick then :)
> > >
> > > > > Is there concurrency on the jit?
> > > >
> > > > The JIT of different progs can happen in parallel.
> > >
> > > In that case I don't think the patch is safe. I'll see if I can find a
> > > variant that doesn't use static storage.
> >
> > The variable can only change from one fixed value to another fixed value.
> > Different threads will compute the same value. So I think it's safe
> > as-is. READ_ONCE/WRITE_ONCE won't hurt though.
>
> But the size of the generated code differs based on the
> emit_bpf_tail_call_indirect() args: 'callee_regs_used' and
> 'stack_depth'. So the fixed value can change.
Ahh. Right. It's potentially a different offset for every prog.
Let's put it into struct jit_context then.
On Thu, Oct 21, 2021 at 1:47 AM Peter Zijlstra <[email protected]> wrote:
>
> On Wed, Oct 20, 2021 at 05:05:02PM -0700, Alexei Starovoitov wrote:
> > On Wed, Oct 20, 2021 at 01:09:51PM +0200, Peter Zijlstra wrote:
>
> > > @@ -446,25 +440,8 @@ static void emit_bpf_tail_call_indirect(
> > > {
> > > int tcc_off = -4 - round_up(stack_depth, 8);
> > > u8 *prog = *pprog, *start = *pprog;
> > > - int pop_bytes = 0;
> > > - int off1 = 42;
> > > - int off2 = 31;
> > > - int off3 = 9;
> > > -
> > > - /* count the additional bytes used for popping callee regs from stack
> > > - * that need to be taken into account for each of the offsets that
> > > - * are used for bailing out of the tail call
> > > - */
> > > - pop_bytes = get_pop_bytes(callee_regs_used);
> > > - off1 += pop_bytes;
> > > - off2 += pop_bytes;
> > > - off3 += pop_bytes;
> > > -
> > > - if (stack_depth) {
> > > - off1 += 7;
> > > - off2 += 7;
> > > - off3 += 7;
> > > - }
> > > + static int out_label = -1;
> >
> > Interesting idea!
>
> I nicked it from emit_bpf_tail_call() in the 32bit jit :-) It seemed a
> lot more robust than the 64bit one and I couldn't figure out why the
> difference.
>
> > All insn emits try to do the right thing from the start.
> > Here the logic assumes that there will be at least two passes over the image.
> > I think that is correct, but we never had such an assumption.
>
> That's not exactly true; I think image is NULL on every first run, so
> all insn that depend on it will be wrong to start with. Equally there's
> a number of insn that seem to depend on addrs[i], that also requires at
> least two passes.
>
> > A comment is certainly a must-have.
>
> I can certainly add one, although I think we'll disagree on the comment
> style :-)
>
> > The race is possible too. Not sure whether READ_ONCE/WRITE_ONCE
> > are really warranted though. Might be overkill.
>
> Is there concurrency on the jit?
>
> > Once you have a git branch with all the changes I can give it a go.
>
> Ok, I'll go polish this thing and stick it in the tree mentioned in the
> cover letter.
>
> > Also you can rely on our BPF CI.
> > Just cc your patchset to bpf@vger and add [PATCH bpf-next] to a subject.
> > In patchwork there will be "bpf/vmtest-bpf-next" link that
> > builds kernel, selftests and runs everything.
>
> What's a patchwork and where do I find it?
>
Patchwork[0] tracks the status of patches from submission through to merge (and
beyond?).
[0]: https://patchwork.kernel.org/
> > It's pretty much the same as selftests/bpf/vmtest.sh, but with the latest
> > clang nightly and other deps like pahole.
>
> nice.
On Thu, Oct 21, 2021 at 04:51:08PM -0700, Zvi Effron wrote:
> > What's a patchwork and where do I find it?
> >
>
> Patchwork[0] tracks the status of patches from submission through to merge (and
> beyond?).
Yeah, I sorta know that :-) I loathe the things because web-browser, but
the second part of the question was genuine: there are a *lot* of
patchwork instances around, and not everyone uses the new k.org based
one.
On Thu, Oct 21, 2021 at 04:42:12PM -0700, Alexei Starovoitov wrote:
> Ahh. Right. It's potentially a different offset for every prog.
> Let's put it into struct jit_context then.
Something like this...
---
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -225,6 +225,14 @@ static void jit_fill_hole(void *area, un
struct jit_context {
int cleanup_addr; /* Epilogue code offset */
+
+ /*
+ * Program specific offsets of labels in the code; these rely on the
+ * JIT doing at least 2 passes, recording the position on the first
+ * pass, only to generate the correct offset on the second pass.
+ */
+ int tail_call_direct_label;
+ int tail_call_indirect_label;
};
/* Maximum number of bytes emitted while JITing one eBPF insn */
@@ -380,22 +388,6 @@ int bpf_arch_text_poke(void *ip, enum bp
return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
}
-static int get_pop_bytes(bool *callee_regs_used)
-{
- int bytes = 0;
-
- if (callee_regs_used[3])
- bytes += 2;
- if (callee_regs_used[2])
- bytes += 2;
- if (callee_regs_used[1])
- bytes += 2;
- if (callee_regs_used[0])
- bytes += 1;
-
- return bytes;
-}
-
/*
* Generate the following code:
*
@@ -411,29 +403,12 @@ static int get_pop_bytes(bool *callee_re
* out:
*/
static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
- u32 stack_depth)
+ u32 stack_depth, u8 *ip,
+ struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
- u8 *prog = *pprog;
- int pop_bytes = 0;
- int off1 = 42;
- int off2 = 31;
- int off3 = 9;
-
- /* count the additional bytes used for popping callee regs from stack
- * that need to be taken into account for each of the offsets that
- * are used for bailing out of the tail call
- */
- pop_bytes = get_pop_bytes(callee_regs_used);
- off1 += pop_bytes;
- off2 += pop_bytes;
- off3 += pop_bytes;
-
- if (stack_depth) {
- off1 += 7;
- off2 += 7;
- off3 += 7;
- }
+ u8 *prog = *pprog, *start = *pprog;
+ int offset;
/*
* rdi - pointer to ctx
@@ -448,8 +423,9 @@ static void emit_bpf_tail_call_indirect(
EMIT2(0x89, 0xD2); /* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */
offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
- EMIT2(X86_JBE, OFFSET1); /* jbe out */
+
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JBE, offset); /* jbe out */
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -457,8 +433,9 @@ static void emit_bpf_tail_call_indirect(
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
- EMIT2(X86_JA, OFFSET2); /* ja out */
+
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
@@ -471,12 +448,11 @@ static void emit_bpf_tail_call_indirect(
* goto out;
*/
EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */
-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
- EMIT2(X86_JE, OFFSET3); /* je out */
- *pprog = prog;
- pop_callee_regs(pprog, callee_regs_used);
- prog = *pprog;
+ offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+ EMIT2(X86_JE, offset); /* je out */
+
+ pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
@@ -496,38 +472,18 @@ static void emit_bpf_tail_call_indirect(
RETPOLINE_RCX_BPF_JIT();
/* out: */
+ ctx->tail_call_indirect_label = prog - start;
*pprog = prog;
}
static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
- u8 **pprog, int addr, u8 *image,
- bool *callee_regs_used, u32 stack_depth)
+ u8 **pprog, u8 *ip,
+ bool *callee_regs_used, u32 stack_depth,
+ struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
- u8 *prog = *pprog;
- int pop_bytes = 0;
- int off1 = 20;
- int poke_off;
-
- /* count the additional bytes used for popping callee regs to stack
- * that need to be taken into account for jump offset that is used for
- * bailing out from of the tail call when limit is reached
- */
- pop_bytes = get_pop_bytes(callee_regs_used);
- off1 += pop_bytes;
-
- /*
- * total bytes for:
- * - nop5/ jmpq $off
- * - pop callee regs
- * - sub rsp, $val if depth > 0
- * - pop rax
- */
- poke_off = X86_PATCH_SIZE + pop_bytes + 1;
- if (stack_depth) {
- poke_off += 7;
- off1 += 7;
- }
+ u8 *prog = *pprog, *start = *pprog;
+ int offset;
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -535,28 +491,30 @@ static void emit_bpf_tail_call_direct(st
*/
EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
- EMIT2(X86_JA, off1); /* ja out */
+
+ offset = ctx->tail_call_direct_label - (prog + 2 - start);
+ EMIT2(X86_JA, offset); /* ja out */
EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */
- poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
+ poke->tailcall_bypass = ip + (prog - start);
poke->adj_off = X86_TAIL_CALL_OFFSET;
- poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
+ poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
poke->tailcall_bypass);
- *pprog = prog;
- pop_callee_regs(pprog, callee_regs_used);
- prog = *pprog;
+ pop_callee_regs(&prog, callee_regs_used);
EMIT1(0x58); /* pop rax */
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
+
/* out: */
+ ctx->tail_call_direct_label = prog - start;
*pprog = prog;
}
@@ -1405,13 +1363,16 @@ st: if (is_imm8(insn->off))
case BPF_JMP | BPF_TAIL_CALL:
if (imm32)
emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
- &prog, addrs[i], image,
+ &prog, image + addrs[i - 1],
callee_regs_used,
- bpf_prog->aux->stack_depth);
+ bpf_prog->aux->stack_depth,
+ ctx);
else
emit_bpf_tail_call_indirect(&prog,
callee_regs_used,
- bpf_prog->aux->stack_depth);
+ bpf_prog->aux->stack_depth,
+ image + addrs[i - 1],
+ ctx);
break;
/* cond jump */
On Fri, Oct 22, 2021 at 4:33 AM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Oct 21, 2021 at 04:42:12PM -0700, Alexei Starovoitov wrote:
>
> > Ahh. Right. It's potentially a different offset for every prog.
> > Let's put it into struct jit_context then.
>
> Something like this...
Yep. Looks nice and clean to me.
> - poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
> + poke->tailcall_bypass = ip + (prog - start);
> poke->adj_off = X86_TAIL_CALL_OFFSET;
> - poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
> + poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
This part looks correct too, but this is Daniel's magic.
He'll probably take a look next week when he comes back from PTO.
I don't recall which test exercises this tailcall poking logic.
It's only used with dynamic updates to prog_array.
insmod test_bpf.ko and test_verifier won't go down this path.
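i.e. something like this from user space is needed to exercise it
(sketch via libbpf; the fds are placeholders):

	__u32 idx = 0;

	/* install a prog at slot 0 -> patches the tail call site */
	bpf_map_update_elem(prog_array_fd, &idx, &tail_prog_fd, BPF_ANY);

	/* remove it again -> patches the site back to the bypass */
	bpf_map_delete_elem(prog_array_fd, &idx);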
On Fri, Oct 22, 2021 at 1:33 AM Peter Zijlstra <[email protected]> wrote:
>
> On Thu, Oct 21, 2021 at 04:51:08PM -0700, Zvi Effron wrote:
>
> > > What's a patchwork and where do I find it?
> > >
> >
> > Patchwork[0] tracks the status of patches from submission through to merge (and
> > beyond?).
>
> Yeah, I sorta know that :-) Even though I loathe the things because
> web-browser, but the second part of the question was genuine, there's a
> *lot* of patchwork instances around, not everyone uses the new k.org
> based one.
>
BPF and NetDev share one: https://patchwork.kernel.org/project/netdevbpf/list/
--Zvi
On Fri, Oct 22, 2021 at 08:22:35AM -0700, Alexei Starovoitov wrote:
> On Fri, Oct 22, 2021 at 4:33 AM Peter Zijlstra <[email protected]> wrote:
> >
> > On Thu, Oct 21, 2021 at 04:42:12PM -0700, Alexei Starovoitov wrote:
> >
> > > Ahh. Right. It's potentially a different offset for every prog.
> > > Let's put it into struct jit_context then.
> >
> > Something like this...
>
> Yep. Looks nice and clean to me.
>
> > - poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
> > + poke->tailcall_bypass = ip + (prog - start);
> > poke->adj_off = X86_TAIL_CALL_OFFSET;
> > - poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
> > + poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
>
> This part looks correct too, but this is Daniel's magic.
> He'll probably take a look next week when he comes back from PTO.
> I don't recall which test exercises this tailcall poking logic.
> It's only used with dynamic updates to prog_array.
> insmod test_bpf.ko and test_verifier won't go down this path.
Please run ./test_progs -t tailcalls from tools/testing/selftests/bpf and
make sure that all of the tests are passing in there, especially the
tailcall_bpf2bpf* subset.
Thanks!
On Mon, Oct 25, 2021 at 03:44:24PM +0200, Maciej Fijalkowski wrote:
> On Fri, Oct 22, 2021 at 08:22:35AM -0700, Alexei Starovoitov wrote:
> > On Fri, Oct 22, 2021 at 4:33 AM Peter Zijlstra <[email protected]> wrote:
> > >
> > > On Thu, Oct 21, 2021 at 04:42:12PM -0700, Alexei Starovoitov wrote:
> > >
> > > > Ahh. Right. It's potentially a different offset for every prog.
> > > > Let's put it into struct jit_context then.
> > >
> > > Something like this...
> >
> > Yep. Looks nice and clean to me.
> >
> > > - poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
> > > + poke->tailcall_bypass = ip + (prog - start);
> > > poke->adj_off = X86_TAIL_CALL_OFFSET;
> > > - poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
> > > + poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
> >
> > This part looks correct too, but this is Daniel's magic.
> > He'll probably take a look next week when he comes back from PTO.
> > I don't recall which test exercises this tailcall poking logic.
> > It's only used with dynamic updates to prog_array.
> > insmod test_bpf.ko and test_verifier won't go down this path.
>
> Please run ./test_progs -t tailcalls from tools/testing/selftests/bpf and
> make sure that all of the tests are passing in there, especially the
> tailcall_bpf2bpf* subset.
Yeah, so nothing from that selftests crud wants to work for me; also I
*really* dislike how vmtest.sh as found there tramples all over my
source dir without asking.
Note that even when eventually supplied with O=builddir (confusingly in
front of it), it doesn't want to work and bails with lots of -ENOSPC
warnings (I double-checked, my disks are nowhere near full). (And this
is after installing some horrendous python rst crap because clearly
running a test needs to build documentation :/)
I've spent hours on that; I'm not sinking more time into it. If you want
me to run that crap, fix it first.