2022-09-19 09:25:58

by Xu Kuohai

[permalink] [raw]
Subject: [RESEND PATCH bpf-next 0/2] Jit BPF_CALL to direct call when possible

Currently BPF_CALL is always jited to indirect call, but when target is
in the range of direct call, a BPF_CALL can be jited to direct call.

For example, the following BPF_CALL

call __htab_map_lookup_elem

is always jited to an indirect call:

mov x10, #0xffffffffffff18f4
movk x10, #0x821, lsl #16
movk x10, #0x8000, lsl #32
blr x10

When the target is in the range of a direct call, it can be jited to:

bl 0xfffffffffd33bc98

This patchset does such jit.

Xu Kuohai (2):
bpf, arm64: Jit BPF_CALL to direct call when possible
bpf, arm64: Eliminate false -EFBIG error in bpf trampoline

arch/arm64/net/bpf_jit_comp.c | 136 ++++++++++++++++++++++------------
1 file changed, 87 insertions(+), 49 deletions(-)

--
2.30.2


2022-09-19 09:36:14

by Xu Kuohai

[permalink] [raw]
Subject: [RESEND PATCH bpf-next 2/2] bpf, arm64: Eliminate false -EFBIG error in bpf trampoline

From: Xu Kuohai <[email protected]>

With jiting BPF_CALL to indirect call or direct call based on the jump
distance, the bpf trampoline size calculated with NULL image address
may be larger than the actual size needed, resulting in a fake -EFBIG
error.

So remove the estimate of the bpf trampoline size based on NULL image
address, and check if there is enough space when emitting instructions.

Signed-off-by: Xu Kuohai <[email protected]>
---
arch/arm64/net/bpf_jit_comp.c | 67 ++++++++++++++++-------------------
1 file changed, 30 insertions(+), 37 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 06437e34614b..a07742ee7860 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -73,6 +73,7 @@ struct jit_ctx {
const struct bpf_prog *prog;
int idx;
bool write;
+ int max_insns;
int epilogue_offset;
int *offset;
int exentry_idx;
@@ -90,11 +91,15 @@ struct bpf_plt {
#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)

-static inline void emit(const u32 insn, struct jit_ctx *ctx)
+static inline void __emit(u32 insn, struct jit_ctx *ctx, int idx)
{
- if (ctx->image != NULL && ctx->write)
- ctx->image[ctx->idx] = cpu_to_le32(insn);
+ if (ctx->image != NULL && ctx->write && idx < ctx->max_insns)
+ ctx->image[idx] = cpu_to_le32(insn);
+}

+static inline void emit(u32 insn, struct jit_ctx *ctx)
+{
+ __emit(insn, ctx, ctx->idx);
ctx->idx++;
}

@@ -1544,6 +1549,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
build_epilogue(&ctx);
build_plt(&ctx);

+ ctx.max_insns = ctx.idx;
extable_align = __alignof__(struct exception_table_entry);
extable_size = prog->aux->num_exentries *
sizeof(struct exception_table_entry);
@@ -1603,7 +1609,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
build_plt(&ctx);

/* 3. Extra pass to validate JITed code. */
- if (validate_ctx(&ctx)) {
+ if (WARN_ON_ONCE(ctx.idx > ctx.max_insns) || validate_ctx(&ctx)) {
bpf_jit_binary_free(header);
prog = orig_prog;
goto out_off;
@@ -1687,7 +1693,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
int args_off, int retval_off, int run_ctx_off,
bool save_ret)
{
- __le32 *branch;
+ int br_idx;
u64 enter_prog;
u64 exit_prog;
struct bpf_prog *p = l->link.prog;
@@ -1725,7 +1731,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
/* if (__bpf_prog_enter(prog) == 0)
* goto skip_exec_of_prog;
*/
- branch = ctx->image + ctx->idx;
+ br_idx = ctx->idx;
emit(A64_NOP, ctx);

/* save return value to callee saved register x20 */
@@ -1740,10 +1746,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
if (save_ret)
emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);

- if (ctx->image) {
- int offset = &ctx->image[ctx->idx] - branch;
- *branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
- }
+ __emit(A64_CBZ(1, A64_R(0), ctx->idx - br_idx), ctx, br_idx);

/* arg1: prog */
emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
@@ -1757,7 +1760,7 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,

static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
int args_off, int retval_off, int run_ctx_off,
- __le32 **branches)
+ int *br_idxes)
{
int i;

@@ -1775,7 +1778,7 @@ static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
/* Save the location of branch, and generate a nop.
* This nop will be replaced with a cbnz later.
*/
- branches[i] = ctx->image + ctx->idx;
+ br_idxes[i] = ctx->idx;
emit(A64_NOP, ctx);
}
}
@@ -1828,7 +1831,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
bool save_ret;
- __le32 **branches = NULL;
+ int *br_idxes = NULL;

/* trampoline stack layout:
* [ parent ip ]
@@ -1936,13 +1939,12 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
flags & BPF_TRAMP_F_RET_FENTRY_RET);

if (fmod_ret->nr_links) {
- branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
- GFP_KERNEL);
- if (!branches)
+ br_idxes = kcalloc(fmod_ret->nr_links, sizeof(int), GFP_KERNEL);
+ if (!br_idxes)
return -ENOMEM;

invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
- run_ctx_off, branches);
+ run_ctx_off, br_idxes);
}

if (flags & BPF_TRAMP_F_CALL_ORIG) {
@@ -1957,11 +1959,10 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
emit(A64_NOP, ctx);
}

- /* update the branches saved in invoke_bpf_mod_ret with cbnz */
- for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
- int offset = &ctx->image[ctx->idx] - branches[i];
- *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
- }
+ /* update the bridx saved in invoke_bpf_mod_ret with cbnz */
+ for (i = 0; i < fmod_ret->nr_links; i++)
+ __emit(A64_CBNZ(1, A64_R(10), ctx->idx - br_idxes[i]), ctx,
+ br_idxes[i]);

for (i = 0; i < fexit->nr_links; i++)
invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
@@ -2004,7 +2005,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
if (ctx->image)
bpf_flush_icache(ctx->image, ctx->image + ctx->idx);

- kfree(branches);
+ kfree(br_idxes);

return ctx->idx;
}
@@ -2018,9 +2019,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
int nargs = m->nr_args;
int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
struct jit_ctx ctx = {
- .image = NULL,
+ .image = image,
.idx = 0,
.write = true,
+ .max_insns = max_insns,
};

/* the first 8 arguments are passed by registers */
@@ -2033,26 +2035,17 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
return -ENOTSUPP;
}

+ jit_fill_hole(image, (unsigned int)(image_end - image));
+
ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
+
if (ret < 0)
return ret;

if (ret > max_insns)
return -EFBIG;

- ctx.image = image;
- ctx.idx = 0;
-
- jit_fill_hole(image, (unsigned int)(image_end - image));
- ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags);
-
- if (ret > 0 && validate_code(&ctx) < 0)
- ret = -EINVAL;
-
- if (ret > 0)
- ret *= AARCH64_INSN_SIZE;
-
- return ret;
+ return validate_code(&ctx) < 0 ? -EINVAL : ret * AARCH64_INSN_SIZE;
}

static bool is_long_jump(void *ip, void *target)
--
2.30.2

2022-09-19 09:54:33

by Xu Kuohai

[permalink] [raw]
Subject: [RESEND PATCH bpf-next 1/2] bpf, arm64: Jit BPF_CALL to direct call when possible

From: Xu Kuohai <[email protected]>

Currently BPF_CALL is always jited to indirect call, but when target is
in the range of direct call, BPF_CALL can be jited to direct call.

For example, the following BPF_CALL

call __htab_map_lookup_elem

is always jited to an indirect call:

mov x10, #0xffffffffffff18f4
movk x10, #0x821, lsl #16
movk x10, #0x8000, lsl #32
blr x10

When the target is in the range of direct call, it can be jited to:

bl 0xfffffffffd33bc98

This patch does such jit when possible.

1. First pass, get the maximum jited image size. Since the jited image
memory is not allocated yet, the distance between jited BPF_CALL
instructon and call target is unknown, so jit all BPF_CALL to indirect
call to get the maximum image size.

2. Allocate image memory with the size caculated in step 1.

3. Second pass, determine the jited address and size for every bpf instruction.
Since image memory is now allocated and there is only one jit method for
bpf instructions other than BPF_CALL, so the jited address for the first
BPF_CALL is determined, so the distance to call target is determined, so
the first BPF_CALL is determined to be jited to direct or indirect call,
so the jited image size after the first BPF_CALL is determined. By analogy,
the jited addresses and sizes for all subsequent BPF instructions are
determined.

4. Last pass, generate the final image. The jump offset of jump instruction
whose target is within the jited image is determined in this pass, since
the target instruction address may be changed in step 3.

Tested with test_bpf.ko and some arm64 working selftests, nothing failed.

Signed-off-by: Xu Kuohai <[email protected]>
---
arch/arm64/net/bpf_jit_comp.c | 71 ++++++++++++++++++++++++++++-------
1 file changed, 58 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 30f76178608b..06437e34614b 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -72,6 +72,7 @@ static const int bpf2a64[] = {
struct jit_ctx {
const struct bpf_prog *prog;
int idx;
+ bool write;
int epilogue_offset;
int *offset;
int exentry_idx;
@@ -91,7 +92,7 @@ struct bpf_plt {

static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
- if (ctx->image != NULL)
+ if (ctx->image != NULL && ctx->write)
ctx->image[ctx->idx] = cpu_to_le32(insn);

ctx->idx++;
@@ -178,10 +179,29 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,

static inline void emit_call(u64 target, struct jit_ctx *ctx)
{
- u8 tmp = bpf2a64[TMP_REG_1];
+ u8 tmp;
+ long offset;
+ unsigned long pc;
+ u32 insn = AARCH64_BREAK_FAULT;
+
+ /* if ctx->image == NULL or target == 0, the jump distance is unknown,
+ * emit indirect call.
+ */
+ if (ctx->image && target) {
+ pc = (unsigned long)&ctx->image[ctx->idx];
+ offset = (long)target - (long)pc;
+ if (offset >= -SZ_128M && offset < SZ_128M)
+ insn = aarch64_insn_gen_branch_imm(pc, target,
+ AARCH64_INSN_BRANCH_LINK);
+ }

- emit_addr_mov_i64(tmp, target, ctx);
- emit(A64_BLR(tmp), ctx);
+ if (insn == AARCH64_BREAK_FAULT) {
+ tmp = bpf2a64[TMP_REG_1];
+ emit_addr_mov_i64(tmp, target, ctx);
+ emit(A64_BLR(tmp), ctx);
+ } else {
+ emit(insn, ctx);
+ }
}

static inline int bpf2a64_offset(int bpf_insn, int off,
@@ -1392,13 +1412,11 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
const struct bpf_insn *insn = &prog->insnsi[i];
int ret;

- if (ctx->image == NULL)
- ctx->offset[i] = ctx->idx;
+ ctx->offset[i] = ctx->idx;
ret = build_insn(insn, ctx, extra_pass);
if (ret > 0) {
i++;
- if (ctx->image == NULL)
- ctx->offset[i] = ctx->idx;
+ ctx->offset[i] = ctx->idx;
continue;
}
if (ret)
@@ -1409,8 +1427,7 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
* the last element with the offset after the last
* instruction (end of program)
*/
- if (ctx->image == NULL)
- ctx->offset[i] = ctx->idx;
+ ctx->offset[i] = ctx->idx;

return 0;
}
@@ -1461,6 +1478,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bool extra_pass = false;
struct jit_ctx ctx;
u8 *image_ptr;
+ int body_idx;
+ int exentry_idx;

if (!prog->jit_requested)
return orig_prog;
@@ -1515,6 +1534,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}

+ /* Get the max image size */
if (build_body(&ctx, extra_pass)) {
prog = orig_prog;
goto out_off;
@@ -1528,7 +1548,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
extable_size = prog->aux->num_exentries *
sizeof(struct exception_table_entry);

- /* Now we know the actual image size. */
+ /* Now we know the max image size. */
prog_size = sizeof(u32) * ctx.idx;
/* also allocate space for plt target */
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
@@ -1548,15 +1568,37 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
skip_init_ctx:
ctx.idx = 0;
ctx.exentry_idx = 0;
+ ctx.write = true;

build_prologue(&ctx, was_classic);

+ /* Record exentry_idx and ctx.idx before first build_body */
+ exentry_idx = ctx.exentry_idx;
+ body_idx = ctx.idx;
+ /* Don't write instruction to memory for now */
+ ctx.write = false;
+
+ /* Determine call distance and instruction position */
if (build_body(&ctx, extra_pass)) {
bpf_jit_binary_free(header);
prog = orig_prog;
goto out_off;
}

+ ctx.epilogue_offset = ctx.idx;
+
+ ctx.exentry_idx = exentry_idx;
+ ctx.idx = body_idx;
+ ctx.write = true;
+
+ /* Determine jump offset and write result to memory */
+ if (build_body(&ctx, extra_pass) ||
+ WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) {
+ bpf_jit_binary_free(header);
+ prog = orig_prog;
+ goto out_off;
+ }
+
build_epilogue(&ctx);
build_plt(&ctx);

@@ -1567,6 +1609,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}

+ /* Update prog size */
+ prog_size = sizeof(u32) * ctx.idx;
/* And we're done. */
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
@@ -1574,8 +1618,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bpf_flush_icache(header, ctx.image + ctx.idx);

if (!prog->is_func || extra_pass) {
- if (extra_pass && ctx.idx != jit_data->ctx.idx) {
- pr_err_once("multi-func JIT bug %d != %d\n",
+ if (extra_pass && ctx.idx > jit_data->ctx.idx) {
+ pr_err_once("multi-func JIT bug %d > %d\n",
ctx.idx, jit_data->ctx.idx);
bpf_jit_binary_free(header);
prog->bpf_func = NULL;
@@ -1976,6 +2020,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
struct jit_ctx ctx = {
.image = NULL,
.idx = 0,
+ .write = true,
};

/* the first 8 arguments are passed by registers */
--
2.30.2

2022-09-26 20:38:33

by Daniel Borkmann

[permalink] [raw]
Subject: Re: [RESEND PATCH bpf-next 1/2] bpf, arm64: Jit BPF_CALL to direct call when possible

[ +Mark/Florent ]

On 9/19/22 11:21 AM, Xu Kuohai wrote:
> From: Xu Kuohai <[email protected]>
>
> Currently BPF_CALL is always jited to indirect call, but when target is
> in the range of direct call, BPF_CALL can be jited to direct call.
>
> For example, the following BPF_CALL
>
> call __htab_map_lookup_elem
>
> is always jited to an indirect call:
>
> mov x10, #0xffffffffffff18f4
> movk x10, #0x821, lsl #16
> movk x10, #0x8000, lsl #32
> blr x10
>
> When the target is in the range of direct call, it can be jited to:
>
> bl 0xfffffffffd33bc98
>
> This patch does such jit when possible.
>
> 1. First pass, get the maximum jited image size. Since the jited image
> memory is not allocated yet, the distance between jited BPF_CALL
> instructon and call target is unknown, so jit all BPF_CALL to indirect
> call to get the maximum image size.
>
> 2. Allocate image memory with the size caculated in step 1.
>
> 3. Second pass, determine the jited address and size for every bpf instruction.
> Since image memory is now allocated and there is only one jit method for
> bpf instructions other than BPF_CALL, so the jited address for the first
> BPF_CALL is determined, so the distance to call target is determined, so
> the first BPF_CALL is determined to be jited to direct or indirect call,
> so the jited image size after the first BPF_CALL is determined. By analogy,
> the jited addresses and sizes for all subsequent BPF instructions are
> determined.
>
> 4. Last pass, generate the final image. The jump offset of jump instruction
> whose target is within the jited image is determined in this pass, since
> the target instruction address may be changed in step 3.

Wouldn't this require similar convergence process like in x86-64 JIT? You state
the jump instructions are placed in step 4 because step 3 could have changed their
offsets, but then after step 4, couldn't also again the offsets have changed for
the target addresses from 3 again in some corner cases (given emit_a64_mov_i() is
used also in jump encoding)?

> Tested with test_bpf.ko and some arm64 working selftests, nothing failed.
>
> Signed-off-by: Xu Kuohai <[email protected]>
> ---
> arch/arm64/net/bpf_jit_comp.c | 71 ++++++++++++++++++++++++++++-------
> 1 file changed, 58 insertions(+), 13 deletions(-)
>
> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index 30f76178608b..06437e34614b 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -72,6 +72,7 @@ static const int bpf2a64[] = {
> struct jit_ctx {
> const struct bpf_prog *prog;
> int idx;
> + bool write;
> int epilogue_offset;
> int *offset;
> int exentry_idx;
> @@ -91,7 +92,7 @@ struct bpf_plt {
>
> static inline void emit(const u32 insn, struct jit_ctx *ctx)
> {
> - if (ctx->image != NULL)
> + if (ctx->image != NULL && ctx->write)
> ctx->image[ctx->idx] = cpu_to_le32(insn);
>
> ctx->idx++;
> @@ -178,10 +179,29 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
>
> static inline void emit_call(u64 target, struct jit_ctx *ctx)
> {
> - u8 tmp = bpf2a64[TMP_REG_1];
> + u8 tmp;
> + long offset;
> + unsigned long pc;
> + u32 insn = AARCH64_BREAK_FAULT;
> +
> + /* if ctx->image == NULL or target == 0, the jump distance is unknown,
> + * emit indirect call.
> + */
> + if (ctx->image && target) {
> + pc = (unsigned long)&ctx->image[ctx->idx];
> + offset = (long)target - (long)pc;
> + if (offset >= -SZ_128M && offset < SZ_128M)
> + insn = aarch64_insn_gen_branch_imm(pc, target,
> + AARCH64_INSN_BRANCH_LINK);
> + }
>
> - emit_addr_mov_i64(tmp, target, ctx);
> - emit(A64_BLR(tmp), ctx);
> + if (insn == AARCH64_BREAK_FAULT) {
> + tmp = bpf2a64[TMP_REG_1];
> + emit_addr_mov_i64(tmp, target, ctx);
> + emit(A64_BLR(tmp), ctx);
> + } else {
> + emit(insn, ctx);
> + }
> }
>
> static inline int bpf2a64_offset(int bpf_insn, int off,
> @@ -1392,13 +1412,11 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
> const struct bpf_insn *insn = &prog->insnsi[i];
> int ret;
>
> - if (ctx->image == NULL)
> - ctx->offset[i] = ctx->idx;
> + ctx->offset[i] = ctx->idx;
> ret = build_insn(insn, ctx, extra_pass);
> if (ret > 0) {
> i++;
> - if (ctx->image == NULL)
> - ctx->offset[i] = ctx->idx;
> + ctx->offset[i] = ctx->idx;
> continue;
> }
> if (ret)
> @@ -1409,8 +1427,7 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
> * the last element with the offset after the last
> * instruction (end of program)
> */
> - if (ctx->image == NULL)
> - ctx->offset[i] = ctx->idx;
> + ctx->offset[i] = ctx->idx;
>
> return 0;
> }
> @@ -1461,6 +1478,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> bool extra_pass = false;
> struct jit_ctx ctx;
> u8 *image_ptr;
> + int body_idx;
> + int exentry_idx;
>
> if (!prog->jit_requested)
> return orig_prog;
> @@ -1515,6 +1534,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> goto out_off;
> }
>
> + /* Get the max image size */
> if (build_body(&ctx, extra_pass)) {
> prog = orig_prog;
> goto out_off;
> @@ -1528,7 +1548,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> extable_size = prog->aux->num_exentries *
> sizeof(struct exception_table_entry);
>
> - /* Now we know the actual image size. */
> + /* Now we know the max image size. */
> prog_size = sizeof(u32) * ctx.idx;
> /* also allocate space for plt target */
> extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
> @@ -1548,15 +1568,37 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> skip_init_ctx:
> ctx.idx = 0;
> ctx.exentry_idx = 0;
> + ctx.write = true;
>
> build_prologue(&ctx, was_classic);
>
> + /* Record exentry_idx and ctx.idx before first build_body */
> + exentry_idx = ctx.exentry_idx;
> + body_idx = ctx.idx;
> + /* Don't write instruction to memory for now */
> + ctx.write = false;
> +
> + /* Determine call distance and instruction position */
> if (build_body(&ctx, extra_pass)) {
> bpf_jit_binary_free(header);
> prog = orig_prog;
> goto out_off;
> }
>
> + ctx.epilogue_offset = ctx.idx;
> +
> + ctx.exentry_idx = exentry_idx;
> + ctx.idx = body_idx;
> + ctx.write = true;
> +
> + /* Determine jump offset and write result to memory */
> + if (build_body(&ctx, extra_pass) ||
> + WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) {
> + bpf_jit_binary_free(header);
> + prog = orig_prog;
> + goto out_off;
> + }
> +
> build_epilogue(&ctx);
> build_plt(&ctx);
>
> @@ -1567,6 +1609,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> goto out_off;
> }
>
> + /* Update prog size */
> + prog_size = sizeof(u32) * ctx.idx;
> /* And we're done. */
> if (bpf_jit_enable > 1)
> bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
> @@ -1574,8 +1618,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
> bpf_flush_icache(header, ctx.image + ctx.idx);
>
> if (!prog->is_func || extra_pass) {
> - if (extra_pass && ctx.idx != jit_data->ctx.idx) {
> - pr_err_once("multi-func JIT bug %d != %d\n",
> + if (extra_pass && ctx.idx > jit_data->ctx.idx) {
> + pr_err_once("multi-func JIT bug %d > %d\n",
> ctx.idx, jit_data->ctx.idx);
> bpf_jit_binary_free(header);
> prog->bpf_func = NULL;
> @@ -1976,6 +2020,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
> struct jit_ctx ctx = {
> .image = NULL,
> .idx = 0,
> + .write = true,
> };
>
> /* the first 8 arguments are passed by registers */
>