2024-03-14 15:00:25

by Puranjay Mohan

[permalink] [raw]
Subject: [PATCH bpf-next 0/2] bpf,arm64: Add support for BPF Arena

This series adds the support for PROBE_MEM32 and bpf_addr_space_cast
instructions to the ARM64 BPF JIT. These two instructions allow the
enablement of BPF Arena.

All arena related selftests are passing.

[root@ip-172-31-6-62 bpf]# ./test_progs -a "*arena*"
#3/1 arena_htab/arena_htab_llvm:OK
#3/2 arena_htab/arena_htab_asm:OK
#3 arena_htab:OK
#4/1 arena_list/arena_list_1:OK
#4/2 arena_list/arena_list_1000:OK
#4 arena_list:OK
#434/1 verifier_arena/basic_alloc1:OK
#434/2 verifier_arena/basic_alloc2:OK
#434/3 verifier_arena/basic_alloc3:OK
#434/4 verifier_arena/iter_maps1:OK
#434/5 verifier_arena/iter_maps2:OK
#434/6 verifier_arena/iter_maps3:OK
#434 verifier_arena:OK
Summary: 3/10 PASSED, 0 SKIPPED, 0 FAILED

The implementation of bpf_addr_space_cast can be optimised by using ROR
(immediate) and CSEL instructions. Currently, lib/insn.c doesn't have APIs
to generate these instructions. I will send subsequent patches to implement
the APIs and then use these instructions in the JIT.

Puranjay Mohan (2):
bpf: Add arm64 JIT support for PROBE_MEM32 pseudo instructions.
bpf: Add arm64 JIT support for bpf_addr_space_cast instruction.

arch/arm64/net/bpf_jit.h | 1 +
arch/arm64/net/bpf_jit_comp.c | 105 +++++++++++++++++--
tools/testing/selftests/bpf/DENYLIST.aarch64 | 2 -
3 files changed, 96 insertions(+), 12 deletions(-)

--
2.40.1



2024-03-14 15:00:37

by Puranjay Mohan

[permalink] [raw]
Subject: [PATCH bpf-next 1/2] bpf: Add arm64 JIT support for PROBE_MEM32 pseudo instructions.

Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
instructions. They are similar to PROBE_MEM instructions with the
following differences:
- PROBE_MEM32 supports store.
- PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
src/dst register
- PROBE_MEM32 adds the 64-bit kern_vm_start address (which is stored in R28
in the prologue). Due to the way bpf_arena is constructed, any R28 + reg +
off16 access is guaranteed to be within the arena virtual range, so no
address check is needed at run-time.
- PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
LDX faults the destination register is zeroed.

To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
tmp2 as the new src/dst register. This allows us to reuse most of the
code for normal [LDX | STX | ST].

Signed-off-by: Puranjay Mohan <[email protected]>
---
arch/arm64/net/bpf_jit_comp.c | 70 ++++++++++++++++++++++++++++++-----
1 file changed, 60 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index c5b461dda438..ce66c17b73a0 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -29,6 +29,7 @@
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
+#define PROBE_MEM32_BASE (MAX_BPF_JIT_REG + 5)

#define check_imm(bits, imm) do { \
if ((((imm) > 0) && ((imm) >> (bits))) || \
@@ -67,6 +68,8 @@ static const int bpf2a64[] = {
/* temporary register for blinding constants */
[BPF_REG_AX] = A64_R(9),
[FP_BOTTOM] = A64_R(27),
+ /* callee saved register for kern_vm_start address */
+ [PROBE_MEM32_BASE] = A64_R(28),
};

struct jit_ctx {
@@ -295,7 +298,7 @@ static bool is_lsi_offset(int offset, int scale)
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)

static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
- bool is_exception_cb)
+ bool is_exception_cb, u64 arena_vm_start)
{
const struct bpf_prog *prog = ctx->prog;
const bool is_main_prog = !bpf_is_subprog(prog);
@@ -306,6 +309,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 tcc = bpf2a64[TCALL_CNT];
const u8 fpb = bpf2a64[FP_BOTTOM];
+ const u8 pb = bpf2a64[PROBE_MEM32_BASE];
const int idx0 = ctx->idx;
int cur_offset;

@@ -411,6 +415,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,

/* Set up function call stack */
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+
+ if (arena_vm_start)
+ emit_a64_mov_i64(pb, arena_vm_start, ctx);
+
return 0;
}

@@ -738,6 +746,7 @@ static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)

#define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK GENMASK(31, 27)
+#define DONT_CLEAR 32

bool ex_handler_bpf(const struct exception_table_entry *ex,
struct pt_regs *regs)
@@ -745,7 +754,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);

- regs->regs[dst_reg] = 0;
+ if (dst_reg != DONT_CLEAR)
+ regs->regs[dst_reg] = 0;
regs->pc = (unsigned long)&ex->fixup - offset;
return true;
}
@@ -765,7 +775,8 @@ static int add_exception_handler(const struct bpf_insn *insn,
return 0;

if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
- BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
+ BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
+ BPF_MODE(insn->code) != BPF_PROBE_MEM32)
return 0;

if (!ctx->prog->aux->extable ||
@@ -810,6 +821,9 @@ static int add_exception_handler(const struct bpf_insn *insn,

ex->insn = ins_offset;

+ if (BPF_CLASS(insn->code) != BPF_LDX)
+ dst_reg = DONT_CLEAR;
+
ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

@@ -829,12 +843,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
bool extra_pass)
{
const u8 code = insn->code;
- const u8 dst = bpf2a64[insn->dst_reg];
- const u8 src = bpf2a64[insn->src_reg];
+ u8 dst = bpf2a64[insn->dst_reg];
+ u8 src = bpf2a64[insn->src_reg];
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 fp = bpf2a64[BPF_REG_FP];
const u8 fpb = bpf2a64[FP_BOTTOM];
+ const u8 pb = bpf2a64[PROBE_MEM32_BASE];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
@@ -1237,7 +1252,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
- if (ctx->fpb_offset > 0 && src == fp) {
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ emit(A64_ADD(1, tmp2, src, pb), ctx);
+ src = tmp2;
+ }
+ if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
src_adj = fpb;
off_adj = off + ctx->fpb_offset;
} else {
@@ -1322,7 +1345,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_ST | BPF_MEM | BPF_H:
case BPF_ST | BPF_MEM | BPF_B:
case BPF_ST | BPF_MEM | BPF_DW:
- if (ctx->fpb_offset > 0 && dst == fp) {
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ emit(A64_ADD(1, tmp2, dst, pb), ctx);
+ dst = tmp2;
+ }
+ if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
dst_adj = fpb;
off_adj = off + ctx->fpb_offset;
} else {
@@ -1365,6 +1396,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
}
break;
}
+
+ ret = add_exception_handler(insn, ctx, dst);
+ if (ret)
+ return ret;
break;

/* STX: *(size *)(dst + off) = src */
@@ -1372,7 +1407,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
case BPF_STX | BPF_MEM | BPF_H:
case BPF_STX | BPF_MEM | BPF_B:
case BPF_STX | BPF_MEM | BPF_DW:
- if (ctx->fpb_offset > 0 && dst == fp) {
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
+ emit(A64_ADD(1, tmp2, dst, pb), ctx);
+ dst = tmp2;
+ }
+ if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) {
dst_adj = fpb;
off_adj = off + ctx->fpb_offset;
} else {
@@ -1413,6 +1456,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
}
break;
}
+
+ ret = add_exception_handler(insn, ctx, dst);
+ if (ret)
+ return ret;
break;

case BPF_STX | BPF_ATOMIC | BPF_W:
@@ -1594,6 +1641,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
+ u64 arena_vm_start;
u8 *image_ptr;
u8 *ro_image_ptr;

@@ -1611,6 +1659,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = tmp;
}

+ arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
jit_data = prog->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
@@ -1648,7 +1697,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
* BPF line info needs ctx->offset[i] to be the offset of
* instruction[i] in jited image, so build prologue first.
*/
- if (build_prologue(&ctx, was_classic, prog->aux->exception_cb)) {
+ if (build_prologue(&ctx, was_classic, prog->aux->exception_cb,
+ arena_vm_start)) {
prog = orig_prog;
goto out_off;
}
@@ -1696,7 +1746,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.idx = 0;
ctx.exentry_idx = 0;

- build_prologue(&ctx, was_classic, prog->aux->exception_cb);
+ build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start);

if (build_body(&ctx, extra_pass)) {
prog = orig_prog;
--
2.40.1


2024-03-14 15:00:46

by Puranjay Mohan

[permalink] [raw]
Subject: [PATCH bpf-next 2/2] bpf: Add arm64 JIT support for bpf_addr_space_cast instruction.

LLVM generates bpf_addr_space_cast instruction while translating
pointers between native (zero) address space and
__attribute__((address_space(N))). The addr_space=1 is reserved as
bpf_arena address space.

rY = addr_space_cast(rX, 0, 1) is processed by the verifier and
converted to normal 32-bit move: wY = wX

rY = addr_space_cast(rX, 1, 0) has to be converted by JIT:

Here I explain using symbolic language what the JIT is supposed to do:
We have:
src = [src_upper32][src_lower32] // 64 bit src kernel pointer
uvm = [uvm_upper32][uvm_lower32] // 64 bit user_vm_start

The JIT has to make the dst reg like following
dst = [uvm_upper32][src_lower32] // if src_lower32 != 0
dst = [00000000000][00000000000] // if src_lower32 == 0

Signed-off-by: Puranjay Mohan <[email protected]>
---
arch/arm64/net/bpf_jit.h | 1 +
arch/arm64/net/bpf_jit_comp.c | 35 ++++++++++++++++++++
tools/testing/selftests/bpf/DENYLIST.aarch64 | 2 --
3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 23b1b34db088..813c3c428fde 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -238,6 +238,7 @@
#define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV)
#define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV)
#define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
+#define A64_RORV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, RORV)

/* Data-processing (3 source) */
/* Rd = Ra + Rn * Rm */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index ce66c17b73a0..e12e0df3ad1a 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -82,6 +82,7 @@ struct jit_ctx {
__le32 *ro_image;
u32 stack_size;
int fpb_offset;
+ u64 user_vm_start;
};

struct bpf_plt {
@@ -868,6 +869,34 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
/* dst = src */
case BPF_ALU | BPF_MOV | BPF_X:
case BPF_ALU64 | BPF_MOV | BPF_X:
+ if (insn->off == BPF_ADDR_SPACE_CAST &&
+ insn->imm == 1U << 16) {
+ /* Zero out tmp2 */
+ emit(A64_EOR(1, tmp2, tmp2, tmp2), ctx);
+
+ /* Move lo_32_bits(src) to dst */
+ if (dst != src)
+ emit(A64_MOV(0, dst, src), ctx);
+
+ /* Logical shift left by 32 bits */
+ emit(A64_LSL(1, dst, dst, 32), ctx);
+
+ /* Get upper 32 bits of user_vm_start in tmp */
+ emit_a64_mov_i(0, tmp, ctx->user_vm_start >> 32, ctx);
+
+ /* dst |= up_32_bits(user_vm_start) */
+ emit(A64_ORR(1, dst, dst, tmp), ctx);
+
+ /* Rotate by 32 bits to get final result */
+ emit_a64_mov_i(0, tmp, 32, ctx);
+ emit(A64_RORV(1, dst, dst, tmp), ctx);
+
+ /* If lo_32_bits(dst) == 0, set dst = tmp2(0) */
+ emit(A64_CBZ(0, dst, 2), ctx);
+ emit(A64_MOV(1, tmp2, dst), ctx);
+ emit(A64_MOV(1, dst, tmp2), ctx);
+ break;
+ }
switch (insn->off) {
case 0:
emit(A64_MOV(is64, dst, src), ctx);
@@ -1690,6 +1719,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
}

ctx.fpb_offset = find_fpb_offset(prog);
+ ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);

/*
* 1. Initial fake pass to compute ctx->idx and ctx->offset.
@@ -2514,6 +2544,11 @@ bool bpf_jit_supports_exceptions(void)
return true;
}

+bool bpf_jit_supports_arena(void)
+{
+ return true;
+}
+
void bpf_jit_free(struct bpf_prog *prog)
{
if (prog->jited) {
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index d8ade15e2789..0445ac38bc07 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -10,5 +10,3 @@ fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_mu
fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
-verifier_arena # JIT does not support arena
-arena_htab # JIT does not support arena
--
2.40.1


2024-03-14 17:08:37

by Kumar Kartikeya Dwivedi

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/2] bpf: Add arm64 JIT support for PROBE_MEM32 pseudo instructions.

On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <[email protected]> wrote:
>
> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> instructions. They are similar to PROBE_MEM instructions with the
> following differences:
> - PROBE_MEM32 supports store.
> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
> src/dst register
> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
> in the prologue). Due to bpf_arena constructions such R28 + reg +
> off16 access is guaranteed to be within arena virtual range, so no
> address check at run-time.
> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
> LDX faults the destination register is zeroed.
>
> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
> tmp2 as the new src/dst register. This allows us to reuse most of the
> code for normal [LDX | STX | ST].
>
> Signed-off-by: Puranjay Mohan <[email protected]>
> ---

Hi Alexei,
Puranjay and I were discussing this stuff off list and noticed that
atomic instructions are not handled.
It turns out that will cause a kernel crash right now because the
32-bit offset into arena will be dereferenced directly.

e.g. something like this:

@@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
test_val++;
n->value = i;
arena_sum += i;
+ __sync_fetch_and_add(&arena_sum, 0);
list_add_head(&n->node, list_head);
}
#else

I will try to prepare a fix for the x86 JIT. Puranjay will do the same
for his set.

2024-03-14 17:14:11

by Puranjay Mohan

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/2] bpf: Add arm64 JIT support for PROBE_MEM32 pseudo instructions.

Kumar Kartikeya Dwivedi <[email protected]> writes:

> On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <[email protected]> wrote:
>>
>> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
>> instructions. They are similar to PROBE_MEM instructions with the
>> following differences:
>> - PROBE_MEM32 supports store.
>> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
>> src/dst register
>> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
>> in the prologue). Due to bpf_arena constructions such R28 + reg +
>> off16 access is guaranteed to be within arena virtual range, so no
>> address check at run-time.
>> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
>> LDX faults the destination register is zeroed.
>>
>> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
>> tmp2 as the new src/dst register. This allows us to reuse most of the
>> code for normal [LDX | STX | ST].
>>
>> Signed-off-by: Puranjay Mohan <[email protected]>
>> ---
>
> Hi Alexei,
> Puranjay and I were discussing this stuff off list and noticed that
> atomic instructions are not handled.
> It turns out that will cause a kernel crash right now because the
> 32-bit offset into arena will be dereferenced directly.
>
> e.g. something like this:
>
> @@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
> test_val++;
> n->value = i;
> arena_sum += i;
> + __sync_fetch_and_add(&arena_sum, 0);
> list_add_head(&n->node, list_head);
> }
> #else
>
> I will try to prepare a fix for the x86 JIT. Puranjay will do the same
> for his set.

Yes, testing the change mentioned by Kumar on ARM64 causes a crash as well:

bpf_testmod: loading out-of-tree module taints kernel.
bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
Mem abort info:
ESR = 0x0000000096000006
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
FSC = 0x06: level 2 translation fault
Data abort info:
ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
CM = 0, WnR = 0, TnD = 0, TagAccess = 0
GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000
[0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000
Internal error: Oops: 0000000096000006 [#1] SMP
Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs
CPU: 8 PID: 5631 Comm: test_progs Tainted: G OE 6.8.0+ #2
Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018
pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8
sp : ffff80008b84bc30
x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38
x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000
x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838
x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8
x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c
x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf
x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001
x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010
x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0
Call trace:
bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
bpf_prog_test_run_syscall+0x100/0x340
__sys_bpf+0x8e8/0xa20
__arm64_sys_bpf+0x2c/0x48
invoke_syscall+0x50/0x128
el0_svc_common.constprop.0+0x48/0xf8
do_el0_svc+0x28/0x40
el0_svc+0x58/0x190
el0t_64_sync_handler+0x13c/0x158
el0t_64_sync+0x1a8/0x1b0
Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f)
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Oops: Fatal exception
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x0,00000120,7002014a,21407a0b
Memory Limit: none
Rebooting in 5 seconds..

I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified
as well to add BPF_PROBE_MEM32 to atomic instructions.

Thanks,
Puranjay

2024-03-14 17:22:28

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/2] bpf: Add arm64 JIT support for PROBE_MEM32 pseudo instructions.

On Thu, Mar 14, 2024 at 10:13 AM Puranjay Mohan <[email protected]> wrote:
>
> Kumar Kartikeya Dwivedi <[email protected]> writes:
>
> > On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <[email protected]> wrote:
> >>
> >> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> >> instructions. They are similar to PROBE_MEM instructions with the
> >> following differences:
> >> - PROBE_MEM32 supports store.
> >> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
> >> src/dst register
> >> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
> >> in the prologue). Due to bpf_arena constructions such R28 + reg +
> >> off16 access is guaranteed to be within arena virtual range, so no
> >> address check at run-time.
> >> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
> >> LDX faults the destination register is zeroed.
> >>
> >> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
> >> tmp2 as the new src/dst register. This allows us to reuse most of the
> >> code for normal [LDX | STX | ST].
> >>
> >> Signed-off-by: Puranjay Mohan <[email protected]>
> >> ---
> >
> > Hi Alexei,
> > Puranjay and I were discussing this stuff off list and noticed that
> > atomic instructions are not handled.
> > It turns out that will cause a kernel crash right now because the
> > 32-bit offset into arena will be dereferenced directly.
> >
> > e.g. something like this:
> >
> > @@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
> > test_val++;
> > n->value = i;
> > arena_sum += i;
> > + __sync_fetch_and_add(&arena_sum, 0);
> > list_add_head(&n->node, list_head);
> > }
> > #else
> >
> > I will try to prepare a fix for the x86 JIT. Puranjay will do the same
> > for his set.
>
> Yes, testing the change mentioned by Kumar on ARM64 causes a crashes as well:
>
> bpf_testmod: loading out-of-tree module taints kernel.
> bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel
> Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
> Mem abort info:
> ESR = 0x0000000096000006
> EC = 0x25: DABT (current EL), IL = 32 bits
> SET = 0, FnV = 0
> EA = 0, S1PTW = 0
> FSC = 0x06: level 2 translation fault
> Data abort info:
> ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
> CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000
> [0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000
> Internal error: Oops: 0000000096000006 [#1] SMP
> Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs
> CPU: 8 PID: 5631 Comm: test_progs Tainted: G OE 6.8.0+ #2
> Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018
> pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
> lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8
> sp : ffff80008b84bc30
> x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38
> x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000
> x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838
> x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000
> x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8
> x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c
> x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf
> x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001
> x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010
> x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0
> Call trace:
> bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
> bpf_prog_test_run_syscall+0x100/0x340
> __sys_bpf+0x8e8/0xa20
> __arm64_sys_bpf+0x2c/0x48
> invoke_syscall+0x50/0x128
> el0_svc_common.constprop.0+0x48/0xf8
> do_el0_svc+0x28/0x40
> el0_svc+0x58/0x190
> el0t_64_sync_handler+0x13c/0x158
> el0t_64_sync+0x1a8/0x1b0
> Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f)
> ---[ end trace 0000000000000000 ]---
> Kernel panic - not syncing: Oops: Fatal exception
> SMP: stopping secondary CPUs
> Kernel Offset: disabled
> CPU features: 0x0,00000120,7002014a,21407a0b
> Memory Limit: none
> Rebooting in 5 seconds..
>
> I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified
> as well to add BPF_PROBE_MEM32 to atomic instructions.

The JIT and the verifier changes for atomics might be too big.
Let's disable atomics in arena in the verifier for now.
Pls send a patch.

2024-03-15 10:33:05

by Puranjay Mohan

[permalink] [raw]
Subject: Re: [PATCH bpf-next 1/2] bpf: Add arm64 JIT support for PROBE_MEM32 pseudo instructions.

On Thu, Mar 14, 2024 at 6:21 PM Alexei Starovoitov
<[email protected]> wrote:
>
> On Thu, Mar 14, 2024 at 10:13 AM Puranjay Mohan <puranjay12@gmailcom> wrote:
> >
> > Kumar Kartikeya Dwivedi <[email protected]> writes:
> >
> > > On Thu, 14 Mar 2024 at 16:00, Puranjay Mohan <[email protected]> wrote:
> > >>
> > >> Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
> > >> instructions. They are similar to PROBE_MEM instructions with the
> > >> following differences:
> > >> - PROBE_MEM32 supports store.
> > >> - PROBE_MEM32 relies on the verifier to clear upper 32-bit of the
> > >> src/dst register
> > >> - PROBE_MEM32 adds 64-bit kern_vm_start address (which is stored in R28
> > >> in the prologue). Due to bpf_arena constructions such R28 + reg +
> > >> off16 access is guaranteed to be within arena virtual range, so no
> > >> address check at run-time.
> > >> - PROBE_MEM32 allows STX and ST. If they fault the store is a nop. When
> > >> LDX faults the destination register is zeroed.
> > >>
> > >> To support these on arm64, we do tmp2 = R28 + src/dst reg and then use
> > >> tmp2 as the new src/dst register. This allows us to reuse most of the
> > >> code for normal [LDX | STX | ST].
> > >>
> > >> Signed-off-by: Puranjay Mohan <[email protected]>
> > >> ---
> > >
> > > Hi Alexei,
> > > Puranjay and I were discussing this stuff off list and noticed that
> > > atomic instructions are not handled.
> > > It turns out that will cause a kernel crash right now because the
> > > 32-bit offset into arena will be dereferenced directly.
> > >
> > > e.g. something like this:
> > >
> > > @@ -55,6 +56,7 @@ int arena_list_add(void *ctx)
> > > test_val++;
> > > n->value = i;
> > > arena_sum += i;
> > > + __sync_fetch_and_add(&arena_sum, 0);
> > > list_add_head(&n->node, list_head);
> > > }
> > > #else
> > >
> > > I will try to prepare a fix for the x86 JIT. Puranjay will do the same
> > > for his set.
> >
> > Yes, testing the change mentioned by Kumar on ARM64 causes a crashes as well:
> >
> > bpf_testmod: loading out-of-tree module taints kernel.
> > bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel
> > Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
> > Mem abort info:
> > ESR = 0x0000000096000006
> > EC = 0x25: DABT (current EL), IL = 32 bits
> > SET = 0, FnV = 0
> > EA = 0, S1PTW = 0
> > FSC = 0x06: level 2 translation fault
> > Data abort info:
> > ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
> > CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> > GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> > user pgtable: 4k pages, 48-bit VAs, pgdp=00000004043cc000
> > [0000000000000010] pgd=0800000410d8f003, p4d=0800000410d8f003, pud=0800000405972003, pmd=0000000000000000
> > Internal error: Oops: 0000000096000006 [#1] SMP
> > Modules linked in: bpf_testmod(OE) nls_ascii nls_cp437 sunrpc vfat fat aes_ce_blk aes_ce_cipher ghash_ce sha1_ce button sch_fq_codel dm_mod dax configfs dmi_sysfs sha2_ce sha256_arm64 efivarfs
> > CPU: 8 PID: 5631 Comm: test_progs Tainted: G OE 6.8.0+ #2
> > Hardware name: Amazon EC2 c6g.16xlarge/, BIOS 1.0 11/1/2018
> > pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> > pc : bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
> > lr : bpf_prog_8771c336cb6a18eb_arena_list_add+0x144/0x2b8
> > sp : ffff80008b84bc30
> > x29: ffff80008b84bca0 x28: ffff8000a5008000 x27: ffff80008b84bc38
> > x26: 0000000000000000 x25: ffff80008b84bc60 x24: 0000000000000000
> > x23: 0000000000000000 x22: 0000000000000058 x21: 0000000000000838
> > x20: 0000000000000000 x19: 0000000100001fe0 x18: 0000000000000000
> > x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffcc66d2c8
> > x14: 0000000000000000 x13: 0000000000000000 x12: 000000000004058c
> > x11: ffff8000a5008010 x10: 00000000ffffffff x9 : 00000000000002cf
> > x8 : ffff800082ff4ab8 x7 : 0000000100001000 x6 : 0000000000000001
> > x5 : 0000000010e5e3fd x4 : 000000003619b978 x3 : 0000000000000010
> > x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000001fe0
> > Call trace:
> > bpf_prog_8771c336cb6a18eb_arena_list_add+0x204/0x2b8
> > bpf_prog_test_run_syscall+0x100/0x340
> > __sys_bpf+0x8e8/0xa20
> > __arm64_sys_bpf+0x2c/0x48
> > invoke_syscall+0x50/0x128
> > el0_svc_common.constprop.0+0x48/0xf8
> > do_el0_svc+0x28/0x40
> > el0_svc+0x58/0x190
> > el0t_64_sync_handler+0x13c/0x158
> > el0t_64_sync+0x1a8/0x1b0
> > Code: 8b010042 8b1c006b f9000162 d2800001 (f821307f)
> > ---[ end trace 0000000000000000 ]---
> > Kernel panic - not syncing: Oops: Fatal exception
> > SMP: stopping secondary CPUs
> > Kernel Offset: disabled
> > CPU features: 0x0,00000120,7002014a,21407a0b
> > Memory Limit: none
> > Rebooting in 5 seconds..
> >
> > I will send v2 with the arm64 JIT fix, but I guess verifier has to be modified
> > as well to add BPF_PROBE_MEM32 to atomic instructions.
>
> The JIT and the verifier changes for atomics might be too big.
> Let's disable atomics in arena in the verifier for now.
> Pls send a patch.

As atomics are disabled in the Arena now, this series will not require
any changes.
Looking forward to the reviews.

Thanks,
Puranjay