BPF trampoline is the critical infrastructure of the bpf
subsystem, acting as a mediator between kernel functions
and BPF programs. Numerous important features, such as
using ebpf program for zero overhead kernel introspection,
rely on this key component. We can't wait to support bpf
trampoline on RV64. Since RV64 does not support ftrace
direct call yet, the current RV64 bpf trampoline is only
used in bpf context.
As most of riscv cpu support unaligned memory accesses,
we temporarily use patch [1] to facilitate testing. The
test results are as follow, and test_verifier with no
new failure ceses.
- fexit_bpf2bpf:OK
- dummy_st_ops:OK
- xdp_bpf2bpf:OK
[1] https://lore.kernel.org/linux-riscv/[email protected]/
v1:
- Remove the logic of bpf_arch_text_poke supported for
kernel functions. (Kuohai and Björn)
- Extend patch_text for multiple instructions. (Björn)
- Fix OOB issue when image too big. (Björn)
RFC:
https://lore.kernel.org/bpf/[email protected]/
Pu Lehui (4):
riscv: Extend patch_text for multiple instructions
riscv, bpf: Factor out emit_call for kernel and bpf context
riscv, bpf: Add bpf_arch_text_poke support for RV64
riscv, bpf: Add bpf trampoline support for RV64
arch/riscv/include/asm/patch.h | 2 +-
arch/riscv/kernel/patch.c | 19 +-
arch/riscv/kernel/probes/kprobes.c | 15 +-
arch/riscv/net/bpf_jit.h | 5 +
arch/riscv/net/bpf_jit_comp64.c | 437 +++++++++++++++++++++++++++--
5 files changed, 444 insertions(+), 34 deletions(-)
--
2.25.1
From: Pu Lehui <[email protected]>
BPF trampoline is the critical infrastructure of the bpf
subsystem, acting as a mediator between kernel functions
and BPF programs. Numerous important features, such as
using ebpf program for zero overhead kernel introspection,
rely on this key component. We can't wait to support bpf
trampoline on RV64. The related tests have passed, as well
as the test_verifier with no new failure ceses.
Signed-off-by: Pu Lehui <[email protected]>
---
arch/riscv/net/bpf_jit_comp64.c | 319 ++++++++++++++++++++++++++++++++
1 file changed, 319 insertions(+)
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index b6b9bbcc977a..ef79c54a8eb2 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -695,6 +695,325 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
return ret;
}
+static void store_args(int nregs, int args_off, struct rv_jit_context *ctx)
+{
+ int i;
+
+ for (i = 0; i < nregs; i++) {
+ emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
+ args_off -= 8;
+ }
+}
+
+static void restore_args(int nregs, int args_off, struct rv_jit_context *ctx)
+{
+ int i;
+
+ for (i = 0; i < nregs; i++) {
+ emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
+ args_off -= 8;
+ }
+}
+
+static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
+ int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
+{
+ int ret, branch_off;
+ struct bpf_prog *p = l->link.prog;
+ int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
+
+ if (l->cookie) {
+ emit_imm(RV_REG_T1, l->cookie, ctx);
+ emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_T1, ctx);
+ } else {
+ emit_sd(RV_REG_FP, -run_ctx_off + cookie_off, RV_REG_ZERO, ctx);
+ }
+
+ /* arg1: prog */
+ emit_imm(RV_REG_A0, (const s64)p, ctx);
+ /* arg2: &run_ctx */
+ emit_addi(RV_REG_A1, RV_REG_FP, -run_ctx_off, ctx);
+ ret = emit_call((const u64)bpf_trampoline_enter(p), true, ctx);
+ if (ret)
+ return ret;
+
+ /* if (__bpf_prog_enter(prog) == 0)
+ * goto skip_exec_of_prog;
+ */
+ branch_off = ctx->ninsns;
+ /* nop reserved for conditional jump */
+ emit(rv_nop(), ctx);
+
+ /* store prog start time */
+ emit_mv(RV_REG_S1, RV_REG_A0, ctx);
+
+ /* arg1: &args_off */
+ emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
+ if (!p->jited)
+ /* arg2: progs[i]->insnsi for interpreter */
+ emit_imm(RV_REG_A1, (const s64)p->insnsi, ctx);
+ ret = emit_call((const u64)p->bpf_func, true, ctx);
+ if (ret)
+ return ret;
+
+ if (save_ret)
+ emit_sd(RV_REG_FP, -retval_off, regmap[BPF_REG_0], ctx);
+
+ /* update branch with beqz */
+ if (ctx->insns) {
+ int offset = ninsns_rvoff(ctx->ninsns - branch_off);
+ u32 insn = rv_beq(RV_REG_A0, RV_REG_ZERO, offset >> 1);
+ *(u32 *)(ctx->insns + branch_off) = insn;
+ }
+
+ /* arg1: prog */
+ emit_imm(RV_REG_A0, (const s64)p, ctx);
+ /* arg2: prog start time */
+ emit_mv(RV_REG_A1, RV_REG_S1, ctx);
+ /* arg3: &run_ctx */
+ emit_addi(RV_REG_A2, RV_REG_FP, -run_ctx_off, ctx);
+ ret = emit_call((const u64)bpf_trampoline_exit(p), true, ctx);
+
+ return ret;
+}
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
+ const struct btf_func_model *m,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr, u32 flags,
+ struct rv_jit_context *ctx)
+{
+ int i, ret, offset;
+ int *branches_off = NULL;
+ int stack_size = 0, nregs = m->nr_args;
+ int retaddr_off, fp_off, retval_off, args_off;
+ int nregs_off, ip_off, run_ctx_off, sreg_off;
+ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ void *orig_call = func_addr;
+ bool save_ret;
+ u32 insn;
+
+ /* Generated trampoline stack layout:
+ *
+ * FP - 8 [ RA of parent func ] return address of parent
+ * function
+ * FP - retaddr_off [ RA of traced func ] return address of traced
+ * function
+ * FP - fp_off [ FP of parent func ]
+ *
+ * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or
+ * BPF_TRAMP_F_RET_FENTRY_RET
+ * [ argN ]
+ * [ ... ]
+ * FP - args_off [ arg1 ]
+ *
+ * FP - nregs_off [ regs count ]
+ *
+ * FP - ip_off [ traced func ] BPF_TRAMP_F_IP_ARG
+ *
+ * FP - run_ctx_off [ bpf_tramp_run_ctx ]
+ *
+ * FP - sreg_off [ callee saved reg ]
+ *
+ * [ pads ] pads for 16 bytes alignment
+ */
+
+ if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
+ return -ENOTSUPP;
+
+ /* extra regiters for struct arguments */
+ for (i = 0; i < m->nr_args; i++)
+ if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
+ nregs += round_up(m->arg_size[i], 8) / 8 - 1;
+
+ /* 8 arguments passed by registers */
+ if (nregs > 8)
+ return -ENOTSUPP;
+
+ /* room for parent function return address */
+ stack_size += 8;
+
+ stack_size += 8;
+ retaddr_off = stack_size;
+
+ stack_size += 8;
+ fp_off = stack_size;
+
+ save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+ if (save_ret) {
+ stack_size += 8;
+ retval_off = stack_size;
+ }
+
+ stack_size += nregs * 8;
+ args_off = stack_size;
+
+ stack_size += 8;
+ nregs_off = stack_size;
+
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ stack_size += 8;
+ ip_off = stack_size;
+ }
+
+ stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
+ run_ctx_off = stack_size;
+
+ stack_size += 8;
+ sreg_off = stack_size;
+
+ stack_size = round_up(stack_size, 16);
+
+ emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx);
+
+ emit_sd(RV_REG_SP, stack_size - retaddr_off, RV_REG_RA, ctx);
+ emit_sd(RV_REG_SP, stack_size - fp_off, RV_REG_FP, ctx);
+
+ emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx);
+
+ /* callee saved register S1 to pass start time */
+ emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx);
+
+ /* store ip address of the traced function */
+ if (flags & BPF_TRAMP_F_IP_ARG) {
+ emit_imm(RV_REG_T1, (const s64)func_addr, ctx);
+ emit_sd(RV_REG_FP, -ip_off, RV_REG_T1, ctx);
+ }
+
+ emit_li(RV_REG_T1, nregs, ctx);
+ emit_sd(RV_REG_FP, -nregs_off, RV_REG_T1, ctx);
+
+ store_args(nregs, args_off, ctx);
+
+ /* skip to actual body of traced function */
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ orig_call += 16;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ emit_imm(RV_REG_A0, (const s64)im, ctx);
+ ret = emit_call((const u64)__bpf_tramp_enter, true, ctx);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < fentry->nr_links; i++) {
+ ret = invoke_bpf_prog(fentry->links[i], args_off, retval_off, run_ctx_off,
+ flags & BPF_TRAMP_F_RET_FENTRY_RET, ctx);
+ if (ret)
+ return ret;
+ }
+
+ if (fmod_ret->nr_links) {
+ branches_off = kcalloc(fmod_ret->nr_links, sizeof(int), GFP_KERNEL);
+ if (!branches_off)
+ return -ENOMEM;
+
+ /* cleanup to avoid garbage return value confusion */
+ emit_sd(RV_REG_FP, -retval_off, RV_REG_ZERO, ctx);
+ for (i = 0; i < fmod_ret->nr_links; i++) {
+ ret = invoke_bpf_prog(fmod_ret->links[i], args_off, retval_off,
+ run_ctx_off, true, ctx);
+ if (ret)
+ goto out;
+ emit_ld(RV_REG_T1, -retval_off, RV_REG_FP, ctx);
+ branches_off[i] = ctx->ninsns;
+ /* nop reserved for conditional jump */
+ emit(rv_nop(), ctx);
+ }
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ restore_args(nregs, args_off, ctx);
+ ret = emit_call((const u64)orig_call, true, ctx);
+ if (ret)
+ goto out;
+ emit_sd(RV_REG_FP, -retval_off, RV_REG_A0, ctx);
+ im->ip_after_call = ctx->insns + ctx->ninsns;
+ /* 2 nops reserved for auipc+jalr pair */
+ emit(rv_nop(), ctx);
+ emit(rv_nop(), ctx);
+ }
+
+ /* update branches saved in invoke_bpf_mod_ret with bnez */
+ for (i = 0; ctx->insns && i < fmod_ret->nr_links; i++) {
+ offset = ninsns_rvoff(ctx->ninsns - branches_off[i]);
+ insn = rv_bne(RV_REG_T1, RV_REG_ZERO, offset >> 1);
+ *(u32 *)(ctx->insns + branches_off[i]) = insn;
+ }
+
+ for (i = 0; i < fexit->nr_links; i++) {
+ ret = invoke_bpf_prog(fexit->links[i], args_off, retval_off,
+ run_ctx_off, false, ctx);
+ if (ret)
+ goto out;
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ im->ip_epilogue = ctx->insns + ctx->ninsns;
+ emit_imm(RV_REG_A0, (const s64)im, ctx);
+ ret = emit_call((const u64)__bpf_tramp_exit, true, ctx);
+ if (ret)
+ goto out;
+ }
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ restore_args(nregs, args_off, ctx);
+
+ if (save_ret)
+ emit_ld(RV_REG_A0, -retval_off, RV_REG_FP, ctx);
+
+ emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx);
+
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* return address of parent function */
+ emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx);
+ else
+ /* return address of traced function */
+ emit_ld(RV_REG_RA, stack_size - retaddr_off, RV_REG_SP, ctx);
+
+ emit_ld(RV_REG_FP, stack_size - fp_off, RV_REG_SP, ctx);
+ emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx);
+
+ emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx);
+
+ ret = ctx->ninsns;
+out:
+ if (branches_off)
+ kfree(branches_off);
+
+ return ret;
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+ void *image_end, const struct btf_func_model *m,
+ u32 flags, struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ int ret;
+ struct rv_jit_context ctx;
+
+ ctx.ninsns = 0;
+ ctx.insns = NULL;
+ ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+ if (ret < 0)
+ return ret;
+
+ if (ninsns_rvoff(ret) > (long)image_end - (long)image)
+ return -EFBIG;
+
+ ctx.ninsns = 0;
+ ctx.insns = image;
+ ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+ if (ret < 0)
+ return ret;
+
+ bpf_flush_icache(ctx.insns, ctx.insns + ctx.ninsns);
+
+ return ninsns_rvoff(ret);
+}
+
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass)
{
--
2.25.1
From: Pu Lehui <[email protected]>
The current emit_call function is not suitable for kernel
function call as it store return value to bpf R0 register.
We can separate it out for common use. Meanwhile, simplify
judgment logic, that is, fixed function address can use jal
or auipc+jalr, while the unfixed can use only auipc+jalr.
Signed-off-by: Pu Lehui <[email protected]>
---
arch/riscv/net/bpf_jit_comp64.c | 30 +++++++++++++-----------------
1 file changed, 13 insertions(+), 17 deletions(-)
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index f2417ac54edd..69ebab81d935 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -428,12 +428,12 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
*rd = RV_REG_T2;
}
-static int emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
+static int emit_jump_and_link(u8 rd, s64 rvoff, bool fixed_addr,
struct rv_jit_context *ctx)
{
s64 upper, lower;
- if (rvoff && is_21b_int(rvoff) && !force_jalr) {
+ if (rvoff && fixed_addr && is_21b_int(rvoff)) {
emit(rv_jal(rd, rvoff >> 1), ctx);
return 0;
} else if (in_auipc_jalr_range(rvoff)) {
@@ -454,24 +454,17 @@ static bool is_signed_bpf_cond(u8 cond)
cond == BPF_JSGE || cond == BPF_JSLE;
}
-static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
+static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
{
s64 off = 0;
u64 ip;
- u8 rd;
- int ret;
if (addr && ctx->insns) {
ip = (u64)(long)(ctx->insns + ctx->ninsns);
off = addr - ip;
}
- ret = emit_jump_and_link(RV_REG_RA, off, !fixed, ctx);
- if (ret)
- return ret;
- rd = bpf_to_rv_reg(BPF_REG_0, ctx);
- emit_mv(rd, RV_REG_A0, ctx);
- return 0;
+ return emit_jump_and_link(RV_REG_RA, off, fixed_addr, ctx);
}
static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
@@ -913,7 +906,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* JUMP off */
case BPF_JMP | BPF_JA:
rvoff = rv_offset(i, off, ctx);
- ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
+ ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
if (ret)
return ret;
break;
@@ -1032,17 +1025,20 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
/* function call */
case BPF_JMP | BPF_CALL:
{
- bool fixed;
+ bool fixed_addr;
u64 addr;
mark_call(ctx);
- ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
- &fixed);
+ ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
+ &addr, &fixed_addr);
if (ret < 0)
return ret;
- ret = emit_call(fixed, addr, ctx);
+
+ ret = emit_call(addr, fixed_addr, ctx);
if (ret)
return ret;
+
+ emit_mv(bpf_to_rv_reg(BPF_REG_0, ctx), RV_REG_A0, ctx);
break;
}
/* tail call */
@@ -1057,7 +1053,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
break;
rvoff = epilogue_offset(ctx);
- ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
+ ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
if (ret)
return ret;
break;
--
2.25.1
From: Pu Lehui <[email protected]>
Extend patch_text for multiple instructions. This
is the preparaiton for multiple instructions text
patching in riscv bpf trampoline, and may be useful
for other scenario.
Signed-off-by: Pu Lehui <[email protected]>
---
arch/riscv/include/asm/patch.h | 2 +-
arch/riscv/kernel/patch.c | 19 ++++++++++++-------
arch/riscv/kernel/probes/kprobes.c | 15 ++++++++-------
3 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
index 9a7d7346001e..f433121774c0 100644
--- a/arch/riscv/include/asm/patch.h
+++ b/arch/riscv/include/asm/patch.h
@@ -7,6 +7,6 @@
#define _ASM_RISCV_PATCH_H
int patch_text_nosync(void *addr, const void *insns, size_t len);
-int patch_text(void *addr, u32 insn);
+int patch_text(void *addr, u32 *insns, int ninsns);
#endif /* _ASM_RISCV_PATCH_H */
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 765004b60513..8086d1a281cd 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -15,7 +15,8 @@
struct patch_insn {
void *addr;
- u32 insn;
+ u32 *insns;
+ int ninsns;
atomic_t cpu_count;
};
@@ -102,12 +103,15 @@ NOKPROBE_SYMBOL(patch_text_nosync);
static int patch_text_cb(void *data)
{
struct patch_insn *patch = data;
- int ret = 0;
+ unsigned long len;
+ int i, ret = 0;
if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
- ret =
- patch_text_nosync(patch->addr, &patch->insn,
- GET_INSN_LENGTH(patch->insn));
+ for (i = 0; ret == 0 && i < patch->ninsns; i++) {
+ len = GET_INSN_LENGTH(patch->insns[i]);
+ ret = patch_text_nosync(patch->addr + i * len,
+ &patch->insns[i], len);
+ }
atomic_inc(&patch->cpu_count);
} else {
while (atomic_read(&patch->cpu_count) <= num_online_cpus())
@@ -119,11 +123,12 @@ static int patch_text_cb(void *data)
}
NOKPROBE_SYMBOL(patch_text_cb);
-int patch_text(void *addr, u32 insn)
+int patch_text(void *addr, u32 *insns, int ninsns)
{
struct patch_insn patch = {
.addr = addr,
- .insn = insn,
+ .insns = insns,
+ .ninsns = ninsns,
.cpu_count = ATOMIC_INIT(0),
};
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 41c7481afde3..ef6d6e702485 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -23,13 +23,14 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
+ u32 insn = __BUG_INSN_32;
unsigned long offset = GET_INSN_LENGTH(p->opcode);
p->ainsn.api.restore = (unsigned long)p->addr + offset;
- patch_text(p->ainsn.api.insn, p->opcode);
+ patch_text(p->ainsn.api.insn, &p->opcode, 1);
patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset),
- __BUG_INSN_32);
+ &insn, 1);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
@@ -114,16 +115,16 @@ void *alloc_insn_page(void)
/* install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
- if ((p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32)
- patch_text(p->addr, __BUG_INSN_32);
- else
- patch_text(p->addr, __BUG_INSN_16);
+ u32 insn = (p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32 ?
+ __BUG_INSN_32 : __BUG_INSN_16;
+
+ patch_text(p->addr, &insn, 1);
}
/* remove breakpoint from text */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, p->opcode);
+ patch_text(p->addr, &p->opcode, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
--
2.25.1
From: Pu Lehui <[email protected]>
Implement bpf_arch_text_poke for RV64. For call scenario,
to make bpf trampoline compatible with the kernel and bpf
context, we follow the framework of RV64 ftrace to reserve
4 nops for bpf programs as function entry, and use auipc+jalr
instructions for function call. However, since auipc+jalr
call instruction is non-atomic operation, we need to use
stop-machine to make sure instructions patching in atomic
context. Also, we use auipc+jalr pair and need to patch
in stop-machine context for jump scenario.
Signed-off-by: Pu Lehui <[email protected]>
---
arch/riscv/net/bpf_jit.h | 5 ++
arch/riscv/net/bpf_jit_comp64.c | 88 ++++++++++++++++++++++++++++++++-
2 files changed, 91 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h
index d926e0f7ef57..bf9802a63061 100644
--- a/arch/riscv/net/bpf_jit.h
+++ b/arch/riscv/net/bpf_jit.h
@@ -573,6 +573,11 @@ static inline u32 rv_fence(u8 pred, u8 succ)
return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
}
+static inline u32 rv_nop(void)
+{
+ return rv_i_insn(0, 0, 0, 0, 0x13);
+}
+
/* RVC instrutions. */
static inline u16 rvc_addi4spn(u8 rd, u32 imm10)
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 69ebab81d935..b6b9bbcc977a 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -8,6 +8,8 @@
#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/memory.h>
+#include <linux/stop_machine.h>
#include "bpf_jit.h"
#define RV_REG_TCC RV_REG_A6
@@ -238,7 +240,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
if (!is_tail_call)
emit_mv(RV_REG_A0, RV_REG_A5, ctx);
emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
- is_tail_call ? 4 : 0, /* skip TCC init */
+ is_tail_call ? 20 : 0, /* skip reserved nops and TCC init */
ctx);
}
@@ -615,6 +617,84 @@ static int add_exception_handler(const struct bpf_insn *insn,
return 0;
}
+static int gen_call_or_nops(void *target, void *ip, u32 *insns)
+{
+ s64 rvoff;
+ int i, ret;
+ struct rv_jit_context ctx;
+
+ ctx.ninsns = 0;
+ ctx.insns = (u16 *)insns;
+
+ if (!target) {
+ for (i = 0; i < 4; i++)
+ emit(rv_nop(), &ctx);
+ return 0;
+ }
+
+ rvoff = (s64)(target - (ip + 4));
+ emit(rv_sd(RV_REG_SP, -8, RV_REG_RA), &ctx);
+ ret = emit_jump_and_link(RV_REG_RA, rvoff, false, &ctx);
+ if (ret)
+ return ret;
+ emit(rv_ld(RV_REG_RA, -8, RV_REG_SP), &ctx);
+
+ return 0;
+}
+
+static int gen_jump_or_nops(void *target, void *ip, u32 *insns)
+{
+ s64 rvoff;
+ struct rv_jit_context ctx;
+
+ ctx.ninsns = 0;
+ ctx.insns = (u16 *)insns;
+
+ if (!target) {
+ emit(rv_nop(), &ctx);
+ emit(rv_nop(), &ctx);
+ return 0;
+ }
+
+ rvoff = (s64)(target - ip);
+ return emit_jump_and_link(RV_REG_ZERO, rvoff, false, &ctx);
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
+ void *old_addr, void *new_addr)
+{
+ u32 old_insns[4], new_insns[4];
+ bool is_call = poke_type == BPF_MOD_CALL;
+ int (*gen_insns)(void *target, void *ip, u32 *insns);
+ int ninsns = is_call ? 4 : 2;
+ int ret;
+
+ if (!is_bpf_text_address((unsigned long)ip))
+ return -ENOTSUPP;
+
+ gen_insns = is_call ? gen_call_or_nops : gen_jump_or_nops;
+
+ ret = gen_insns(old_addr, ip, old_insns);
+ if (ret)
+ return ret;
+
+ if (memcmp(ip, old_insns, ninsns * 4))
+ return -EFAULT;
+
+ ret = gen_insns(new_addr, ip, new_insns);
+ if (ret)
+ return ret;
+
+ cpus_read_lock();
+ mutex_lock(&text_mutex);
+ if (memcmp(ip, new_insns, ninsns * 4))
+ ret = patch_text(ip, new_insns, ninsns);
+ mutex_unlock(&text_mutex);
+ cpus_read_unlock();
+
+ return ret;
+}
+
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass)
{
@@ -1266,7 +1346,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
void bpf_jit_build_prologue(struct rv_jit_context *ctx)
{
- int stack_adjust = 0, store_offset, bpf_stack_adjust;
+ int i, stack_adjust = 0, store_offset, bpf_stack_adjust;
bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
if (bpf_stack_adjust)
@@ -1293,6 +1373,10 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
store_offset = stack_adjust - 8;
+ /* reserve 4 nop insns */
+ for (i = 0; i < 4; i++)
+ emit(rv_nop(), ctx);
+
/* First instruction is always setting the tail-call-counter
* (TCC) register. This instruction is skipped for tail calls.
* Force using a 4-byte (non-compressed) instruction.
--
2.25.1
Hi Lehui,
Pu Lehui <[email protected]> writes:
> BPF trampoline is the critical infrastructure of the bpf
> subsystem, acting as a mediator between kernel functions
> and BPF programs. Numerous important features, such as
> using ebpf program for zero overhead kernel introspection,
> rely on this key component. We can't wait to support bpf
> trampoline on RV64. Since RV64 does not support ftrace
> direct call yet, the current RV64 bpf trampoline is only
> used in bpf context.
Thanks a lot for continuing this work. I agree with you that it's
valuable to have BPF trampoline support, even without proper direct call
support (we'll get there soon). The trampoline enables kfunc calls. On
that note; I don't see that you enable "bpf_jit_supports_kfunc_call()"
anywhere in the series. With BPF trampoline support, the RISC-V BPF
finally can support kfunc calls!
I'd add the following to bpf_jit_comp64.c:
bool bpf_jit_supports_kfunc_call(void)
{
return true;
}
:-)
I'll do a review ASAP.
Björn
On Wed, Feb 15, 2023 at 09:52:02PM +0800, Pu Lehui wrote:
> From: Pu Lehui <[email protected]>
>
> Extend patch_text for multiple instructions. This
> is the preparaiton for multiple instructions text
> patching in riscv bpf trampoline, and may be useful
> for other scenario.
In the future, please use the full width for your commit message (or
fix your editor if it thinks fifty-something chars is the full width).
Otherwise, looks grand...
Reviewed-by: Conor Dooley <[email protected]>
Thanks,
Conor.
On 2023/2/16 6:37, Conor Dooley wrote:
> On Wed, Feb 15, 2023 at 09:52:02PM +0800, Pu Lehui wrote:
>> From: Pu Lehui <[email protected]>
>>
>> Extend patch_text for multiple instructions. This
>> is the preparaiton for multiple instructions text
>> patching in riscv bpf trampoline, and may be useful
>> for other scenario.
>
> In the future, please use the full width for your commit message (or
> fix your editor if it thinks fifty-something chars is the full width).
>
Thanks Conor, will try to fix it.
> Otherwise, looks grand...
> Reviewed-by: Conor Dooley <[email protected]>
>
> Thanks,
> Conor.
>
On 2023/2/15 22:45, Björn Töpel wrote:
> Hi Lehui,
>
> Pu Lehui <[email protected]> writes:
>
>> BPF trampoline is the critical infrastructure of the bpf
>> subsystem, acting as a mediator between kernel functions
>> and BPF programs. Numerous important features, such as
>> using ebpf program for zero overhead kernel introspection,
>> rely on this key component. We can't wait to support bpf
>> trampoline on RV64. Since RV64 does not support ftrace
>> direct call yet, the current RV64 bpf trampoline is only
>> used in bpf context.
>
> Thanks a lot for continuing this work. I agree with you that it's
> valuable to have BPF trampoline support, even without proper direct call
> support (we'll get there soon). The trampoline enables kfunc calls. On
> that note; I don't see that you enable "bpf_jit_supports_kfunc_call()"
> anywhere in the series. With BPF trampoline support, the RISC-V BPF
> finally can support kfunc calls!
>
> I'd add the following to bpf_jit_comp64.c:
>
happy to hear that,let's make it more completeable.
> bool bpf_jit_supports_kfunc_call(void)
> {
> return true;
> }
>
> :-)
>
> I'll do a review ASAP.
>
>
> Björn
Pu Lehui <[email protected]> writes:
> BPF trampoline is the critical infrastructure of the bpf
> subsystem, acting as a mediator between kernel functions
> and BPF programs. Numerous important features, such as
> using ebpf program for zero overhead kernel introspection,
> rely on this key component. We can't wait to support bpf
> trampoline on RV64. Since RV64 does not support ftrace
> direct call yet, the current RV64 bpf trampoline is only
> used in bpf context.
>
> As most of riscv cpu support unaligned memory accesses,
> we temporarily use patch [1] to facilitate testing. The
> test results are as follow, and test_verifier with no
> new failure ceses.
>
> - fexit_bpf2bpf:OK
> - dummy_st_ops:OK
> - xdp_bpf2bpf:OK
>
> [1] https://lore.kernel.org/linux-riscv/[email protected]/
>
> v1:
> - Remove the logic of bpf_arch_text_poke supported for
> kernel functions. (Kuohai and Björn)
> - Extend patch_text for multiple instructions. (Björn)
> - Fix OOB issue when image too big. (Björn)
This series is ready to go in as is.
@Palmer I'd like to take this series via the bpf-next tree (as usual),
but note that there are some non-BPF changes as well, related to text
poking.
@Lehui I'd like to see two follow-up patches:
1. Enable kfunc for RV64, by adding:
| bool bpf_jit_supports_kfunc_call(void)
| {
| return true;
| }
2. Remove the checkpatch warning on patch 4:
| WARNING: kfree(NULL) is safe and this check is probably not required
| #313: FILE: arch/riscv/net/bpf_jit_comp64.c:984:
| + if (branches_off)
| + kfree(branches_off);
For the series:
Tested-by: Björn Töpel <[email protected]>
Acked-by: Björn Töpel <[email protected]>
On 2/16/23 10:56 AM, Björn Töpel wrote:
> Pu Lehui <[email protected]> writes:
>
>> BPF trampoline is the critical infrastructure of the bpf
>> subsystem, acting as a mediator between kernel functions
>> and BPF programs. Numerous important features, such as
>> using ebpf program for zero overhead kernel introspection,
>> rely on this key component. We can't wait to support bpf
>> trampoline on RV64. Since RV64 does not support ftrace
>> direct call yet, the current RV64 bpf trampoline is only
>> used in bpf context.
>>
>> As most of riscv cpu support unaligned memory accesses,
>> we temporarily use patch [1] to facilitate testing. The
>> test results are as follow, and test_verifier with no
>> new failure ceses.
>>
>> - fexit_bpf2bpf:OK
>> - dummy_st_ops:OK
>> - xdp_bpf2bpf:OK
>>
>> [1] https://lore.kernel.org/linux-riscv/[email protected]/
>>
>> v1:
>> - Remove the logic of bpf_arch_text_poke supported for
>> kernel functions. (Kuohai and Björn)
>> - Extend patch_text for multiple instructions. (Björn)
>> - Fix OOB issue when image too big. (Björn)
>
> This series is ready to go in as is.
Ok.
> @Palmer I'd like to take this series via the bpf-next tree (as usual),
> but note that there are some non-BPF changes as well, related to text
> poking.
>
> @Lehui I'd like to see two follow-up patches:
>
> 1. Enable kfunc for RV64, by adding:
> | bool bpf_jit_supports_kfunc_call(void)
> | {
> | return true;
> | }
>
> 2. Remove the checkpatch warning on patch 4:
> | WARNING: kfree(NULL) is safe and this check is probably not required
> | #313: FILE: arch/riscv/net/bpf_jit_comp64.c:984:
> | + if (branches_off)
> | + kfree(branches_off);
>
>
> For the series:
>
> Tested-by: Björn Töpel <[email protected]>
> Acked-by: Björn Töpel <[email protected]>
Thanks, I fixed up issue 2 and cleaned up the commit msgs while applying.
For issue 1, pls send a follow-up.
On 2023/2/18 4:49, Daniel Borkmann wrote:
> On 2/16/23 10:56 AM, Björn Töpel wrote:
>> Pu Lehui <[email protected]> writes:
>>
>>> BPF trampoline is the critical infrastructure of the bpf
>>> subsystem, acting as a mediator between kernel functions
>>> and BPF programs. Numerous important features, such as
>>> using ebpf program for zero overhead kernel introspection,
>>> rely on this key component. We can't wait to support bpf
>>> trampoline on RV64. Since RV64 does not support ftrace
>>> direct call yet, the current RV64 bpf trampoline is only
>>> used in bpf context.
>>>
>>> As most of riscv cpu support unaligned memory accesses,
>>> we temporarily use patch [1] to facilitate testing. The
>>> test results are as follow, and test_verifier with no
>>> new failure ceses.
>>>
>>> - fexit_bpf2bpf:OK
>>> - dummy_st_ops:OK
>>> - xdp_bpf2bpf:OK
>>>
>>> [1]
>>> https://lore.kernel.org/linux-riscv/[email protected]/
>>>
>>> v1:
>>> - Remove the logic of bpf_arch_text_poke supported for
>>> kernel functions. (Kuohai and Björn)
>>> - Extend patch_text for multiple instructions. (Björn)
>>> - Fix OOB issue when image too big. (Björn)
>>
>> This series is ready to go in as is.
>
> Ok.
>
>> @Palmer I'd like to take this series via the bpf-next tree (as usual),
>> but note that there are some non-BPF changes as well, related to text
>> poking.
>>
>> @Lehui I'd like to see two follow-up patches:
>>
>> 1. Enable kfunc for RV64, by adding:
>> | bool bpf_jit_supports_kfunc_call(void)
>> | {
>> | return true;
>> | }
>>
>> 2. Remove the checkpatch warning on patch 4:
>> | WARNING: kfree(NULL) is safe and this check is probably not required
>> | #313: FILE: arch/riscv/net/bpf_jit_comp64.c:984:
>> | + if (branches_off)
>> | + kfree(branches_off);
>>
>>
>> For the series:
>>
>> Tested-by: Björn Töpel <[email protected]>
>> Acked-by: Björn Töpel <[email protected]>
>
> Thanks, I fixed up issue 2 and cleaned up the commit msgs while applying.
> For issue 1, pls send a follow-up.
Thank you both, will handle this.