This is an RFC, following [0].
It works but still needs some care; this is mainly to see whether it has
a chance of getting upstreamed or whether I should rely on struct_ops
instead.
Cheers,
Benjamin
Signed-off-by: Benjamin Tissoires <[email protected]>
---
Benjamin Tissoires (8):
bpf: ignore sleepable prog parameter for kfuncs
bpf: add kfunc_meta parameter to push_callback_call()
bpf: implement __async and __s_async kfunc suffixes
bpf: typedef a type for the bpf_wq callbacks
selftests/bpf: rely on wq_callback_fn_t
bpf: remove one special case of is_bpf_wq_set_callback_impl_kfunc
bpf: implement __aux kfunc argument suffix to fetch prog_aux
bpf: rely on __aux suffix for bpf_wq_set_callback_impl
kernel/bpf/helpers.c | 10 +-
kernel/bpf/verifier.c | 333 +++++++++++++++++++-----
tools/testing/selftests/bpf/bpf_experimental.h | 3 +-
tools/testing/selftests/bpf/progs/wq.c | 10 +-
tools/testing/selftests/bpf/progs/wq_failures.c | 4 +-
5 files changed, 280 insertions(+), 80 deletions(-)
---
base-commit: 05cbc217aafbc631a6c2fab4accf95850cb48358
change-id: 20240507-bpf_async-bd2e65847525
Best regards,
--
Benjamin Tissoires <[email protected]>
There is no change of behavior: the prog's sleepable state is now carried
in the verifier state, so every kfunc still sees the same value. But this
allows declaring an async non-sleepable callback from a syscall program,
where everything else is sleepable.
Signed-off-by: Benjamin Tissoires <[email protected]>
---
This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
---
kernel/bpf/verifier.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5d42db05315e..856cb77d0f87 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5288,8 +5288,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
static bool in_sleepable(struct bpf_verifier_env *env)
{
- return env->prog->sleepable ||
- (env->cur_state && env->cur_state->in_sleepable);
+ return env->cur_state ? env->cur_state->in_sleepable : env->prog->sleepable;
}
/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
@@ -20722,6 +20721,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
state->curframe = 0;
state->speculative = false;
state->branches = 1;
+ state->in_sleepable = env->prog->sleepable;
state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
if (!state->frame[0]) {
kfree(state);
--
2.44.0
No functional change; this is a preparatory patch for being able to
declare async callbacks from bpf kfuncs.
Signed-off-by: Benjamin Tissoires <[email protected]>
---
This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
---
kernel/bpf/verifier.c | 48 +++++++++++++++++++++++++++++-------------------
1 file changed, 29 insertions(+), 19 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 856cb77d0f87..2b1e24c440c5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9339,11 +9339,13 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
- int insn_idx);
+ int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta);
static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
- struct bpf_func_state *callee, int insn_idx);
+ struct bpf_func_state *callee, int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta);
static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int callsite,
set_callee_state_fn set_callee_state_cb,
@@ -9381,7 +9383,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls
subprog /* subprog number within this prog */);
/* Transfer references to the callee */
err = copy_reference_state(callee, caller);
- err = err ?: set_callee_state_cb(env, caller, callee, callsite);
+ err = err ?: set_callee_state_cb(env, caller, callee, callsite, NULL);
if (err)
goto err_out;
@@ -9518,7 +9520,8 @@ static int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int insn_idx, int subprog,
- set_callee_state_fn set_callee_state_cb)
+ set_callee_state_fn set_callee_state_cb,
+ struct bpf_kfunc_call_arg_meta *kfunc_meta)
{
struct bpf_verifier_state *state = env->cur_state, *callback_state;
struct bpf_func_state *caller, *callee;
@@ -9560,7 +9563,7 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
callee->async_entry_cnt = caller->async_entry_cnt + 1;
/* Convert bpf_timer_set_callback() args into timer callback args */
- err = set_callee_state_cb(env, caller, callee, insn_idx);
+ err = set_callee_state_cb(env, caller, callee, insn_idx, kfunc_meta);
if (err)
return err;
@@ -9691,7 +9694,8 @@ int map_set_for_each_callback_args(struct bpf_verifier_env *env,
static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
- struct bpf_func_state *callee, int insn_idx)
+ struct bpf_func_state *callee, int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
{
int i;
@@ -9706,7 +9710,8 @@ static int set_callee_state(struct bpf_verifier_env *env,
static int set_map_elem_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
- int insn_idx)
+ int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
{
struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
struct bpf_map *map;
@@ -9732,7 +9737,8 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
static int set_loop_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
- int insn_idx)
+ int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
{
/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
* u64 flags);
@@ -9754,7 +9760,8 @@ static int set_loop_callback_state(struct bpf_verifier_env *env,
static int set_timer_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
- int insn_idx)
+ int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
{
struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
@@ -9784,7 +9791,8 @@ static int set_timer_callback_state(struct bpf_verifier_env *env,
static int set_find_vma_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
- int insn_idx)
+ int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
{
/* bpf_find_vma(struct task_struct *task, u64 addr,
* void *callback_fn, void *callback_ctx, u64 flags)
@@ -9812,7 +9820,8 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env,
static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
- int insn_idx)
+ int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
{
/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void
* callback_ctx, u64 flags);
@@ -9835,7 +9844,8 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
- int insn_idx)
+ int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
{
/* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
* bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
@@ -10411,15 +10421,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
break;
case BPF_FUNC_for_each_map_elem:
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
- set_map_elem_callback_state);
+ set_map_elem_callback_state, NULL);
break;
case BPF_FUNC_timer_set_callback:
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
- set_timer_callback_state);
+ set_timer_callback_state, NULL);
break;
case BPF_FUNC_find_vma:
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
- set_find_vma_callback_state);
+ set_find_vma_callback_state, NULL);
break;
case BPF_FUNC_snprintf:
err = check_bpf_snprintf_call(env, regs);
@@ -10434,7 +10444,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
if (cur_func(env)->callback_depth < regs[BPF_REG_1].umax_value) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
- set_loop_callback_state);
+ set_loop_callback_state, NULL);
} else {
cur_func(env)->callback_depth = 0;
if (env->log.level & BPF_LOG_LEVEL2)
@@ -10537,7 +10547,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
}
case BPF_FUNC_user_ringbuf_drain:
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
- set_user_ringbuf_callback_state);
+ set_user_ringbuf_callback_state, NULL);
break;
}
@@ -12285,7 +12295,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
- set_rbtree_add_callback_state);
+ set_rbtree_add_callback_state, &meta);
if (err) {
verbose(env, "kfunc %s#%d failed callback verification\n",
func_name, meta.func_id);
@@ -12295,7 +12305,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (is_bpf_wq_set_callback_impl_kfunc(meta.func_id)) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
- set_timer_callback_state);
+ set_timer_callback_state, &meta);
if (err) {
verbose(env, "kfunc %s#%d failed callback verification\n",
func_name, meta.func_id);
--
2.44.0
This is still mostly a WIP, but it seems to be working for the couple of
existing tests.
Signed-off-by: Benjamin Tissoires <[email protected]>
---
This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
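For illustration, a kfunc opts into this with the new argument suffixes,
roughly as below (a sketch with a hypothetical kfunc and argument names;
the only real user for now is bpf_wq_set_callback_impl(), converted later
in this series):

__bpf_kfunc int bpf_example_defer(u64 cookie,
                                  int (cb__s_async)(struct bpf_map *map, int *key),
                                  unsigned int flags)
{
        /* The __s_async suffix makes the verifier check cb as a sleepable
         * async callback; a plain __async suffix would check it as a
         * non-sleepable one. The kfunc implementation is expected to stash
         * the callback and invoke it later from the matching context.
         */
        return 0;
}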
---
kernel/bpf/verifier.c | 223 ++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 206 insertions(+), 17 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2b1e24c440c5..cc4dab81b306 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -336,6 +336,16 @@ struct bpf_kfunc_call_arg_meta {
struct bpf_map *ptr;
int uid;
} map;
+ struct {
+ bool enabled;
+ bool sleepable;
+ u32 nr_args;
+ struct {
+ // FIXME: should be enum kfunc_ptr_arg_type type;
+ int type;
+ u32 btf_id;
+ } args[5];
+ } async_cb;
u64 mem_size;
};
@@ -9538,7 +9548,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
*/
env->subprog_info[subprog].is_cb = true;
if (bpf_pseudo_kfunc_call(insn) &&
- !is_callback_calling_kfunc(insn->imm)) {
+ !is_callback_calling_kfunc(insn->imm) &&
+ !(kfunc_meta && kfunc_meta->async_cb.enabled)) {
verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
func_id_name(insn->imm), insn->imm);
return -EFAULT;
@@ -9549,14 +9560,15 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
return -EFAULT;
}
- if (is_async_callback_calling_insn(insn)) {
+ if (is_async_callback_calling_insn(insn) || (kfunc_meta && kfunc_meta->async_cb.enabled)) {
struct bpf_verifier_state *async_cb;
/* there is no real recursion here. timer and workqueue callbacks are async */
env->subprog_info[subprog].is_async_cb = true;
async_cb = push_async_cb(env, env->subprog_info[subprog].start,
insn_idx, subprog,
- is_bpf_wq_set_callback_impl_kfunc(insn->imm));
+ (is_bpf_wq_set_callback_impl_kfunc(insn->imm) ||
+ (kfunc_meta && kfunc_meta->async_cb.sleepable)));
if (!async_cb)
return -EFAULT;
callee = async_cb->frame[0];
@@ -10890,6 +10902,16 @@ static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param
return btf_param_match_suffix(btf, arg, "__str");
}
+static bool is_kfunc_arg_async_cb(const struct btf *btf, const struct btf_param *arg)
+{
+ return btf_param_match_suffix(btf, arg, "__async");
+}
+
+static bool is_kfunc_arg_sleepable_async_cb(const struct btf *btf, const struct btf_param *arg)
+{
+ return btf_param_match_suffix(btf, arg, "__s_async");
+}
+
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -11045,6 +11067,48 @@ enum kfunc_ptr_arg_type {
KF_ARG_PTR_TO_WORKQUEUE,
};
+static const char *__kfunc_ptr_arg_type_str(enum kfunc_ptr_arg_type value)
+{
+ switch (value) {
+ case KF_ARG_PTR_TO_CTX:
+ return "KF_ARG_PTR_TO_CTX";
+ case KF_ARG_PTR_TO_ALLOC_BTF_ID:
+ return "KF_ARG_PTR_TO_ALLOC_BTF_ID";
+ case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
+ return "KF_ARG_PTR_TO_REFCOUNTED_KPTR";
+ case KF_ARG_PTR_TO_DYNPTR:
+ return "KF_ARG_PTR_TO_DYNPTR";
+ case KF_ARG_PTR_TO_ITER:
+ return "KF_ARG_PTR_TO_ITER";
+ case KF_ARG_PTR_TO_LIST_HEAD:
+ return "KF_ARG_PTR_TO_LIST_HEAD";
+ case KF_ARG_PTR_TO_LIST_NODE:
+ return "KF_ARG_PTR_TO_LIST_NODE";
+ case KF_ARG_PTR_TO_BTF_ID:
+ return "KF_ARG_PTR_TO_BTF_ID";
+ case KF_ARG_PTR_TO_MEM:
+ return "KF_ARG_PTR_TO_MEM";
+ case KF_ARG_PTR_TO_MEM_SIZE:
+ return "KF_ARG_PTR_TO_MEM_SIZE";
+ case KF_ARG_PTR_TO_CALLBACK:
+ return "KF_ARG_PTR_TO_CALLBACK";
+ case KF_ARG_PTR_TO_RB_ROOT:
+ return "KF_ARG_PTR_TO_RB_ROOT";
+ case KF_ARG_PTR_TO_RB_NODE:
+ return "KF_ARG_PTR_TO_RB_NODE";
+ case KF_ARG_PTR_TO_NULL:
+ return "KF_ARG_PTR_TO_NULL";
+ case KF_ARG_PTR_TO_CONST_STR:
+ return "KF_ARG_PTR_TO_CONST_STR";
+ case KF_ARG_PTR_TO_MAP:
+ return "KF_ARG_PTR_TO_MAP";
+ case KF_ARG_PTR_TO_WORKQUEUE:
+ return "KF_ARG_PTR_TO_WORKQUEUE";
+ }
+
+ return "UNKNOWN";
+}
+
enum special_kfunc_type {
KF_bpf_obj_new_impl,
KF_bpf_obj_drop_impl,
@@ -12151,6 +12215,39 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
return -EINVAL;
}
meta->subprogno = reg->subprogno;
+ meta->async_cb.sleepable = is_kfunc_arg_sleepable_async_cb(meta->btf, &args[i]);
+ meta->async_cb.enabled = meta->async_cb.sleepable ||
+ is_kfunc_arg_async_cb(meta->btf, &args[i]);
+ if (meta->async_cb.enabled) {
+ const struct btf_type *cb_proto;
+ const struct btf_param *cb_args;
+ u32 cb_type = args[i].type;
+ int i;
+
+ cb_proto = btf_type_resolve_func_ptr(btf, cb_type, NULL);
+ if (cb_proto) {
+ meta->async_cb.nr_args = btf_type_vlen(cb_proto);
+ cb_args = btf_params(cb_proto);
+ for (i = 0; i < meta->async_cb.nr_args; i++) {
+ const struct btf_type *t, *ref_t;
+ const char *ref_tname;
+ u32 ref_id, t_id;
+
+ t = btf_type_skip_modifiers(btf, cb_args[i].type, &t_id);
+ ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
+ ref_tname = btf_name_by_offset(btf, ref_t->name_off);
+ meta->async_cb.args[i].type = get_kfunc_ptr_arg_type(env, meta,
+ t, ref_t, ref_tname, cb_args, i, meta->async_cb.nr_args);
+
+ /* FIXME: we should not get an error from get_kfunc_ptr_arg_type() */
+ if (meta->async_cb.args[i].type < 0)
+ meta->async_cb.args[i].type = KF_ARG_PTR_TO_BTF_ID;
+ meta->async_cb.args[i].btf_id = ref_id;
+ }
+ } else {
+ meta->async_cb.nr_args = 0;
+ }
+ }
break;
case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
if (!type_is_ptr_alloc_obj(reg->type)) {
@@ -12248,6 +12345,71 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env,
static int check_return_code(struct bpf_verifier_env *env, int regno, const char *reg_name);
+static int set_generic_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+ int insn_idx,
+ struct bpf_kfunc_call_arg_meta *meta)
+{
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ if (i < meta->async_cb.nr_args) {
+ u32 type = meta->async_cb.args[i].type;
+
+ switch (type) {
+ case KF_ARG_PTR_TO_CTX:
+ case KF_ARG_PTR_TO_ALLOC_BTF_ID:
+ case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
+ case KF_ARG_PTR_TO_DYNPTR:
+ case KF_ARG_PTR_TO_ITER:
+ case KF_ARG_PTR_TO_LIST_HEAD:
+ case KF_ARG_PTR_TO_LIST_NODE:
+ case KF_ARG_PTR_TO_CALLBACK:
+ case KF_ARG_PTR_TO_RB_ROOT:
+ case KF_ARG_PTR_TO_RB_NODE:
+ case KF_ARG_PTR_TO_NULL:
+ case KF_ARG_PTR_TO_CONST_STR:
+ verbose(env, "argument #%d of type %s is not supported in async callbacks\n",
+ i, __kfunc_ptr_arg_type_str(meta->async_cb.args[i].type));
+ return -EINVAL;
+ case KF_ARG_PTR_TO_MEM:
+ case KF_ARG_PTR_TO_MEM_SIZE:
+ callee->regs[BPF_REG_1 + i].type = PTR_TO_MEM;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]);
+ callee->regs[BPF_REG_1 + i].mem_size = 8; // FIXME: should store the size while walking the arguments
+ break;
+ case KF_ARG_PTR_TO_MAP:
+ callee->regs[BPF_REG_1 + i].type = CONST_PTR_TO_MAP;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]);
+ callee->regs[BPF_REG_1 + i].map_ptr = meta->map.ptr;
+ break;
+ case KF_ARG_PTR_TO_WORKQUEUE:
+ callee->regs[BPF_REG_1 + i].type = PTR_TO_MAP_VALUE;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]);
+ callee->regs[BPF_REG_1 + i].map_ptr = meta->map.ptr;
+ break;
+ case KF_ARG_PTR_TO_BTF_ID:
+ callee->regs[BPF_REG_1 + i].type = PTR_TO_BTF_ID;
+ __mark_reg_known_zero(&callee->regs[BPF_REG_1 + i]);
+ callee->regs[BPF_REG_1 + i].btf = meta->btf;
+ callee->regs[BPF_REG_1 + i].btf_id = meta->async_cb.args[i].btf_id;
+ break;
+ default:
+ verbose(env, "verifier bug: unexpected arg#%d type (%d) in async callback\n",
+ i, type);
+ return -EFAULT;
+ }
+ } else {
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_1 + i]);
+ }
+ }
+ callee->in_callback_fn = true;
+ // callee->callback_ret_range = retval_range(-MAX_ERRNO, );
+ return 0;
+}
+
+
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
@@ -12313,6 +12475,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
+ if (meta.async_cb.enabled) {
+ err = push_callback_call(env, insn, insn_idx, meta.subprogno,
+ set_generic_callback_state, &meta);
+ if (err) {
+ verbose(env, "kfunc %s#%d failed callback verification\n",
+ func_name, meta.func_id);
+ return err;
+ }
+ }
+
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
@@ -15918,22 +16090,39 @@ static int visit_insn(int t, struct bpf_verifier_env *env)
}
if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
struct bpf_kfunc_call_arg_meta meta;
+ const struct btf_param *args;
+ u32 i, nargs;
ret = fetch_kfunc_meta(env, insn, &meta, NULL);
- if (ret == 0 && is_iter_next_kfunc(&meta)) {
- mark_prune_point(env, t);
- /* Checking and saving state checkpoints at iter_next() call
- * is crucial for fast convergence of open-coded iterator loop
- * logic, so we need to force it. If we don't do that,
- * is_state_visited() might skip saving a checkpoint, causing
- * unnecessarily long sequence of not checkpointed
- * instructions and jumps, leading to exhaustion of jump
- * history buffer, and potentially other undesired outcomes.
- * It is expected that with correct open-coded iterators
- * convergence will happen quickly, so we don't run a risk of
- * exhausting memory.
- */
- mark_force_checkpoint(env, t);
+ if (ret == 0) {
+ args = (const struct btf_param *)(meta.func_proto + 1);
+ nargs = btf_type_vlen(meta.func_proto);
+
+ for (i = 0; i < nargs; i++) {
+ if (is_kfunc_arg_sleepable_async_cb(meta.btf, &args[i]) ||
+ is_kfunc_arg_async_cb(meta.btf, &args[i]))
+ /* Mark this call insn as a prune point to trigger
+ * is_state_visited() check before call itself is
+ * processed by __check_func_call(). Otherwise new
+ * async state will be pushed for further exploration.
+ */
+ mark_prune_point(env, t);
+ }
+ if (is_iter_next_kfunc(&meta)) {
+ mark_prune_point(env, t);
+ /* Checking and saving state checkpoints at iter_next() call
+ * is crucial for fast convergence of open-coded iterator loop
+ * logic, so we need to force it. If we don't do that,
+ * is_state_visited() might skip saving a checkpoint, causing
+ * unnecessarily long sequence of not checkpointed
+ * instructions and jumps, leading to exhaustion of jump
+ * history buffer, and potentially other undesired outcomes.
+ * It is expected that with correct open-coded iterators
+ * convergence will happen quickly, so we don't run a risk of
+ * exhausting memory.
+ */
+ mark_force_checkpoint(env, t);
+ }
}
}
return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
--
2.44.0
This allows users to rely on it by using it from vmlinux.h.
Signed-off-by: Benjamin Tissoires <[email protected]>
---
This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
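On the BPF program side, a callback can then be declared with the
prototype coming from vmlinux.h, along these lines (a sketch with
hypothetical function names, assuming the usual vmlinux.h and
bpf_experimental.h includes; it mirrors the selftest changes in the next
patch):

static int my_wq_cb(struct bpf_map *map, int *key, struct bpf_wq *wq)
{
        return 0;
}

static int arm(struct bpf_wq *wq)
{
        /* wq_callback_fn_t now comes straight from vmlinux.h */
        wq_callback_fn_t cb = my_wq_cb;

        return bpf_wq_set_callback(wq, cb, 0);
}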
---
kernel/bpf/helpers.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2a69a9a36c0f..97628bcbd507 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2720,8 +2720,10 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
return 0;
}
+typedef int (*wq_callback_fn_t)(struct bpf_map *map, int *key, struct bpf_wq *wq);
+
__bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq),
+ wq_callback_fn_t callback_fn__s_async,
unsigned int flags,
void *aux__ign)
{
@@ -2731,7 +2733,7 @@ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
if (flags)
return -EINVAL;
- return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ);
+ return __bpf_async_set_callback(async, callback_fn__s_async, aux, flags, BPF_ASYNC_TYPE_WQ);
}
__bpf_kfunc void bpf_preempt_disable(void)
--
2.44.0
The type of bpf_wq callbacks has changed, so adapt to it and make use of
wq_callback_fn_t.
Signed-off-by: Benjamin Tissoires <[email protected]>
---
This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
---
tools/testing/selftests/bpf/bpf_experimental.h | 3 +--
tools/testing/selftests/bpf/progs/wq.c | 10 ++++------
tools/testing/selftests/bpf/progs/wq_failures.c | 4 ++--
3 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 8b9cc87be4c4..0a35e6efccae 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -494,8 +494,7 @@ extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
-extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq),
+extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, wq_callback_fn_t cb,
unsigned int flags__k, void *aux__ign) __ksym;
#define bpf_wq_set_callback(timer, cb, flags) \
bpf_wq_set_callback_impl(timer, cb, flags, NULL)
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
index 49e712acbf60..c8c88976baca 100644
--- a/tools/testing/selftests/bpf/progs/wq.c
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -52,8 +52,7 @@ struct {
__u32 ok;
__u32 ok_sleepable;
-static int test_elem_callback(void *map, int *key,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq))
+static int test_elem_callback(void *map, int *key, wq_callback_fn_t callback_fn)
{
struct elem init = {}, *val;
struct bpf_wq *wq;
@@ -83,8 +82,7 @@ static int test_elem_callback(void *map, int *key,
return 0;
}
-static int test_hmap_elem_callback(void *map, int *key,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq))
+static int test_hmap_elem_callback(void *map, int *key, wq_callback_fn_t callback_fn)
{
struct hmap_elem init = {}, *val;
struct bpf_wq *wq;
@@ -114,7 +112,7 @@ static int test_hmap_elem_callback(void *map, int *key,
}
/* callback for non sleepable workqueue */
-static int wq_callback(void *map, int *key, struct bpf_wq *work)
+static int wq_callback(struct bpf_map *map, int *key, struct bpf_wq *work)
{
bpf_kfunc_common_test();
ok |= (1 << *key);
@@ -122,7 +120,7 @@ static int wq_callback(void *map, int *key, struct bpf_wq *work)
}
/* callback for sleepable workqueue */
-static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work)
+static int wq_cb_sleepable(struct bpf_map *map, int *key, struct bpf_wq *work)
{
bpf_kfunc_call_test_sleepable();
ok_sleepable |= (1 << *key);
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
index 4cbdb425f223..3d87ccb8286e 100644
--- a/tools/testing/selftests/bpf/progs/wq_failures.c
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -28,14 +28,14 @@ struct {
} lru SEC(".maps");
/* callback for non sleepable workqueue */
-static int wq_callback(void *map, int *key, struct bpf_wq *work)
+static int wq_callback(struct bpf_map *map, int *key, struct bpf_wq *work)
{
bpf_kfunc_common_test();
return 0;
}
/* callback for sleepable workqueue */
-static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work)
+static int wq_cb_sleepable(struct bpf_map *map, int *key, struct bpf_wq *work)
{
bpf_kfunc_call_test_sleepable();
return 0;
--
2.44.0
This allows kfuncs to request the prog_aux environment in their
implementation, for example to get access to the originating bpf_prog.
Signed-off-by: Benjamin Tissoires <[email protected]>
---
This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
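For illustration, a kfunc gets hold of its bpf_prog_aux roughly as below
(a sketch with a hypothetical kfunc name and arguments;
bpf_wq_set_callback_impl() is converted to this scheme in the next patch):

__bpf_kfunc int bpf_example_kfunc(u64 value, void *prog__aux)
{
        struct bpf_prog_aux *aux = prog__aux;

        /* The verifier skips prog__aux during argument checking, and
         * fixup_kfunc_call() rewrites the call so that this argument points
         * to the calling program's bpf_prog_aux. BPF programs pass NULL
         * here, typically hidden behind a wrapper macro (as
         * bpf_wq_set_callback() does for bpf_wq_set_callback_impl()).
         */
        return aux ? 0 : -EINVAL;
}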
---
kernel/bpf/verifier.c | 40 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6fba9e2caa83..33b108db0025 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10909,6 +10909,11 @@ static bool is_kfunc_arg_sleepable_async_cb(const struct btf *btf, const struct
return btf_param_match_suffix(btf, arg, "__s_async");
}
+static bool is_kfunc_arg_prog_aux(const struct btf *btf, const struct btf_param *arg)
+{
+ return btf_param_match_suffix(btf, arg, "__aux");
+}
+
static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
const struct btf_param *arg,
const char *name)
@@ -11807,7 +11812,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
t = btf_type_skip_modifiers(btf, args[i].type, NULL);
- if (is_kfunc_arg_ignore(btf, &args[i]))
+ if (is_kfunc_arg_ignore(btf, &args[i]) ||
+ is_kfunc_arg_prog_aux(btf, &args[i]))
continue;
if (btf_type_is_scalar(t)) {
@@ -19950,6 +19956,38 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn_buf[1] = ld_addrs[1];
insn_buf[2] = *insn;
*cnt = 3;
+ } else {
+ struct bpf_kfunc_call_arg_meta meta;
+ struct bpf_insn kfunc_insn = *insn;
+ const struct btf_param *args;
+ u32 i, nargs, prog_aux_arg;
+ const char *func_name;
+ int ret;
+
+ /* imm might not have func_id, so create a fake insn with the expected args */
+ kfunc_insn.imm = desc->func_id;
+
+ ret = fetch_kfunc_meta(env, &kfunc_insn, &meta, &func_name);
+ if (ret == 0) {
+ args = (const struct btf_param *)(meta.func_proto + 1);
+ nargs = btf_type_vlen(meta.func_proto);
+ prog_aux_arg = nargs;
+
+ for (i = 0; i < nargs; i++) {
+ if (is_kfunc_arg_prog_aux(meta.btf, &args[i]))
+ prog_aux_arg = i;
+ }
+
+ if (prog_aux_arg < nargs) {
+ struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_1 + prog_aux_arg,
+ (long)env->prog->aux) };
+
+ insn_buf[0] = ld_addrs[0];
+ insn_buf[1] = ld_addrs[1];
+ insn_buf[2] = *insn;
+ *cnt = 3;
+ }
+ }
}
return 0;
}
--
2.44.0
And then clean up the special cases for this kfunc in the verifier.
Signed-off-by: Benjamin Tissoires <[email protected]>
---
This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
---
kernel/bpf/helpers.c | 4 ++--
kernel/bpf/verifier.c | 17 -----------------
2 files changed, 2 insertions(+), 19 deletions(-)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 97628bcbd507..03524fa5feef 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2725,9 +2725,9 @@ typedef int (*wq_callback_fn_t)(struct bpf_map *map, int *key, struct bpf_wq *wq
__bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
wq_callback_fn_t callback_fn__s_async,
unsigned int flags,
- void *aux__ign)
+ void *prog__aux)
{
- struct bpf_prog_aux *aux = (struct bpf_prog_aux *)aux__ign;
+ struct bpf_prog_aux *aux = (struct bpf_prog_aux *)prog__aux;
struct bpf_async_kern *async = (struct bpf_async_kern *)wq;
if (flags)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 33b108db0025..829a234832d9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -514,8 +514,6 @@ static bool is_sync_callback_calling_kfunc(u32 btf_id);
static bool is_callback_calling_kfunc(u32 btf_id);
static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
-static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id);
-
static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_for_each_map_elem ||
@@ -11134,7 +11132,6 @@ enum special_kfunc_type {
KF_bpf_percpu_obj_new_impl,
KF_bpf_percpu_obj_drop_impl,
KF_bpf_throw,
- KF_bpf_wq_set_callback_impl,
KF_bpf_preempt_disable,
KF_bpf_preempt_enable,
KF_bpf_iter_css_task_new,
@@ -11161,7 +11158,6 @@ BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_throw)
-BTF_ID(func, bpf_wq_set_callback_impl)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#endif
@@ -11190,7 +11186,6 @@ BTF_ID(func, bpf_dynptr_clone)
BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_throw)
-BTF_ID(func, bpf_wq_set_callback_impl)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS
@@ -11542,11 +11537,6 @@ static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
insn->imm == special_kfunc_list[KF_bpf_throw];
}
-static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id)
-{
- return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
-}
-
static bool is_callback_calling_kfunc(u32 btf_id)
{
return is_sync_callback_calling_kfunc(btf_id);
@@ -19949,13 +19939,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
*cnt = 1;
- } else if (is_bpf_wq_set_callback_impl_kfunc(desc->func_id)) {
- struct bpf_insn ld_addrs[2] = { BPF_LD_IMM64(BPF_REG_4, (long)env->prog->aux) };
-
- insn_buf[0] = ld_addrs[0];
- insn_buf[1] = ld_addrs[1];
- insn_buf[2] = *insn;
- *cnt = 3;
} else {
struct bpf_kfunc_call_arg_meta meta;
struct bpf_insn kfunc_insn = *insn;
--
2.44.0
It looks like the generic implementation based on the __s_async suffix
works well enough, so let's use it.
Note:
- currently we lose the return value range
- the second argument is not of type PTR_TO_MAP_KEY
Signed-off-by: Benjamin Tissoires <[email protected]>
---
This is an RFC, and is not meant to be fully reviewed/applied as it is.
I'm posting this to show what I wanted to explain in
https://lore.kernel.org/bpf/mhkzkf4e23uvljtmwizwcxyuyat2tmfxn33xb4t7waafgmsa66@mcrzpj3b6ssx/
---
kernel/bpf/verifier.c | 27 ++++-----------------------
1 file changed, 4 insertions(+), 23 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cc4dab81b306..6fba9e2caa83 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -511,7 +511,6 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id)
}
static bool is_sync_callback_calling_kfunc(u32 btf_id);
-static bool is_async_callback_calling_kfunc(u32 btf_id);
static bool is_callback_calling_kfunc(u32 btf_id);
static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
@@ -544,8 +543,7 @@ static bool is_sync_callback_calling_insn(struct bpf_insn *insn)
static bool is_async_callback_calling_insn(struct bpf_insn *insn)
{
- return (bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm)) ||
- (bpf_pseudo_kfunc_call(insn) && is_async_callback_calling_kfunc(insn->imm));
+ return bpf_helper_call(insn) && is_async_callback_calling_function(insn->imm);
}
static bool is_may_goto_insn(struct bpf_insn *insn)
@@ -9560,15 +9558,14 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins
return -EFAULT;
}
- if (is_async_callback_calling_insn(insn) || (kfunc_meta && kfunc_meta->async_cb.enabled)) {
+ if (kfunc_meta && kfunc_meta->async_cb.enabled) {
struct bpf_verifier_state *async_cb;
/* there is no real recursion here. timer and workqueue callbacks are async */
env->subprog_info[subprog].is_async_cb = true;
async_cb = push_async_cb(env, env->subprog_info[subprog].start,
insn_idx, subprog,
- (is_bpf_wq_set_callback_impl_kfunc(insn->imm) ||
- (kfunc_meta && kfunc_meta->async_cb.sleepable)));
+ kfunc_meta && kfunc_meta->async_cb.sleepable);
if (!async_cb)
return -EFAULT;
callee = async_cb->frame[0];
@@ -11534,11 +11531,6 @@ static bool is_sync_callback_calling_kfunc(u32 btf_id)
return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
}
-static bool is_async_callback_calling_kfunc(u32 btf_id)
-{
- return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl];
-}
-
static bool is_bpf_throw_kfunc(struct bpf_insn *insn)
{
return bpf_pseudo_kfunc_call(insn) && insn->off == 0 &&
@@ -11552,8 +11544,7 @@ static bool is_bpf_wq_set_callback_impl_kfunc(u32 btf_id)
static bool is_callback_calling_kfunc(u32 btf_id)
{
- return is_sync_callback_calling_kfunc(btf_id) ||
- is_async_callback_calling_kfunc(btf_id);
+ return is_sync_callback_calling_kfunc(btf_id);
}
static bool is_rbtree_lock_required_kfunc(u32 btf_id)
@@ -12465,16 +12456,6 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
}
}
- if (is_bpf_wq_set_callback_impl_kfunc(meta.func_id)) {
- err = push_callback_call(env, insn, insn_idx, meta.subprogno,
- set_timer_callback_state, &meta);
- if (err) {
- verbose(env, "kfunc %s#%d failed callback verification\n",
- func_name, meta.func_id);
- return err;
- }
- }
-
if (meta.async_cb.enabled) {
err = push_callback_call(env, insn, insn_idx, meta.subprogno,
set_generic_callback_state, &meta);
--
2.44.0