A previous change added a series of kfuncs for storing struct
task_struct objects as referenced kptrs. This patch adds a new
task_kfunc test suite for validating their expected behavior.
Signed-off-by: David Vernet <[email protected]>
---
tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
.../selftests/bpf/prog_tests/task_kfunc.c | 160 +++++++++
.../selftests/bpf/progs/task_kfunc_common.h | 83 +++++
.../selftests/bpf/progs/task_kfunc_failure.c | 315 ++++++++++++++++++
.../selftests/bpf/progs/task_kfunc_success.c | 132 ++++++++
5 files changed, 691 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/task_kfunc.c
create mode 100644 tools/testing/selftests/bpf/progs/task_kfunc_common.h
create mode 100644 tools/testing/selftests/bpf/progs/task_kfunc_failure.c
create mode 100644 tools/testing/selftests/bpf/progs/task_kfunc_success.c
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 520f12229b98..323a0e312b3d 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -52,6 +52,7 @@ skc_to_unix_sock # could not attach BPF object unexpecte
socket_cookie # prog_attach unexpected error: -524 (trampoline)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?)
+task_kfunc # JIT does not support calling kernel function
task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline)
test_bpffs # bpffs test failed 255 (iterator)
test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline)
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
new file mode 100644
index 000000000000..18492d010c45
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <sys/wait.h>
+#include <test_progs.h>
+#include <unistd.h>
+
+#include "task_kfunc_failure.skel.h"
+#include "task_kfunc_success.skel.h"
+
+static size_t log_buf_sz = 1 << 20; /* 1 MB */
+static char obj_log_buf[1048576];
+
+static struct task_kfunc_success *open_load_task_kfunc_skel(void)
+{
+ struct task_kfunc_success *skel;
+ int err;
+
+ skel = task_kfunc_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ err = task_kfunc_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ task_kfunc_success__destroy(skel);
+ return NULL;
+}
+
+static void run_success_test(const char *prog_name)
+{
+ struct task_kfunc_success *skel;
+ int status;
+ pid_t child_pid;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+
+ skel = open_load_task_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_spawn_err"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attached_link"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GT(child_pid, -1, "child_pid"))
+ goto cleanup;
+ if (child_pid == 0)
+ _exit(0);
+ waitpid(child_pid, &status, 0);
+
+ ASSERT_OK(skel->bss->err, "post_wait_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ task_kfunc_success__destroy(skel);
+}
+
+static const char * const success_tests[] = {
+ "test_task_acquire_release_argument",
+ "test_task_acquire_release_current",
+ "test_task_acquire_leave_in_map",
+ "test_task_xchg_release",
+ "test_task_get_release",
+};
+
+static struct {
+ const char *prog_name;
+ const char *expected_err_msg;
+} failure_tests[] = {
+ {"task_kfunc_acquire_untrusted", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_acquire_no_null_check", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_acquire_trusted_nested", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_acquire_null", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_acquire_unreleased", "Unreleased reference"},
+ {"task_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
+ {"task_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"},
+ {"task_kfunc_get_null", "arg#0 expected pointer to map value"},
+ {"task_kfunc_xchg_unreleased", "Unreleased reference"},
+ {"task_kfunc_get_unreleased", "Unreleased reference"},
+ {"task_kfunc_release_untrusted", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_release_fp", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_release_null", "arg#0 pointer type STRUCT task_struct must point"},
+ {"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects refcounted PTR_TO_BTF_ID"},
+};
+
+static void verify_fail(const char *prog_name, const char *expected_err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct task_kfunc_failure *skel;
+ int err, i;
+
+ opts.kernel_log_buf = obj_log_buf;
+ opts.kernel_log_size = log_buf_sz;
+ opts.kernel_log_level = 1;
+
+ skel = task_kfunc_failure__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "task_kfunc_failure__open_opts"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+
+ for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
+ struct bpf_program *prog;
+ const char *curr_name = failure_tests[i].prog_name;
+
+ prog = bpf_object__find_program_by_name(skel->obj, curr_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name));
+ }
+
+ err = task_kfunc_failure__load(skel);
+ if (!ASSERT_ERR(err, "unexpected load success"))
+ goto cleanup;
+
+ if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
+ fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
+ fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+ }
+
+cleanup:
+ task_kfunc_failure__destroy(skel);
+}
+
+void test_task_kfunc(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i]))
+ continue;
+
+ run_success_test(success_tests[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
+ if (!test__start_subtest(failure_tests[i].prog_name))
+ continue;
+
+ verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg);
+ }
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
new file mode 100644
index 000000000000..f51257bdd695
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _TASK_KFUNC_COMMON_H
+#define _TASK_KFUNC_COMMON_H
+
+#include <errno.h>
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct __tasks_kfunc_map_value {
+ struct task_struct __kptr_ref * task;
+};
+
+struct hash_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct __tasks_kfunc_map_value);
+ __uint(max_entries, 1);
+} __tasks_kfunc_map SEC(".maps");
+
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+#define TEST_NAME_SZ 128
+
+/* The pid of the test process used to determine if a newly created task is the test task. */
+int pid;
+
+static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p)
+{
+ s32 pid;
+ long status;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid);
+ if (status)
+ return NULL;
+
+ return bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+}
+
+static inline int tasks_kfunc_map_insert(struct task_struct *p)
+{
+ struct __tasks_kfunc_map_value local, *v;
+ long status;
+ struct task_struct *acquired, *old;
+ s32 pid;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid);
+ if (status)
+ return status;
+
+ local.task = NULL;
+ status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+ if (!v) {
+ bpf_map_delete_elem(&__tasks_kfunc_map, &pid);
+ return -ENOENT;
+ }
+
+ acquired = bpf_task_acquire(p);
+ old = bpf_kptr_xchg(&v->task, acquired);
+ if (old) {
+ bpf_task_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+static inline bool is_test_kfunc_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ return pid == cur_pid;
+}
+
+#endif /* _TASK_KFUNC_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
new file mode 100644
index 000000000000..74d2f176a2de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task)
+{
+ int status;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status)
+ return NULL;
+
+ return tasks_kfunc_map_value_lookup(task);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+ struct __tasks_kfunc_map_value *v;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_task_acquire() on an untrusted pointer. */
+ acquired = bpf_task_acquire(v->task);
+ if (!acquired)
+ return 0;
+
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* Can't invoke bpf_task_acquire() on a random frame pointer. */
+ acquired = bpf_task_acquire((struct task_struct *)&stack_task);
+ if (!acquired)
+ return 0;
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ acquired = bpf_task_acquire(task);
+ /* Can't release a bpf_task_acquire()'d task without a NULL check. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_trusted_nested, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* Can't invoke bpf_task_acquire() on a trusted pointer at a nonzero offset. */
+ acquired = bpf_task_acquire(task->last_wakee);
+ if (!acquired)
+ return 0;
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* Can't invoke bpf_task_acquire() on a NULL pointer. */
+ acquired = bpf_task_acquire(NULL);
+ if (!acquired)
+ return 0;
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ acquired = bpf_task_acquire(task);
+
+ /* Acquired task is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */
+ kptr = bpf_task_kptr_get(&task);
+ if (!kptr)
+ return 0;
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr, *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
+ /* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */
+ kptr = bpf_task_kptr_get(&acquired);
+ bpf_task_release(acquired);
+ if (!kptr)
+ return 0;
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* Cannot use bpf_task_kptr_get() on a NULL pointer. */
+ kptr = bpf_task_kptr_get(NULL);
+ if (!kptr)
+ return 0;
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ kptr = bpf_kptr_xchg(&v->task, NULL);
+ if (!kptr)
+ return 0;
+
+ /* Kptr retrieved from map is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ kptr = bpf_task_kptr_get(&v->task);
+ if (!kptr)
+ return 0;
+
+ /* Kptr acquired above is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value *v;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_task_release() on an untrusted pointer. */
+ bpf_task_release(v->task);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired = (struct task_struct *)&clone_flags;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* Cannot release random frame pointer. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value local, *v;
+ long status;
+ struct task_struct *acquired, *old;
+ s32 pid;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid);
+ if (status)
+ return 0;
+
+ local.task = NULL;
+ status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+ if (!v)
+ return -ENOENT;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
+ old = bpf_kptr_xchg(&v->task, acquired);
+
+ /* old cannot be passed to bpf_task_release() without a NULL check. */
+ bpf_task_release(old);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* Cannot release trusted task pointer which was not acquired. */
+ bpf_task_release(task);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
new file mode 100644
index 000000000000..8d5c05b41d53
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+static int test_acquire_release(struct task_struct *task)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired) {
+ err = 1;
+ return 0;
+ }
+
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ return test_acquire_release(task);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ return test_acquire_release(bpf_get_current_task_btf());
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags)
+{
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status)
+ err = 1;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = bpf_kptr_xchg(&v->task, NULL);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = bpf_task_kptr_get(&v->task);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
--
2.38.0
On Tue, Oct 18, 2022 at 07:23:23AM +0530, Kumar Kartikeya Dwivedi wrote:
Note: I'm responding to Kumar's email from v3 [0] here on v5 instead,
per his request on [1].
[0]: https://lore.kernel.org/all/CAP01T77PTK+bD2mBrxJShKNPhEypT2+nSHcr3=uuJbrghv_wFg@mail.gmail.com/
[1]: https://lore.kernel.org/all/CAP01T747PKC2jySOZCWu_gauHbBfaj4JE=hbtm4Z4C-Y8b3ZHg@mail.gmail.com/
My apologies again for the silly mistakes and having to send multiple
versions of the patch set.
> On Sat, 15 Oct 2022 at 01:45, David Vernet <[email protected]> wrote:
> >
> > A previous change added a series of kfuncs for storing struct
> > task_struct objects as referenced kptrs. This patch adds a new
> > task_kfunc test suite for validating their expected behavior.
> >
> > Signed-off-by: David Vernet <[email protected]>
> > ---
> > [...]
> > +
> > +SEC("tp_btf/task_newtask")
> > +int BPF_PROG(task_kfunc_acquire_trusted_nested, struct task_struct *task, u64 clone_flags)
> > +{
> > + struct task_struct *acquired;
> > +
> > + if (!is_test_kfunc_task())
> > + return 0;
> > +
> > + /* Can't invoke bpf_task_acquire() on a trusted pointer at a nonzero offset. */
> > + acquired = bpf_task_acquire(task->last_wakee);
>
> The comment is incorrect, that would be &task->last_wakee instead,
> this is PTR_TO_BTF_ID | PTR_NESTED.
Well, it's a nonzero offset from task. But yes, to your point, it's a
misleading comment because the offset is 0 in the verifier. I'll
rephrase this to reflect that it's a nested pointer (or a walked
pointer, whatever nomenclature we end up going with).
> > + if (!acquired)
> > + return 0;
> > + bpf_task_release(acquired);
> > +
> > + return 0;
> > +}
> > +
> > [...]
> > +
> > +static int test_acquire_release(struct task_struct *task)
> > +{
> > + struct task_struct *acquired;
> > +
> > + acquired = bpf_task_acquire(task);
>
> Unfortunately a side effect of this change is that now since
> PTR_TO_BTF_ID without ref_obj_id is considered trusted, the bpf_ct_*
> functions would begin working with tp_btf args. That probably needs
> be fixed so that they reject them (ideally with a failing test case to
> make sure it doesn't resurface), probably with a new suffix __ref/or
> __owned as added here [0].
>
> Alexei, since you've suggested avoiding adding that suffix, do you see
> any other way out here?
> It's questionable whether bpf_ct_set_timeout/status should work for CT
> not owned by the BPF program.
>
> [0]: https://lore.kernel.org/bpf/dfb859a6b76a9234baa194e795ae89cb7ca5694b.1662383493.git.lorenzo@kerne
Ah, yeah, it makes sense that some kfuncs really should only ever be
passed an object if the program owns a reference on it. That applies
specifically to e.g. bpf_ct_set_timeout()/bpf_ct_set_status(), which, as
you point out, should only be passed a struct nf_conn___init that was
allocated by bpf_skb_ct_alloc().
It'd be nice if we could just add another flag like KF_REFERENCED_ARGS
or KF_OWNED_ARGS, which would allow only the subset of arguments afforded
by KF_TRUSTED_ARGS that have ref_obj_id > 0. That approach wouldn't
allow the flexibility of having per-argument specifications as your
proposal to use __ref or __owned suffixes on the names, but that already
applies to KF_TRUSTED_ARGS as well.
Personally I'm in agreement with Alexei that it's not a user-friendly
API to use suffixes in the name like this. If we want to allow kfunc
authors to have per-argument specifiers, using compiler attributes
and/or some kind of tagging is probably the way to do it?
My proposal for now is to add a new KF_OWNED_ARGS flag, and to very
clearly document exactly what that and KF_TRUSTED_ARGS implies for
kfuncs. Later on, we could explore solutions for having per-arg
specifiers. What do you and Alexei think?
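Concretely, here's a minimal sketch of what I'm imagining (the flag name
and bit value are placeholders, not final):

	/* include/linux/btf.h */
	#define KF_TRUSTED_ARGS	(1 << 4) /* pointer args must be trusted */
	#define KF_OWNED_ARGS	(1 << 7) /* placeholder: pointer args must be
					  * trusted _and_ referenced, i.e.
					  * have ref_obj_id > 0
					  */

The verifier would handle KF_OWNED_ARGS exactly like KF_TRUSTED_ARGS,
except that it would additionally reject any PTR_TO_BTF_ID argument whose
ref_obj_id is 0.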
On Wed, Oct 19, 2022 at 11:09:05PM IST, David Vernet wrote:
> > On Sat, 15 Oct 2022 at 01:45, David Vernet <[email protected]> wrote:
> > >
> > > A previous change added a series of kfuncs for storing struct
> > > task_struct objects as referenced kptrs. This patch adds a new
> > > task_kfunc test suite for validating their expected behavior.
> > >
> > > Signed-off-by: David Vernet <[email protected]>
> > > ---
> > > [...]
> > > +
> > > +SEC("tp_btf/task_newtask")
> > > +int BPF_PROG(task_kfunc_acquire_trusted_nested, struct task_struct *task, u64 clone_flags)
> > > +{
> > > + struct task_struct *acquired;
> > > +
> > > + if (!is_test_kfunc_task())
> > > + return 0;
> > > +
> > > + /* Can't invoke bpf_task_acquire() on a trusted pointer at a nonzero offset. */
> > > + acquired = bpf_task_acquire(task->last_wakee);
> >
> > The comment is incorrect, that would be &task->last_wakee instead,
> > this is PTR_TO_BTF_ID | PTR_NESTED.
>
> Well, it's a nonzero offset from task. But yes, to your point, it's a
> misleading comment because the offset is 0 in the verifier. I'll
The load insn has a non-zero offset, but not the destination reg.
What you did was:
r1 = rX + offsetof(task_struct, last_wakee); // r1 == rX + off
r1 = *(r1); // r1 == PTR_TO_BTF_ID of task_struct, off = 0
Embedded structs are different:
&file->f_path means a non-zero offset into the PTR_TO_BTF_ID of struct file.
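To make the distinction concrete (sketch, using the usual vmlinux.h
types):

	struct task_struct *t = task->last_wakee; /* load through a pointer member:
						   * t is a new PTR_TO_BTF_ID of
						   * task_struct with off == 0
						   */
	struct path *p = &file->f_path;           /* address of an embedded member:
						   * same PTR_TO_BTF_ID as file, with
						   * off == offsetof(struct file, f_path)
						   */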
> rephrase this to reflect that it's a nested pointer (or a walked
> pointer, whatever nomenclature we end up going with).
>
> > > + if (!acquired)
> > > + return 0;
> > > + bpf_task_release(acquired);
> > > +
> > > + return 0;
> > > +}
> > > +
> > > [...]
> > > +
> > > +static int test_acquire_release(struct task_struct *task)
> > > +{
> > > + struct task_struct *acquired;
> > > +
> > > + acquired = bpf_task_acquire(task);
> >
> > Unfortunately a side effect of this change is that now since
> > PTR_TO_BTF_ID without ref_obj_id is considered trusted, the bpf_ct_*
> > functions would begin working with tp_btf args. That probably needs
> > be fixed so that they reject them (ideally with a failing test case to
> > make sure it doesn't resurface), probably with a new suffix __ref/or
> > __owned as added here [0].
> >
> > Alexei, since you've suggested avoiding adding that suffix, do you see
> > any other way out here?
> > It's questionable whether bpf_ct_set_timeout/status should work for CT
> > not owned by the BPF program.
> >
> > [0]: https://lore.kernel.org/bpf/dfb859a6b76a9234baa194e795ae89cb7ca5694b.1662383493.git.lorenzo@kerne
>
> Ah, yeah, it makes sense that some kfuncs really should only ever be
> passed an object if the program owns a reference on it. That applies
> specifically to e.g. bpf_ct_set_timeout()/bpf_ct_set_status(), which, as
> you point out, should only be passed a struct nf_conn___init that was
> allocated by bpf_skb_ct_alloc().
>
> It'd be nice if we could just add another flag like KF_REFERENCED_ARGS
> or KF_OWNED_ARGS, which would allow only the subset of arguments afforded
> by KF_TRUSTED_ARGS that have ref_obj_id > 0. That approach wouldn't
> allow the flexibility of having per-argument specifications as your
> proposal to use __ref or __owned suffixes on the names, but that already
> applies to KF_TRUSTED_ARGS as well.
>
> Personally I'm in agreement with Alexei that it's not a user-friendly
> API to use suffixes in the name like this. If we want to allow kfunc
> authors to have per-argument specifiers, using compiler attributes
> and/or some kind of tagging is probably the way to do it?
Sadly GCC doesn't support BTF tags. So this is the next best tagging approach.
There was also another horrendous proposal from me to add flags to each argument:
https://gist.github.com/kkdwivedi/7839cc9e4f002acc3e15350b1b86c88c#file-kfunc-arg-patch-L137
>
> My proposal for now is to add a new KF_OWNED_ARGS flag, and to very
> clearly document exactly what that and KF_TRUSTED_ARGS implies for
> kfuncs. Later on, we could explore solutions for having per-arg
> specifiers. What do you and Alexei think?
Based on your proposal above:
For KF_OWNED_ARGS, any PTR_TO_BTF_ID should have a non-zero ref_obj_id; for
KF_TRUSTED_ARGS there will be no such condition. Otherwise they will be the
same. Then switch all CT helpers to KF_OWNED_ARGS.
This should work fine.
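i.e., something like the below in net/netfilter/nf_conntrack_bpf.c (going
from memory on the current flag sets, so the exact entries may differ):

	-BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
	-BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
	+BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_OWNED_ARGS)
	+BTF_ID_FLAGS(func, bpf_ct_set_status, KF_OWNED_ARGS)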
On Thu, Oct 20, 2022 at 11:49:03AM +0530, Kumar Kartikeya Dwivedi wrote:
> On Wed, Oct 19, 2022 at 11:09:05PM IST, David Vernet wrote:
> > > On Sat, 15 Oct 2022 at 01:45, David Vernet <[email protected]> wrote:
> > > >
> > > > A previous change added a series of kfuncs for storing struct
> > > > task_struct objects as referenced kptrs. This patch adds a new
> > > > task_kfunc test suite for validating their expected behavior.
> > > >
> > > > Signed-off-by: David Vernet <[email protected]>
> > > > ---
> > > > [...]
> > > > +
> > > > +SEC("tp_btf/task_newtask")
> > > > +int BPF_PROG(task_kfunc_acquire_trusted_nested, struct task_struct *task, u64 clone_flags)
> > > > +{
> > > > + struct task_struct *acquired;
> > > > +
> > > > + if (!is_test_kfunc_task())
> > > > + return 0;
> > > > +
> > > > + /* Can't invoke bpf_task_acquire() on a trusted pointer at a nonzero offset. */
> > > > + acquired = bpf_task_acquire(task->last_wakee);
> > >
> > > The comment is incorrect, that would be &task->last_wakee instead,
> > > this is PTR_TO_BTF_ID | PTR_NESTED.
> >
> > Well, it's a nonzero offset from task. But yes, to your point, it's a
> > misleading comment because the offset is 0 in the verifier. I'll
>
> The load insn has a non-zero offset, but not the destination reg.
Yeah, this is what I meant by hand-wavily saying "0 in the verifier". I
agree that the comment is incorrect as stated; I'll fix it in the next
revision per your suggestion.
> What you did was:
> r1 = rX + offsetof(task_struct, last_wakee); // r1 == rX + off
> r1 = *(r1); // r1 == PTR_TO_BTF_ID of task_struct, off = 0
>
> Embedded structs are different,
> &file->f_path means non-zero offset into PTR_TO_BTF_ID of struct file
>
> > rephrase this to reflect that it's a nested pointer (or a walked
> > pointer, whatever nomenclature we end up going with).
> >
> > > > + if (!acquired)
> > > > + return 0;
> > > > + bpf_task_release(acquired);
> > > > +
> > > > + return 0;
> > > > +}
> > > > +
> > > > [...]
> > > > +
> > > > +static int test_acquire_release(struct task_struct *task)
> > > > +{
> > > > + struct task_struct *acquired;
> > > > +
> > > > + acquired = bpf_task_acquire(task);
> > >
> > > Unfortunately a side effect of this change is that now since
> > > PTR_TO_BTF_ID without ref_obj_id is considered trusted, the bpf_ct_*
> > > functions would begin working with tp_btf args. That probably needs
> > > be fixed so that they reject them (ideally with a failing test case to
> > > make sure it doesn't resurface), probably with a new suffix __ref/or
> > > __owned as added here [0].
> > >
> > > Alexei, since you've suggested avoiding adding that suffix, do you see
> > > any other way out here?
> > > It's questionable whether bpf_ct_set_timeout/status should work for CT
> > > not owned by the BPF program.
> > >
> > > [0]: https://lore.kernel.org/bpf/dfb859a6b76a9234baa194e795ae89cb7ca5694b.1662383493.git.lorenzo@kerne
> >
> > Ah, yeah, it makes sense that some kfuncs really should only ever be
> > passed an object if the program owns a reference on it. That applies
> > specifically to e.g. bpf_ct_set_timeout()/bpf_ct_set_status(), which, as
> > you point out, should only be passed a struct nf_conn___init that was
> > allocated by bpf_skb_ct_alloc().
> >
> > It'd be nice if we could just add another flag like KF_REFERENCED_ARGS
> > or KF_OWNED_ARGS, which would allow only the subset of arguments afforded
> > by KF_TRUSTED_ARGS that have ref_obj_id > 0. That approach wouldn't
> > allow the flexibility of having per-argument specifications as your
> > proposal to use __ref or __owned suffixes on the names, but that already
> > applies to KF_TRUSTED_ARGS as well.
> >
> > Personally I'm in agreement with Alexei that it's not a user-friendly
> > API to use suffixes in the name like this. If we want to allow kfunc
> > authors to have per-argument specifiers, using compiler attributes
> > and/or some kind of tagging is probably the way to do it?
>
> Sadly GCC doesn't support BTF tags. So this is the next best tagging approach.
>
> There was also another horrendous proposal from me to add flags to each argument:
> https://gist.github.com/kkdwivedi/7839cc9e4f002acc3e15350b1b86c88c#file-kfunc-arg-patch-L137
I feel like I must be missing something given that you said this was
horrendous, but I actually don't hate this. This is pretty similar to
what we do for helpers anyways, no? I certainly prefer it over the
suffix naming approach. IMO the problem with that is it kind of requires
users to dive into the verifier to understand how to implement kfuncs.
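For comparison, helper protos already carry per-argument specifiers, e.g.
(copied roughly from kernel/bpf/stackmap.c, so treat the field values as
illustrative):

	const struct bpf_func_proto bpf_get_task_stack_proto = {
		.func		= bpf_get_task_stack,
		.gpl_only	= false,
		.ret_type	= RET_INTEGER,
		.arg1_type	= ARG_PTR_TO_BTF_ID,
		.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
		.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
		.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
		.arg4_type	= ARG_ANYTHING,
	};

Something similar per kfunc argument seems a lot more discoverable than
encoding the semantics in the argument's name.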
That also reminds me that in the next revision, I'll also update the
documentation for KF_TRUSTED_ARGS (and KF_OWNED_ARGS) to reflect the new
state of things.
> > My proposal for now is to add a new KF_OWNED_ARGS flag, and to very
> > clearly document exactly what that and KF_TRUSTED_ARGS implies for
> > kfuncs. Later on, we could explore solutions for having per-arg
> > specifiers. What do you and Alexei think?
>
> Based on your proposal above:
>
> For KF_OWNED_ARGS, any PTR_TO_BTF_ID should have non-zero ref_obj_id, for
> KF_TRUSTED_ARGS there will be no such condition. Otherwise they will be the
> same. Then switch all CT helpers to KF_OWNED_ARGS.
Exactly
> This should work fine.
Great, I'll get to work on this. Unless Alexei or anyone else objects?