From: Feng Zhou <[email protected]>
When tracing sched-related functions, such as enqueue_task_fair, it is
necessary to test whether a specified task, rather than the current task,
is within a given cgroup.
Feng Zhou (2):
bpf: Add bpf_task_under_cgroup() kfunc
selftests/bpf: Add testcase for bpf_task_under_cgroup
Changelog:
v3->v4: Addressed comments from Yonghong Song
- Modify test cases and test other tasks, not the current task.
Details in here:
https://lore.kernel.org/all/[email protected]/
v2->v3: Addressed comments from Alexei Starovoitov
- Modify the comment information of the function.
- Narrow down the testcase's hook point
Details in here:
https://lore.kernel.org/all/[email protected]/
v1->v2: Addressed comments from Alexei Starovoitov
- Add kfunc instead.
Details in here:
https://lore.kernel.org/all/[email protected]/
kernel/bpf/helpers.c | 20 +++++++
tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
.../bpf/prog_tests/task_under_cgroup.c | 55 +++++++++++++++++++
.../bpf/progs/test_task_under_cgroup.c | 51 +++++++++++++++++
4 files changed, 127 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
create mode 100644 tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
--
2.20.1
From: Feng Zhou <[email protected]>
Add a kfunc that is similar to bpf_current_task_under_cgroup().
The difference is that it tests a designated task rather than the current one.
When hooking sched-related functions, it is sometimes necessary to
specify a task instead of the current task.
Signed-off-by: Feng Zhou <[email protected]>
---
kernel/bpf/helpers.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bb6b4637ebf2..453cbd312366 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2149,6 +2149,25 @@ __bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
return NULL;
return cgrp;
}
+
+/**
+ * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc, test
+ * task's membership of cgroup ancestry.
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+__bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
+ struct cgroup *ancestor)
+{
+ if (unlikely(!ancestor || !task))
+ return -EINVAL;
+
+ return task_under_cgroup_hierarchy(task, ancestor);
+}
#endif /* CONFIG_CGROUPS */
/**
@@ -2400,6 +2419,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_SET8_END(generic_btf_ids)
--
2.20.1
From: Feng Zhou <[email protected]>
test_progs:
Tests new kfunc bpf_task_under_cgroup().
The bpf program saves the pid of a new task that is within the given
cgroup to remote_pid, so that the user-mode program can verify the
correctness of the test.
The user-mode program creates its own mount namespace, mounts the
cgroup v2 hierarchy there, calls the fork syscall, and then checks that
remote_pid and local_pid are unequal.
Signed-off-by: Feng Zhou <[email protected]>
---
tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
.../bpf/prog_tests/task_under_cgroup.c | 55 +++++++++++++++++++
.../bpf/progs/test_task_under_cgroup.c | 51 +++++++++++++++++
3 files changed, 107 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
create mode 100644 tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index c7463f3ec3c0..5061d9e24c16 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -26,3 +26,4 @@ user_ringbuf # failed to find kernel BTF type ID of
verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)
xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
xdp_metadata # JIT does not support calling kernel function (kfunc)
+test_task_under_cgroup # JIT does not support calling kernel function (kfunc)
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
new file mode 100644
index 000000000000..5e79dff86dec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include "test_task_under_cgroup.skel.h"
+
+#define FOO "/foo"
+
+void test_task_under_cgroup(void)
+{
+ struct test_task_under_cgroup *skel;
+ int ret, foo = -1;
+ pid_t pid;
+
+ foo = test__join_cgroup(FOO);
+ if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
+ return;
+
+ skel = test_task_under_cgroup__open();
+ if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
+ goto cleanup;
+
+ skel->rodata->local_pid = getpid();
+ skel->bss->remote_pid = getpid();
+ skel->rodata->cgid = get_cgroup_id(FOO);
+
+ ret = test_task_under_cgroup__load(skel);
+ if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
+ goto cleanup;
+
+ ret = test_task_under_cgroup__attach(skel);
+ if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
+ goto cleanup;
+
+ pid = fork();
+ if (pid == 0)
+ exit(0);
+ else if (pid == -1)
+ printf("Couldn't fork process!\n");
+
+ wait(NULL);
+
+ test_task_under_cgroup__detach(skel);
+
+ ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid,
+ "test task_under_cgroup");
+
+cleanup:
+ if (foo >= 0)
+ close(foo);
+
+ test_task_under_cgroup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
new file mode 100644
index 000000000000..5bcb726d6d0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+const volatile int local_pid;
+const volatile long cgid;
+int remote_pid;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags)
+{
+ struct cgroup *cgrp = NULL;
+ struct task_struct *acquired = NULL;
+
+ if (local_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
+ if (local_pid == acquired->tgid)
+ goto out;
+
+ cgrp = bpf_cgroup_from_id(cgid);
+ if (!cgrp)
+ goto out;
+
+ if (bpf_task_under_cgroup(acquired, cgrp))
+ remote_pid = acquired->tgid;
+
+out:
+ if (acquired)
+ bpf_task_release(acquired);
+ if (cgrp)
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.20.1
On 4/28/23 12:17 AM, Feng zhou wrote:
> From: Feng Zhou <[email protected]>
>
> Add a kfunc that's similar to the bpf_current_task_under_cgroup.
> The difference is that it is a designated task.
>
> When hook sched related functions, sometimes it is necessary to
> specify a task instead of the current task.
>
> Signed-off-by: Feng Zhou <[email protected]>
Acked-by: Yonghong Song <[email protected]>
On 4/28/23 12:17 AM, Feng zhou wrote:
> From: Feng Zhou <[email protected]>
>
> test_progs:
> Tests new kfunc bpf_task_under_cgroup().
>
> The bpf program saves the new task's pid within a given cgroup to
> the remote_pid, which is convenient for the user-mode program to
> verify the test correctness.
>
> The user-mode program creates its own mount namespace, and mounts the
> cgroupsv2 hierarchy in there, call the fork syscall, then check if
> remote_pid and local_pid are unequal.
>
> Signed-off-by: Feng Zhou <[email protected]>
Ack with a few nits below.
Acked-by: Yonghong Song <[email protected]>
> ---
> tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
> .../bpf/prog_tests/task_under_cgroup.c | 55 +++++++++++++++++++
> .../bpf/progs/test_task_under_cgroup.c | 51 +++++++++++++++++
> 3 files changed, 107 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
> create mode 100644 tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
>
> diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
> index c7463f3ec3c0..5061d9e24c16 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.s390x
> +++ b/tools/testing/selftests/bpf/DENYLIST.s390x
> @@ -26,3 +26,4 @@ user_ringbuf # failed to find kernel BTF type ID of
> verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?)
> xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
> xdp_metadata # JIT does not support calling kernel function (kfunc)
> +test_task_under_cgroup # JIT does not support calling kernel function (kfunc)
> diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
> new file mode 100644
> index 000000000000..5e79dff86dec
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
> @@ -0,0 +1,55 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2023 Bytedance */
> +
> +#include <sys/syscall.h>
> +#include <test_progs.h>
> +#include <cgroup_helpers.h>
> +#include "test_task_under_cgroup.skel.h"
> +
> +#define FOO "/foo"
> +
> +void test_task_under_cgroup(void)
> +{
> + struct test_task_under_cgroup *skel;
> + int ret, foo = -1;
> + pid_t pid;
> +
> + foo = test__join_cgroup(FOO);
> + if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
> + return;
> +
> + skel = test_task_under_cgroup__open();
> + if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
> + goto cleanup;
> +
> + skel->rodata->local_pid = getpid();
> + skel->bss->remote_pid = getpid();
> + skel->rodata->cgid = get_cgroup_id(FOO);
> +
> + ret = test_task_under_cgroup__load(skel);
> + if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
> + goto cleanup;
> +
> + ret = test_task_under_cgroup__attach(skel);
> + if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
> + goto cleanup;
> +
> + pid = fork();
> + if (pid == 0)
> + exit(0);
> + else if (pid == -1)
> + printf("Couldn't fork process!\n");
ASSERT_* is preferred over 'printf'. Maybe ASSERT_TRUE(0,
"Couldn't fork process")?
> +
> + wait(NULL);
> +
> + test_task_under_cgroup__detach(skel);
> +
> + ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid,
> + "test task_under_cgroup");
> +
> +cleanup:
> + if (foo >= 0)
"if (foo >= 0)" is not needed. 'foo' is guaranteed ">= 0" as this point.
> + close(foo);
> +
> + test_task_under_cgroup__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
> new file mode 100644
> index 000000000000..5bcb726d6d0a
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
> @@ -0,0 +1,51 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2023 Bytedance */
> +
> +#include <vmlinux.h>
> +#include <bpf/bpf_tracing.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#include "bpf_misc.h"
> +
> +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
> +long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;
> +void bpf_cgroup_release(struct cgroup *p) __ksym;
> +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
> +void bpf_task_release(struct task_struct *p) __ksym;
> +
> +const volatile int local_pid;
> +const volatile long cgid;
cgid cannot be a negative number. So let us do
const volatile __u64 cgid;
> +int remote_pid;
> +
> +SEC("tp_btf/task_newtask")
> +int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags)
> +{
> + struct cgroup *cgrp = NULL;
> + struct task_struct *acquired = NULL;
"acquired = NULL" is not needed. Just do "struct task_struct *acquired;".
> +
> + if (local_pid != (bpf_get_current_pid_tgid() >> 32))
> + return 0;
> +
> + acquired = bpf_task_acquire(task);
> + if (!acquired)
> + return 0;
> +
> + if (local_pid == acquired->tgid)
> + goto out;
> +
> + cgrp = bpf_cgroup_from_id(cgid);
> + if (!cgrp)
> + goto out;
> +
> + if (bpf_task_under_cgroup(acquired, cgrp))
> + remote_pid = acquired->tgid;
> +
> +out:
> + if (acquired)
> + bpf_task_release(acquired);
> + if (cgrp)
> + bpf_cgroup_release(cgrp);
> + return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
在 2023/4/29 00:32, Yonghong Song 写道:
>
>
> On 4/28/23 12:17 AM, Feng zhou wrote:
>> From: Feng Zhou <[email protected]>
>>
>> test_progs:
>> Tests new kfunc bpf_task_under_cgroup().
>>
>> The bpf program saves the new task's pid within a given cgroup to
>> the remote_pid, which is convenient for the user-mode program to
>> verify the test correctness.
>>
>> The user-mode program creates its own mount namespace, and mounts the
>> cgroupsv2 hierarchy in there, call the fork syscall, then check if
>> remote_pid and local_pid are unequal.
>>
>> Signed-off-by: Feng Zhou <[email protected]>
>
> Ack with a few nits below.
>
> Acked-by: Yonghong Song <[email protected]>
>
>> ---
>> tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
>> .../bpf/prog_tests/task_under_cgroup.c | 55 +++++++++++++++++++
>> .../bpf/progs/test_task_under_cgroup.c | 51 +++++++++++++++++
>> 3 files changed, 107 insertions(+)
>> create mode 100644
>> tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
>> create mode 100644
>> tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
>>
>> diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x
>> b/tools/testing/selftests/bpf/DENYLIST.s390x
>> index c7463f3ec3c0..5061d9e24c16 100644
>> --- a/tools/testing/selftests/bpf/DENYLIST.s390x
>> +++ b/tools/testing/selftests/bpf/DENYLIST.s390x
>> @@ -26,3 +26,4 @@ user_ringbuf # failed to
>> find kernel BTF type ID of
>> verif_stats #
>> trace_vprintk__open_and_load unexpected error:
>> -9 (?)
>> xdp_bonding # failed to auto-attach
>> program 'trace_on_entry': -524 (trampoline)
>> xdp_metadata # JIT does not support
>> calling kernel function (kfunc)
>> +test_task_under_cgroup # JIT does not support
>> calling kernel function (kfunc)
>> diff --git
>> a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
>> b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
>> new file mode 100644
>> index 000000000000..5e79dff86dec
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
>> @@ -0,0 +1,55 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Copyright (c) 2023 Bytedance */
>> +
>> +#include <sys/syscall.h>
>> +#include <test_progs.h>
>> +#include <cgroup_helpers.h>
>> +#include "test_task_under_cgroup.skel.h"
>> +
>> +#define FOO "/foo"
>> +
>> +void test_task_under_cgroup(void)
>> +{
>> + struct test_task_under_cgroup *skel;
>> + int ret, foo = -1;
>> + pid_t pid;
>> +
>> + foo = test__join_cgroup(FOO);
>> + if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
>> + return;
>> +
>> + skel = test_task_under_cgroup__open();
>> + if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
>> + goto cleanup;
>> +
>> + skel->rodata->local_pid = getpid();
>> + skel->bss->remote_pid = getpid();
>> + skel->rodata->cgid = get_cgroup_id(FOO);
>> +
>> + ret = test_task_under_cgroup__load(skel);
>> + if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
>> + goto cleanup;
>> +
>> + ret = test_task_under_cgroup__attach(skel);
>> + if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
>> + goto cleanup;
>> +
>> + pid = fork();
>> + if (pid == 0)
>> + exit(0);
>> + else if (pid == -1)
>> + printf("Couldn't fork process!\n");
>
> ASSERT_* is preferred compared to 'printf'. Maybe ASSERT_TRUE(0,
> "Couldn't fork process")?
>
Will do.
>> +
>> + wait(NULL);
>> +
>> + test_task_under_cgroup__detach(skel);
>> +
>> + ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid,
>> + "test task_under_cgroup");
>> +
>> +cleanup:
>> + if (foo >= 0)
>
> "if (foo >= 0)" is not needed. 'foo' is guaranteed ">= 0" as this point.
>
Yes
>> + close(foo);
>> +
>> + test_task_under_cgroup__destroy(skel);
>> +}
>> diff --git
>> a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
>> b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
>> new file mode 100644
>> index 000000000000..5bcb726d6d0a
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
>> @@ -0,0 +1,51 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/* Copyright (c) 2023 Bytedance */
>> +
>> +#include <vmlinux.h>
>> +#include <bpf/bpf_tracing.h>
>> +#include <bpf/bpf_helpers.h>
>> +
>> +#include "bpf_misc.h"
>> +
>> +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
>> +long bpf_task_under_cgroup(struct task_struct *task, struct cgroup
>> *ancestor) __ksym;
>> +void bpf_cgroup_release(struct cgroup *p) __ksym;
>> +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
>> +void bpf_task_release(struct task_struct *p) __ksym;
>> +
>> +const volatile int local_pid;
>> +const volatile long cgid;
>
> cgid cannot be a negative number. So let us do
> const volatile __u64 cgid;
>
Ok
>> +int remote_pid;
>> +
>> +SEC("tp_btf/task_newtask")
>> +int BPF_PROG(handle__task_newtask, struct task_struct *task, u64
>> clone_flags)
>> +{
>> + struct cgroup *cgrp = NULL;
>> + struct task_struct *acquired = NULL;
>
> "acquired = NULL" is not needed. Just do "struct task_struct *acquired;".
>
Ok
>> +
>> + if (local_pid != (bpf_get_current_pid_tgid() >> 32))
>> + return 0;
>> +
>> + acquired = bpf_task_acquire(task);
>> + if (!acquired)
>> + return 0;
>> +
>> + if (local_pid == acquired->tgid)
>> + goto out;
>> +
>> + cgrp = bpf_cgroup_from_id(cgid);
>> + if (!cgrp)
>> + goto out;
>> +
>> + if (bpf_task_under_cgroup(acquired, cgrp))
>> + remote_pid = acquired->tgid;
>> +
>> +out:
>> + if (acquired)
>> + bpf_task_release(acquired);
>> + if (cgrp)
>> + bpf_cgroup_release(cgrp);
>> + return 0;
>> +}
>> +
>> +char _license[] SEC("license") = "GPL";