This patch enhances the BPF helpers by adding a kfunc to retrieve the
cgroup v2 of a task, addressing a previous limitation where only
bpf_task_get_cgroup1 was available for cgroup v1. The new kfunc is
particularly useful for scenarios where obtaining the cgroup ID of a
task other than the "current" one is necessary, which the existing
bpf_get_current_cgroup_id helper cannot accommodate. A specific use
case at Netflix involved the sched_switch tracepoint, where we had to
get the cgroup IDs of both the prev and next tasks.
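For illustration, a rough, untested sketch of how a tp_btf program could
use the new kfunc for that case (the usual __ksym declarations and the
logic that records the IDs are omitted; the program name is made up):

	SEC("tp_btf/sched_switch")
	int BPF_PROG(on_switch, bool preempt, struct task_struct *prev,
		     struct task_struct *next)
	{
		struct cgroup *cgrp;
		u64 prev_cgid = 0, next_cgid = 0;

		cgrp = bpf_task_get_cgroup(prev);
		if (cgrp) {
			prev_cgid = cgrp->kn->id;
			bpf_cgroup_release(cgrp);
		}
		cgrp = bpf_task_get_cgroup(next);
		if (cgrp) {
			next_cgid = cgrp->kn->id;
			bpf_cgroup_release(cgrp);
		}
		/* emit prev_cgid/next_cgid to a map or ring buffer as needed */
		return 0;
	}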
The bpf_task_get_cgroup kfunc acquires and returns a reference to a
task's default cgroup, taking the RCU read lock around the lookup so
the access is safe. It uses the existing task_dfl_cgroup() helper from
cgroup.h to find the default cgroup and cgroup_tryget() to safely
acquire a reference to it.
Signed-off-by: Jose Fernandez <[email protected]>
Reviewed-by: Tycho Andersen <[email protected]>
Acked-by: Yonghong Song <[email protected]>
Acked-by: Stanislav Fomichev <[email protected]>
---
V2 -> V3: No changes
V1 -> V2: Return a pointer to the cgroup instead of the cgroup ID
kernel/bpf/helpers.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a89587859571..bbd19d5eedb6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2266,6 +2266,31 @@ bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id)
return NULL;
return cgrp;
}
+
+/**
+ * bpf_task_get_cgroup - Acquire a reference to the default cgroup of a task.
+ * @task: The target task
+ *
+ * This function returns the task's default cgroup, primarily
+ * designed for use with cgroup v2. In cgroup v1, the concept of default
+ * cgroup varies by subsystem, and while this function will work with
+ * cgroup v1, it's recommended to use bpf_task_get_cgroup1 instead.
+ * A cgroup returned by this kfunc that is not subsequently stored in a
+ * map must be released by calling bpf_cgroup_release().
+ *
+ * Return: On success, the cgroup is returned. On failure, NULL is returned.
+ */
+__bpf_kfunc struct cgroup *bpf_task_get_cgroup(struct task_struct *task)
+{
+ struct cgroup *cgrp;
+
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(task);
+ if (!cgroup_tryget(cgrp))
+ cgrp = NULL;
+ rcu_read_unlock();
+ return cgrp;
+}
#endif /* CONFIG_CGROUPS */
/**
@@ -2573,6 +2598,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_under_cgroup, KF_RCU)
BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_task_get_cgroup, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
#endif
BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_throw)
base-commit: c733239f8f530872a1f80d8c45dcafbaff368737
--
2.40.1
This patch adds a selftest for the `bpf_task_get_cgroup` kfunc. The test
focuses on the use case of obtaining the cgroup ID of the previous task
in a `sched_switch` tracepoint.
The selftest creates a test cgroup, attaches a BPF program that calls
the `bpf_task_get_cgroup` kfunc in a `sched_switch` tracepoint, and
validates that the cgroup ID obtained for the previous task matches the
expected cgroup ID.
Signed-off-by: Jose Fernandez <[email protected]>
Reviewed-by: Tycho Andersen <[email protected]>
---
V2 -> V3: Use _open_and_load(), usleep helper, and drop map usage
V1 -> V2: Refactor test to work with a cgroup pointer instead of the ID
.../bpf/prog_tests/task_get_cgroup.c | 43 +++++++++++++++++++
.../bpf/progs/test_task_get_cgroup.c | 32 ++++++++++++++
2 files changed, 75 insertions(+)
create mode 100644 tools/testing/selftests/bpf/prog_tests/task_get_cgroup.c
create mode 100644 tools/testing/selftests/bpf/progs/test_task_get_cgroup.c
diff --git a/tools/testing/selftests/bpf/prog_tests/task_get_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_get_cgroup.c
new file mode 100644
index 000000000000..031623067e7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_get_cgroup.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2024 Netflix, Inc.
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include "test_task_get_cgroup.skel.h"
+#include <unistd.h>
+
+#define TEST_CGROUP "/test-task-get-cgroup/"
+
+void test_task_get_cgroup(void)
+{
+ struct test_task_get_cgroup *skel;
+ int err, fd;
+ __u64 expected_cgroup_id;
+
+ fd = test__join_cgroup(TEST_CGROUP);
+ if (!ASSERT_OK(fd < 0, "test_join_cgroup_TEST_CGROUP"))
+ return;
+
+ skel = test_task_get_cgroup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_task_get_cgroup__open_and_load"))
+ goto cleanup;
+
+ err = test_task_get_cgroup__attach(skel);
+ if (!ASSERT_OK(err, "test_task_get_cgroup__attach"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ expected_cgroup_id = get_cgroup_id(TEST_CGROUP);
+ if (!ASSERT_GT(expected_cgroup_id, 0, "get_cgroup_id"))
+ goto cleanup;
+
+ /* Trigger nanosleep to enter the sched_switch tracepoint */
+ /* The previous task should be this process */
+ usleep(100);
+
+ ASSERT_EQ(skel->bss->cgroup_id, expected_cgroup_id, "cgroup_id");
+
+cleanup:
+ test_task_get_cgroup__destroy(skel);
+ close(fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_task_get_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_get_cgroup.c
new file mode 100644
index 000000000000..30d4499c6bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_get_cgroup.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2024 Netflix, Inc.
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct cgroup *bpf_task_get_cgroup(struct task_struct *task) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+int pid = 0;
+u64 cgroup_id = 0;
+
+SEC("tp_btf/sched_switch")
+int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev,
+ struct task_struct *next)
+{
+ struct cgroup *cgrp;
+
+ if (prev->pid != pid)
+ return 0;
+
+ cgrp = bpf_task_get_cgroup(prev);
+ if (cgrp == NULL)
+ return 0;
+ cgroup_id = cgrp->kn->id;
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.40.1
On Mon, Mar 18, 2024 at 11:03:02PM -0600, Jose Fernandez wrote:
> This patch adds a selftest for the `bpf_task_get_cgroup` kfunc. The test
> focuses on the use case of obtaining the cgroup ID of the previous task
> in a `sched_switch` tracepoint.
>
> The selftest involves creating a test cgroup, attaching a BPF program
> that utilizes the `bpf_task_get_cgroup` during a `sched_switch`
> tracepoint, and validating that the obtained cgroup ID for the previous
> task matches the expected cgroup ID.
>
> Signed-off-by: Jose Fernandez <[email protected]>
> Reviewed-by: Tycho Andersen <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
jirka
> ---
> V2 -> v3: Use _open_and_load(), usleep helper, and drop map usage
> V1 -> V2: Refactor test to work with a cgroup pointer instead of the ID
>
> .../bpf/prog_tests/task_get_cgroup.c | 43 +++++++++++++++++++
> .../bpf/progs/test_task_get_cgroup.c | 32 ++++++++++++++
> 2 files changed, 75 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/prog_tests/task_get_cgroup.c
> create mode 100644 tools/testing/selftests/bpf/progs/test_task_get_cgroup.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/task_get_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_get_cgroup.c
> new file mode 100644
> index 000000000000..031623067e7e
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/task_get_cgroup.c
> @@ -0,0 +1,43 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright 2024 Netflix, Inc.
> +
> +#include <test_progs.h>
> +#include <cgroup_helpers.h>
> +#include "test_task_get_cgroup.skel.h"
> +#include <unistd.h>
> +
> +#define TEST_CGROUP "/test-task-get-cgroup/"
> +
> +void test_task_get_cgroup(void)
> +{
> + struct test_task_get_cgroup *skel;
> + int err, fd;
> + __u64 expected_cgroup_id;
> +
> + fd = test__join_cgroup(TEST_CGROUP);
> + if (!ASSERT_OK(fd < 0, "test_join_cgroup_TEST_CGROUP"))
> + return;
> +
> + skel = test_task_get_cgroup__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "test_task_get_cgroup__open_and_load"))
> + goto cleanup;
> +
> + err = test_task_get_cgroup__attach(skel);
> + if (!ASSERT_OK(err, "test_task_get_cgroup__attach"))
> + goto cleanup;
> +
> + skel->bss->pid = getpid();
> + expected_cgroup_id = get_cgroup_id(TEST_CGROUP);
> + if (!ASSERT_GT(expected_cgroup_id, 0, "get_cgroup_id"))
> + goto cleanup;
> +
> + /* Trigger nanosleep to enter the sched_switch tracepoint */
> + /* The previous task should be this process */
> + usleep(100);
> +
> + ASSERT_EQ(skel->bss->cgroup_id, expected_cgroup_id, "cgroup_id");
> +
> +cleanup:
> + test_task_get_cgroup__destroy(skel);
> + close(fd);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_task_get_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_get_cgroup.c
> new file mode 100644
> index 000000000000..30d4499c6bc5
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_task_get_cgroup.c
> @@ -0,0 +1,32 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright 2024 Netflix, Inc.
> +
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +struct cgroup *bpf_task_get_cgroup(struct task_struct *task) __ksym;
> +void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
> +
> +int pid = 0;
> +u64 cgroup_id = 0;
> +
> +SEC("tp_btf/sched_switch")
> +int BPF_PROG(sched_switch, bool preempt, struct task_struct *prev,
> + struct task_struct *next)
> +{
> + struct cgroup *cgrp;
> +
> + if (prev->pid != pid)
> + return 0;
> +
> + cgrp = bpf_task_get_cgroup(prev);
> + if (cgrp == NULL)
> + return 0;
> + cgroup_id = cgrp->kn->id;
> +
> + bpf_cgroup_release(cgrp);
> + return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.40.1
>
On Mon, Mar 18, 2024 at 10:04 PM Jose Fernandez <[email protected]> wrote:
>
> This patch enhances the BPF helpers by adding a kfunc to retrieve the
> cgroup v2 of a task, addressing a previous limitation where only
> bpf_task_get_cgroup1 was available for cgroup v1. The new kfunc is
> particularly useful for scenarios where obtaining the cgroup ID of a
> task other than the "current" one is necessary, which the existing
> bpf_get_current_cgroup_id helper cannot accommodate. A specific use
> case at Netflix involved the sched_switch tracepoint, where we had to
> get the cgroup IDs of both the prev and next tasks.
>
> The bpf_task_get_cgroup kfunc acquires and returns a reference to a
> task's default cgroup, ensuring thread-safe access by correctly
> implementing RCU read locking and unlocking. It leverages the existing
> cgroup.h helper, and cgroup_tryget to safely acquire a reference to it.
>
> Signed-off-by: Jose Fernandez <[email protected]>
> Reviewed-by: Tycho Andersen <[email protected]>
> Acked-by: Yonghong Song <[email protected]>
> Acked-by: Stanislav Fomichev <[email protected]>
> ---
> V2 -> V3: No changes
> V1 -> V2: Return a pointer to the cgroup instead of the cgroup ID
>
> kernel/bpf/helpers.c | 26 ++++++++++++++++++++++++++
> 1 file changed, 26 insertions(+)
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index a89587859571..bbd19d5eedb6 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -2266,6 +2266,31 @@ bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id)
> return NULL;
> return cgrp;
> }
> +
> +/**
> + * bpf_task_get_cgroup - Acquire a reference to the default cgroup of a task.
> + * @task: The target task
> + *
> + * This function returns the task's default cgroup, primarily
> + * designed for use with cgroup v2. In cgroup v1, the concept of default
> + * cgroup varies by subsystem, and while this function will work with
> + * cgroup v1, it's recommended to use bpf_task_get_cgroup1 instead.
> + * A cgroup returned by this kfunc which is not subsequently stored in a
> + * map, must be released by calling bpf_cgroup_release().
> + *
> + * Return: On success, the cgroup is returned. On failure, NULL is returned.
> + */
> +__bpf_kfunc struct cgroup *bpf_task_get_cgroup(struct task_struct *task)
> +{
> + struct cgroup *cgrp;
> +
> + rcu_read_lock();
> + cgrp = task_dfl_cgroup(task);
> + if (!cgroup_tryget(cgrp))
> + cgrp = NULL;
> + rcu_read_unlock();
> + return cgrp;
> +}
Tejun,
Does this patch look good to you?
On Mon, Mar 18, 2024 at 11:03:01PM -0600, Jose Fernandez wrote:
> This patch enhances the BPF helpers by adding a kfunc to retrieve the
> cgroup v2 of a task, addressing a previous limitation where only
> bpf_task_get_cgroup1 was available for cgroup v1. The new kfunc is
> particularly useful for scenarios where obtaining the cgroup ID of a
> task other than the "current" one is necessary, which the existing
> bpf_get_current_cgroup_id helper cannot accommodate. A specific use
> case at Netflix involved the sched_switch tracepoint, where we had to
> get the cgroup IDs of both the prev and next tasks.
>
> The bpf_task_get_cgroup kfunc acquires and returns a reference to a
> task's default cgroup, ensuring thread-safe access by correctly
> implementing RCU read locking and unlocking. It leverages the existing
> cgroup.h helper, and cgroup_tryget to safely acquire a reference to it.
>
> Signed-off-by: Jose Fernandez <[email protected]>
> Reviewed-by: Tycho Andersen <[email protected]>
> Acked-by: Yonghong Song <[email protected]>
> Acked-by: Stanislav Fomichev <[email protected]>
Acked-by: Tejun Heo <[email protected]>
but some questions below
> +__bpf_kfunc struct cgroup *bpf_task_get_cgroup(struct task_struct *task)
> +{
> + struct cgroup *cgrp;
> +
> + rcu_read_lock();
> + cgrp = task_dfl_cgroup(task);
> + if (!cgroup_tryget(cgrp))
> + cgrp = NULL;
> + rcu_read_unlock();
> + return cgrp;
> +}
So, as this is a lot easier in cgroup2, the above can probably be written
directly in BPF (untested and not sure the necessary annotations are in
place, so please take it with a big grain of salt):
	bpf_rcu_read_lock();
	cgrp = task->cgroups->dfl_cgrp;
	cgrp = bpf_cgroup_from_id(cgrp->kn->id);
	bpf_rcu_read_unlock();
If all you need is ID, it's even simpler:
	bpf_rcu_read_lock();
	cgrp_id = task->cgroups->dfl_cgrp->kn->id;
	bpf_rcu_read_unlock();
In the first example, it's not great that we go from task pointer to cgroup
pointer to ID and then back to acquired cgroup pointer. I wonder whether
what we really want is to support something like the following:
	bpf_rcu_read_lock();
	cgrp = bpf_cgroup_tryget(task->cgroups->dfl_cgrp);
	bpf_rcu_read_unlock();
What do you think?
Thanks.
--
tejun
On Wed, Mar 20, 2024 at 7:41 AM Tejun Heo <[email protected]> wrote:
>
> On Mon, Mar 18, 2024 at 11:03:01PM -0600, Jose Fernandez wrote:
> > This patch enhances the BPF helpers by adding a kfunc to retrieve the
> > cgroup v2 of a task, addressing a previous limitation where only
> > bpf_task_get_cgroup1 was available for cgroup v1. The new kfunc is
> > particularly useful for scenarios where obtaining the cgroup ID of a
> > task other than the "current" one is necessary, which the existing
> > bpf_get_current_cgroup_id helper cannot accommodate. A specific use
> > case at Netflix involved the sched_switch tracepoint, where we had to
> > get the cgroup IDs of both the prev and next tasks.
> >
> > The bpf_task_get_cgroup kfunc acquires and returns a reference to a
> > task's default cgroup, ensuring thread-safe access by correctly
> > implementing RCU read locking and unlocking. It leverages the existing
> > cgroup.h helper, and cgroup_tryget to safely acquire a reference to it.
> >
> > Signed-off-by: Jose Fernandez <[email protected]>
> > Reviewed-by: Tycho Andersen <[email protected]>
> > Acked-by: Yonghong Song <[email protected]>
> > Acked-by: Stanislav Fomichev <[email protected]>
>
> Acked-by: Tejun Heo <[email protected]>
>
> but some questions below
>
> > +__bpf_kfunc struct cgroup *bpf_task_get_cgroup(struct task_struct *task)
> > +{
> > + struct cgroup *cgrp;
> > +
> > + rcu_read_lock();
> > + cgrp = task_dfl_cgroup(task);
> > + if (!cgroup_tryget(cgrp))
> > + cgrp = NULL;
> > + rcu_read_unlock();
> > + return cgrp;
> > +}
>
> So, as this is a lot easier in cgroup2, the above can probably be written
> directly in BPF (untested and not sure the necessary annotations are in
> place, so please take it with a big grain of salt):
>
> bpf_rcu_read_lock();
> cgrp = task->cgroups->dfl_cgrp;
> cgrp = bpf_cgroup_from_id(cgrp->kn->id);
> bpf_rcu_read_unlock();
>
> If all you need is ID, it's even simpler:
>
> bpf_rcu_read_lock();
> cgrp_id = task->cgroups->dfl_cgrp->kn->id;
> bpf_rcu_read_unlock();
Good point.
Looks like this kfunc isn't really needed.
We even have the following in one of the selftests:
	/* simulate bpf_get_current_cgroup_id() helper */
	bpf_rcu_read_lock();
	cgroups = task->cgroups;
	if (!cgroups)
		goto unlock;
	cgroup_id = cgroups->dfl_cgrp->kn->id;
unlock:
	bpf_rcu_read_unlock();
> In the first example, it's not great that we go from task pointer to cgroup
> pointer to ID and then back to acquired cgroup pointer. I wonder whether
> what we really want is to support something like the following:
>
> bpf_rcu_read_lock();
> cgrp = bpf_cgroup_tryget(task->cgroups->dfl_cgrp);
> bpf_rcu_read_unlock();
this one should work already.
that's existing bpf_cgroup_acquire().
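something along these lines should work with the existing kfuncs
(rough sketch, untested):

	bpf_rcu_read_lock();
	cgrp = bpf_cgroup_acquire(task->cgroups->dfl_cgrp);
	bpf_rcu_read_unlock();
	if (cgrp) {
		/* use cgrp, e.g. read cgrp->kn->id */
		bpf_cgroup_release(cgrp);
	}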
pw-bot: cr