This patchset implements a cgroup bpf auto-detachment functionality:
bpf programs are detached as soon as possible after removal of the
cgroup, without waiting for the release of all associated resources.
Patches 2 and 3 are required to implement a corresponding kselftest
in patch 4.
v3:
1) some minor changes and typo fixes
v2:
1) removed a bogus check in patch 4
2) moved buf[len] = 0 in patch 2
Roman Gushchin (4):
bpf: decouple the lifetime of cgroup_bpf from cgroup itself
selftests/bpf: convert test_cgrp2_attach2 example into kselftest
selftests/bpf: enable all available cgroup v2 controllers
selftests/bpf: add auto-detach test
include/linux/bpf-cgroup.h | 8 +-
include/linux/cgroup.h | 18 +++
kernel/bpf/cgroup.c | 25 ++-
kernel/cgroup/cgroup.c | 11 +-
samples/bpf/Makefile | 2 -
tools/testing/selftests/bpf/Makefile | 4 +-
tools/testing/selftests/bpf/cgroup_helpers.c | 57 +++++++
.../selftests/bpf/test_cgroup_attach.c | 146 ++++++++++++++++--
8 files changed, 243 insertions(+), 28 deletions(-)
rename samples/bpf/test_cgrp2_attach2.c => tools/testing/selftests/bpf/test_cgroup_attach.c (79%)
--
2.20.1
Enable all available cgroup v2 controllers when setting up
the environment for the bpf kselftests. It's required to properly test
the bpf prog auto-detach feature. Also it will generally increase
the code coverage.
Signed-off-by: Roman Gushchin <[email protected]>
Acked-by: Yonghong Song <[email protected]>
---
tools/testing/selftests/bpf/cgroup_helpers.c | 57 ++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 6692a40a6979..0d89f0396be4 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -33,6 +33,60 @@
snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
CGROUP_WORK_DIR, path)
+/**
+ * enable_all_controllers() - Enable all available cgroup v2 controllers
+ *
+ * Enable all available cgroup v2 controllers in order to increase
+ * the code coverage.
+ *
+ * If successful, 0 is returned.
+ */
+int enable_all_controllers(char *cgroup_path)
+{
+ char path[PATH_MAX + 1];
+ char buf[PATH_MAX];
+ char *c, *c2;
+ int fd, cfd;
+ size_t len;
+
+ snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ log_err("Opening cgroup.controllers: %s", path);
+ return 1;
+ }
+
+ len = read(fd, buf, sizeof(buf) - 1);
+ if (len < 0) {
+ close(fd);
+ log_err("Reading cgroup.controllers: %s", path);
+ return 1;
+ }
+ buf[len] = 0;
+ close(fd);
+
+ /* No controllers available? We're probably on cgroup v1. */
+ if (len == 0)
+ return 0;
+
+ snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
+ cfd = open(path, O_RDWR);
+ if (cfd < 0) {
+ log_err("Opening cgroup.subtree_control: %s", path);
+ return 1;
+ }
+
+ for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
+ if (dprintf(cfd, "+%s\n", c) <= 0) {
+ log_err("Enabling controller %s: %s", c, path);
+ close(cfd);
+ return 1;
+ }
+ }
+ close(cfd);
+ return 0;
+}
+
/**
* setup_cgroup_environment() - Setup the cgroup environment
*
@@ -71,6 +125,9 @@ int setup_cgroup_environment(void)
return 1;
}
+ if (enable_all_controllers(cgroup_workdir))
+ return 1;
+
return 0;
}
--
2.20.1
Convert test_cgrp2_attach2 example into a proper test_cgroup_attach
kselftest. It's better because we do run kselftest on a constant
basis, so there are better chances to spot a potential regression.
Also make it slightly less verbose to conform kselftests output style.
Output example:
$ ./test_cgroup_attach
#override:PASS
#multi:PASS
test_cgroup_attach:PASS
Signed-off-by: Roman Gushchin <[email protected]>
Acked-by: Yonghong Song <[email protected]>
---
samples/bpf/Makefile | 2 -
tools/testing/selftests/bpf/Makefile | 4 +-
.../selftests/bpf/test_cgroup_attach.c | 50 ++++++++++++-------
3 files changed, 36 insertions(+), 20 deletions(-)
rename samples/bpf/test_cgrp2_attach2.c => tools/testing/selftests/bpf/test_cgroup_attach.c (91%)
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4f0a1cdbfe7c..253e5a2856be 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -26,7 +26,6 @@ hostprogs-y += map_perf_test
hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin
hostprogs-y += test_cgrp2_attach
-hostprogs-y += test_cgrp2_attach2
hostprogs-y += test_cgrp2_sock
hostprogs-y += test_cgrp2_sock2
hostprogs-y += xdp1
@@ -81,7 +80,6 @@ map_perf_test-objs := bpf_load.o map_perf_test_user.o
test_overhead-objs := bpf_load.o test_overhead_user.o
test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o
test_cgrp2_attach-objs := test_cgrp2_attach.o
-test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS)
test_cgrp2_sock-objs := test_cgrp2_sock.o
test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o
xdp1-objs := xdp1_user.o
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 66f2dca1dee1..e09f419f4d7e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \
- test_netcnt test_tcpnotify_user test_sock_fields test_sysctl
+ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
+ test_cgroup_attach
BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c)))
TEST_GEN_FILES = $(BPF_OBJ_FILES)
@@ -96,6 +97,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
.PHONY: force
diff --git a/samples/bpf/test_cgrp2_attach2.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
similarity index 91%
rename from samples/bpf/test_cgrp2_attach2.c
rename to tools/testing/selftests/bpf/test_cgroup_attach.c
index 0bb6507256b7..2d6d57f50e10 100644
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@@ -1,3 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
/* eBPF example program:
*
* - Creates arraymap in kernel with 4 bytes keys and 8 byte values
@@ -25,20 +27,27 @@
#include <sys/resource.h>
#include <sys/time.h>
#include <unistd.h>
+#include <linux/filter.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
-#include "bpf_insn.h"
+#include "bpf_util.h"
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define FOO "/foo"
#define BAR "/foo/bar/"
-#define PING_CMD "ping -c1 -w1 127.0.0.1 > /dev/null"
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
+#ifdef DEBUG
+#define debug(args...) printf(args)
+#else
+#define debug(args...)
+#endif
+
static int prog_load(int verdict)
{
int ret;
@@ -89,7 +98,7 @@ static int test_foo_bar(void)
goto err;
}
- printf("Attached DROP prog. This ping in cgroup /foo should fail...\n");
+ debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
assert(system(PING_CMD) != 0);
/* Create cgroup /foo/bar, get fd, and join it */
@@ -100,7 +109,7 @@ static int test_foo_bar(void)
if (join_cgroup(BAR))
goto err;
- printf("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
+ debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
@@ -109,7 +118,7 @@ static int test_foo_bar(void)
goto err;
}
- printf("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
+ debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0);
if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
@@ -117,7 +126,7 @@ static int test_foo_bar(void)
goto err;
}
- printf("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
+ debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
"This ping in cgroup /foo/bar should fail...\n");
assert(system(PING_CMD) != 0);
@@ -132,7 +141,7 @@ static int test_foo_bar(void)
goto err;
}
- printf("Attached PASS from /foo/bar and detached DROP from /foo.\n"
+ debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
"This ping in cgroup /foo/bar should pass...\n");
assert(system(PING_CMD) == 0);
@@ -199,9 +208,9 @@ static int test_foo_bar(void)
close(bar);
cleanup_cgroup_environment();
if (!rc)
- printf("### override:PASS\n");
+ printf("#override:PASS\n");
else
- printf("### override:FAIL\n");
+ printf("#override:FAIL\n");
return rc;
}
@@ -441,19 +450,26 @@ static int test_multiprog(void)
close(cg5);
cleanup_cgroup_environment();
if (!rc)
- printf("### multi:PASS\n");
+ printf("#multi:PASS\n");
else
- printf("### multi:FAIL\n");
+ printf("#multi:FAIL\n");
return rc;
}
-int main(int argc, char **argv)
+int main(void)
{
- int rc = 0;
+ int (*tests[])(void) = {test_foo_bar, test_multiprog};
+ int errors = 0;
+ int i;
- rc = test_foo_bar();
- if (rc)
- return rc;
+ for (i = 0; i < ARRAY_SIZE(tests); i++)
+ if (tests[i]())
+ errors++;
+
+ if (errors)
+ printf("test_cgroup_attach:FAIL\n");
+ else
+ printf("test_cgroup_attach:PASS\n");
- return test_multiprog();
+ return errors ? EXIT_FAILURE : EXIT_SUCCESS;
}
--
2.20.1
Add a kselftest to cover bpf auto-detachment functionality.
The test creates a cgroup, associates some resources with it,
attaches a couple of bpf programs and deletes the cgroup.
Then it checks that bpf programs are going away in 5 seconds.
Expected output:
$ ./test_cgroup_attach
#override:PASS
#multi:PASS
#autodetach:PASS
test_cgroup_attach:PASS
On a kernel without auto-detaching:
$ ./test_cgroup_attach
#override:PASS
#multi:PASS
#autodetach:FAIL
test_cgroup_attach:FAIL
Signed-off-by: Roman Gushchin <[email protected]>
---
.../selftests/bpf/test_cgroup_attach.c | 98 ++++++++++++++++++-
1 file changed, 97 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
index 2d6d57f50e10..7671909ee1cb 100644
--- a/tools/testing/selftests/bpf/test_cgroup_attach.c
+++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
@@ -456,9 +456,105 @@ static int test_multiprog(void)
return rc;
}
+static int test_autodetach(void)
+{
+ __u32 prog_cnt = 4, attach_flags;
+ int allow_prog[2] = {0};
+ __u32 prog_ids[2] = {0};
+ int cg = 0, i, rc = -1;
+ void *ptr = NULL;
+ int attempts;
+
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ allow_prog[i] = prog_load_cnt(1, 1 << i);
+ if (!allow_prog[i])
+ goto err;
+ }
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ /* create a cgroup, attach two programs and remember their ids */
+ cg = create_and_get_cgroup("/cg_autodetach");
+ if (cg < 0)
+ goto err;
+
+ if (join_cgroup("/cg_autodetach"))
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI)) {
+ log_err("Attaching prog[%d] to cg:egress", i);
+ goto err;
+ }
+ }
+
+ /* make sure that programs are attached and run some traffic */
+ assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
+ prog_ids, &prog_cnt) == 0);
+ assert(system(PING_CMD) == 0);
+
+ /* allocate some memory (4Mb) to pin the original cgroup */
+ ptr = malloc(4 * (1 << 20));
+ if (!ptr)
+ goto err;
+
+ /* close programs and cgroup fd */
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ close(allow_prog[i]);
+ allow_prog[i] = 0;
+ }
+
+ close(cg);
+ cg = 0;
+
+ /* leave the cgroup and remove it. don't detach programs */
+ cleanup_cgroup_environment();
+
+ /* wait for the asynchronous auto-detachment.
+ * wait for no more than 5 sec and give up.
+ */
+ for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
+ for (attempts = 5; attempts >= 0; attempts--) {
+ int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
+
+ if (fd < 0)
+ break;
+
+ /* don't leave the fd open */
+ close(fd);
+
+ if (!attempts)
+ goto err;
+
+ sleep(1);
+ }
+ }
+
+ rc = 0;
+err:
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+ if (allow_prog[i] > 0)
+ close(allow_prog[i]);
+ if (cg)
+ close(cg);
+ free(ptr);
+ cleanup_cgroup_environment();
+ if (!rc)
+ printf("#autodetach:PASS\n");
+ else
+ printf("#autodetach:FAIL\n");
+ return rc;
+}
+
int main(void)
{
- int (*tests[])(void) = {test_foo_bar, test_multiprog};
+ int (*tests[])(void) = {
+ test_foo_bar,
+ test_multiprog,
+ test_autodetach,
+ };
int errors = 0;
int i;
--
2.20.1
On 5/23/19 12:45 PM, Roman Gushchin wrote:
> Add a kselftest to cover bpf auto-detachment functionality.
> The test creates a cgroup, associates some resources with it,
> attaches a couple of bpf programs and deletes the cgroup.
>
> Then it checks that bpf programs are going away in 5 seconds.
>
> Expected output:
> $ ./test_cgroup_attach
> #override:PASS
> #multi:PASS
> #autodetach:PASS
> test_cgroup_attach:PASS
>
> On a kernel without auto-detaching:
> $ ./test_cgroup_attach
> #override:PASS
> #multi:PASS
> #autodetach:FAIL
> test_cgroup_attach:FAIL
>
> Signed-off-by: Roman Gushchin <[email protected]>
Looks good to me. It will be good if you can add test_cgroup_attach
to .gitignore to avoid it shows up in `git status`. With that,
Acked-by: Yonghong Song <[email protected]>
> ---
> .../selftests/bpf/test_cgroup_attach.c | 98 ++++++++++++++++++-
> 1 file changed, 97 insertions(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
> index 2d6d57f50e10..7671909ee1cb 100644
> --- a/tools/testing/selftests/bpf/test_cgroup_attach.c
> +++ b/tools/testing/selftests/bpf/test_cgroup_attach.c
> @@ -456,9 +456,105 @@ static int test_multiprog(void)
> return rc;
> }
>
> +static int test_autodetach(void)
> +{
> + __u32 prog_cnt = 4, attach_flags;
> + int allow_prog[2] = {0};
> + __u32 prog_ids[2] = {0};
> + int cg = 0, i, rc = -1;
> + void *ptr = NULL;
> + int attempts;
> +
> + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
> + allow_prog[i] = prog_load_cnt(1, 1 << i);
> + if (!allow_prog[i])
> + goto err;
> + }
> +
> + if (setup_cgroup_environment())
> + goto err;
> +
> + /* create a cgroup, attach two programs and remember their ids */
> + cg = create_and_get_cgroup("/cg_autodetach");
> + if (cg < 0)
> + goto err;
> +
> + if (join_cgroup("/cg_autodetach"))
> + goto err;
> +
> + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
> + if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
> + BPF_F_ALLOW_MULTI)) {
> + log_err("Attaching prog[%d] to cg:egress", i);
> + goto err;
> + }
> + }
> +
> + /* make sure that programs are attached and run some traffic */
> + assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
> + prog_ids, &prog_cnt) == 0);
> + assert(system(PING_CMD) == 0);
> +
> + /* allocate some memory (4Mb) to pin the original cgroup */
> + ptr = malloc(4 * (1 << 20));
> + if (!ptr)
> + goto err;
> +
> + /* close programs and cgroup fd */
> + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
> + close(allow_prog[i]);
> + allow_prog[i] = 0;
> + }
> +
> + close(cg);
> + cg = 0;
> +
> + /* leave the cgroup and remove it. don't detach programs */
> + cleanup_cgroup_environment();
> +
> + /* wait for the asynchronous auto-detachment.
> + * wait for no more than 5 sec and give up.
> + */
> + for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
> + for (attempts = 5; attempts >= 0; attempts--) {
> + int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
> +
> + if (fd < 0)
> + break;
> +
> + /* don't leave the fd open */
> + close(fd);
> +
> + if (!attempts)
> + goto err;
> +
> + sleep(1);
> + }
> + }
> +
> + rc = 0;
> +err:
> + for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
> + if (allow_prog[i] > 0)
> + close(allow_prog[i]);
> + if (cg)
> + close(cg);
> + free(ptr);
> + cleanup_cgroup_environment();
> + if (!rc)
> + printf("#autodetach:PASS\n");
> + else
> + printf("#autodetach:FAIL\n");
> + return rc;
> +}
> +
> int main(void)
> {
> - int (*tests[])(void) = {test_foo_bar, test_multiprog};
> + int (*tests[])(void) = {
> + test_foo_bar,
> + test_multiprog,
> + test_autodetach,
> + };
> int errors = 0;
> int i;
>
>
On Thu, May 23, 2019 at 04:09:30PM -0700, Yonghong Song wrote:
>
>
> On 5/23/19 12:45 PM, Roman Gushchin wrote:
> > Add a kselftest to cover bpf auto-detachment functionality.
> > The test creates a cgroup, associates some resources with it,
> > attaches a couple of bpf programs and deletes the cgroup.
> >
> > Then it checks that bpf programs are going away in 5 seconds.
> >
> > Expected output:
> > $ ./test_cgroup_attach
> > #override:PASS
> > #multi:PASS
> > #autodetach:PASS
> > test_cgroup_attach:PASS
> >
> > On a kernel without auto-detaching:
> > $ ./test_cgroup_attach
> > #override:PASS
> > #multi:PASS
> > #autodetach:FAIL
> > test_cgroup_attach:FAIL
> >
> > Signed-off-by: Roman Gushchin <[email protected]>
>
> Looks good to me. It will be good if you can add test_cgroup_attach
> to .gitignore to avoid it shows up in `git status`. With that,
I don't think it deserves a new version, I'll prepare a separate patch
for it.
>
> Acked-by: Yonghong Song <[email protected]>
Thank you for the review!
On Thu, May 23, 2019 at 04:09:30PM -0700, Yonghong Song wrote:
>
>
> On 5/23/19 12:45 PM, Roman Gushchin wrote:
> > Add a kselftest to cover bpf auto-detachment functionality.
> > The test creates a cgroup, associates some resources with it,
> > attaches a couple of bpf programs and deletes the cgroup.
> >
> > Then it checks that bpf programs are going away in 5 seconds.
> >
> > Expected output:
> > $ ./test_cgroup_attach
> > #override:PASS
> > #multi:PASS
> > #autodetach:PASS
> > test_cgroup_attach:PASS
> >
> > On a kernel without auto-detaching:
> > $ ./test_cgroup_attach
> > #override:PASS
> > #multi:PASS
> > #autodetach:FAIL
> > test_cgroup_attach:FAIL
> >
> > Signed-off-by: Roman Gushchin <[email protected]>
>
> Looks good to me. It will be good if you can add test_cgroup_attach
> to .gitignore to avoid it shows up in `git status`. With that,
>
Here it is!
Thanks!
---
From 150fb4db7ab37f0e8b6482386e8830f3d11b64e1 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <[email protected]>
Date: Thu, 23 May 2019 16:46:58 -0700
Subject: [PATCH bpf-next] selftests/bpf: add test_cgroup_attach to .gitignore
Add test_cgroup_attach binary to .gitignore.
Signed-off-by: Roman Gushchin <[email protected]>
---
tools/testing/selftests/bpf/.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index dd5d69529382..86a546e5e4db 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -22,6 +22,7 @@ test_lirc_mode2_user
get_cgroup_id_user
test_skb_cgroup_id_user
test_socket_cookie
+test_cgroup_attach
test_cgroup_storage
test_select_reuseport
test_flow_dissector
--
2.20.1
On Thu, May 23, 2019 at 12:45:28PM -0700, Roman Gushchin wrote:
> This patchset implements a cgroup bpf auto-detachment functionality:
> bpf programs are detached as soon as possible after removal of the
> cgroup, without waiting for the release of all associated resources.
The idea looks great, but doesn't quite work:
$ ./test_cgroup_attach
#override:PASS
[ 66.475219] BUG: sleeping function called from invalid context at ../include/linux/percpu-rwsem.h:34
[ 66.476095] in_atomic(): 1, irqs_disabled(): 0, pid: 21, name: ksoftirqd/2
[ 66.476706] CPU: 2 PID: 21 Comm: ksoftirqd/2 Not tainted 5.2.0-rc1-00211-g1861420d0162 #1564
[ 66.477595] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
[ 66.478360] Call Trace:
[ 66.478591] dump_stack+0x5b/0x8b
[ 66.478892] ___might_sleep+0x22f/0x290
[ 66.479230] cpus_read_lock+0x18/0x50
[ 66.479550] static_key_slow_dec+0x41/0x70
[ 66.479914] cgroup_bpf_release+0x1a6/0x400
[ 66.480285] percpu_ref_switch_to_atomic_rcu+0x203/0x330
[ 66.480754] rcu_core+0x475/0xcc0
[ 66.481047] ? switch_mm_irqs_off+0x684/0xa40
[ 66.481422] ? rcu_note_context_switch+0x260/0x260
[ 66.481842] __do_softirq+0x1cf/0x5ff
[ 66.482174] ? takeover_tasklets+0x5f0/0x5f0
[ 66.482542] ? smpboot_thread_fn+0xab/0x780
[ 66.482911] run_ksoftirqd+0x1a/0x40
[ 66.483225] smpboot_thread_fn+0x3ad/0x780
[ 66.483583] ? sort_range+0x20/0x20
[ 66.483894] ? __kthread_parkme+0xb0/0x190
[ 66.484253] ? sort_range+0x20/0x20
[ 66.484562] ? sort_range+0x20/0x20
[ 66.484878] kthread+0x2e2/0x3e0
[ 66.485166] ? kthread_create_worker_on_cpu+0xb0/0xb0
[ 66.485620] ret_from_fork+0x1f/0x30
Same test runs fine before the patches.
On Fri, May 24, 2019 at 02:03:23PM -0700, Alexei Starovoitov wrote:
> On Thu, May 23, 2019 at 12:45:28PM -0700, Roman Gushchin wrote:
> > This patchset implements a cgroup bpf auto-detachment functionality:
> > bpf programs are detached as soon as possible after removal of the
> > cgroup, without waiting for the release of all associated resources.
>
> The idea looks great, but doesn't quite work:
>
> $ ./test_cgroup_attach
> #override:PASS
> [ 66.475219] BUG: sleeping function called from invalid context at ../include/linux/percpu-rwsem.h:34
> [ 66.476095] in_atomic(): 1, irqs_disabled(): 0, pid: 21, name: ksoftirqd/2
> [ 66.476706] CPU: 2 PID: 21 Comm: ksoftirqd/2 Not tainted 5.2.0-rc1-00211-g1861420d0162 #1564
> [ 66.477595] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
> [ 66.478360] Call Trace:
> [ 66.478591] dump_stack+0x5b/0x8b
> [ 66.478892] ___might_sleep+0x22f/0x290
> [ 66.479230] cpus_read_lock+0x18/0x50
> [ 66.479550] static_key_slow_dec+0x41/0x70
> [ 66.479914] cgroup_bpf_release+0x1a6/0x400
> [ 66.480285] percpu_ref_switch_to_atomic_rcu+0x203/0x330
> [ 66.480754] rcu_core+0x475/0xcc0
> [ 66.481047] ? switch_mm_irqs_off+0x684/0xa40
> [ 66.481422] ? rcu_note_context_switch+0x260/0x260
> [ 66.481842] __do_softirq+0x1cf/0x5ff
> [ 66.482174] ? takeover_tasklets+0x5f0/0x5f0
> [ 66.482542] ? smpboot_thread_fn+0xab/0x780
> [ 66.482911] run_ksoftirqd+0x1a/0x40
> [ 66.483225] smpboot_thread_fn+0x3ad/0x780
> [ 66.483583] ? sort_range+0x20/0x20
> [ 66.483894] ? __kthread_parkme+0xb0/0x190
> [ 66.484253] ? sort_range+0x20/0x20
> [ 66.484562] ? sort_range+0x20/0x20
> [ 66.484878] kthread+0x2e2/0x3e0
> [ 66.485166] ? kthread_create_worker_on_cpu+0xb0/0xb0
> [ 66.485620] ret_from_fork+0x1f/0x30
>
> Same test runs fine before the patches.
>
Ouch, static_branch_dec() might block, so it's not possible to call it from
percpu ref counter release callback. It's not what I expected, tbh.
Good catch, thanks!