2022-01-13 09:00:56

by zhudi (E)

[permalink] [raw]
Subject: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap

Right now there is no way to query whether BPF programs are
attached to a sockmap or not.

We can use the standard interface in libbpf to query, such as:
bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
where mapFd is the fd of a sockmap.

Signed-off-by: Di Zhu <[email protected]>
Acked-by: Yonghong Song <[email protected]>
---
include/linux/bpf.h | 9 +++++
kernel/bpf/syscall.c | 5 +++
net/core/sock_map.c | 78 ++++++++++++++++++++++++++++++++++++++++----
3 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6e947cd91152..c4ca14c9f838 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2071,6 +2071,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+
void sock_map_unhash(struct sock *sk);
void sock_map_close(struct sock *sk, long timeout);
#else
@@ -2124,6 +2127,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
{
return -EOPNOTSUPP;
}
+
+static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fa4505f9b611..9e0631f091a6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3318,6 +3318,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
case BPF_FLOW_DISSECTOR:
case BPF_SK_LOOKUP:
return netns_bpf_prog_query(attr, uattr);
+ case BPF_SK_SKB_STREAM_PARSER:
+ case BPF_SK_SKB_STREAM_VERDICT:
+ case BPF_SK_MSG_VERDICT:
+ case BPF_SK_SKB_VERDICT:
+ return sock_map_bpf_prog_query(attr, uattr);
default:
return -EINVAL;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1827669eedd6..a424f51041ca 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
return NULL;
}

-static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
- struct bpf_prog *old, u32 which)
+static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
+ u32 which)
{
struct sk_psock_progs *progs = sock_map_progs(map);
- struct bpf_prog **pprog;

if (!progs)
return -EOPNOTSUPP;

switch (which) {
case BPF_SK_MSG_VERDICT:
- pprog = &progs->msg_parser;
+ *pprog = &progs->msg_parser;
break;
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
case BPF_SK_SKB_STREAM_PARSER:
- pprog = &progs->stream_parser;
+ *pprog = &progs->stream_parser;
break;
#endif
case BPF_SK_SKB_STREAM_VERDICT:
if (progs->skb_verdict)
return -EBUSY;
- pprog = &progs->stream_verdict;
+ *pprog = &progs->stream_verdict;
break;
case BPF_SK_SKB_VERDICT:
if (progs->stream_verdict)
return -EBUSY;
- pprog = &progs->skb_verdict;
+ *pprog = &progs->skb_verdict;
break;
default:
return -EOPNOTSUPP;
}

+ return 0;
+}
+
+static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+ struct bpf_prog *old, u32 which)
+{
+ struct bpf_prog **pprog;
+ int ret;
+
+ ret = sock_map_prog_lookup(map, &pprog, which);
+ if (ret)
+ return ret;
+
if (old)
return psock_replace_prog(pprog, prog, old);

@@ -1455,6 +1467,58 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
return 0;
}

+int sock_map_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+ u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
+ struct bpf_prog **pprog;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ struct fd f;
+ u32 id = 0;
+ int ret;
+
+ if (attr->query.query_flags)
+ return -EINVAL;
+
+ f = fdget(ufd);
+ map = __bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ rcu_read_lock();
+
+ ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
+ if (ret)
+ goto end;
+
+ prog = *pprog;
+ prog_cnt = !prog ? 0 : 1;
+
+ if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+ goto end;
+
+ id = prog->aux->id;
+
+ /* we do not hold the refcnt, the bpf prog may be released
+ * asynchronously and the id would be set to 0.
+ */
+ if (id == 0)
+ prog_cnt = 0;
+
+end:
+ rcu_read_unlock();
+
+ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
+ (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
+ copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+ ret = -EFAULT;
+
+ fdput(f);
+ return ret;
+}
+
static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
{
switch (link->map->map_type) {
--
2.27.0



2022-01-13 09:00:58

by zhudi (E)

[permalink] [raw]
Subject: [PATCH bpf-next v5 2/2] selftests: bpf: test BPF_PROG_QUERY for progs attached to sockmap

Add a test for querying progs attached to a sockmap. We use an existing
libbpf query interface to query the prog cnt before and after progs
are attached to the sockmap and check whether the queried prog id is correct.

Signed-off-by: Di Zhu <[email protected]>
Acked-by: Yonghong Song <[email protected]>
---
.../selftests/bpf/prog_tests/sockmap_basic.c | 70 +++++++++++++++++++
.../bpf/progs/test_sockmap_progs_query.c | 24 +++++++
2 files changed, 94 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 85db0f4cdd95..06923ea44bad 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -8,6 +8,7 @@
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
#include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
#include "bpf_iter_sockmap.skel.h"

#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -315,6 +316,69 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
test_sockmap_skb_verdict_attach__destroy(skel);
}

+static __u32 query_prog_id(int prog_fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
+ !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
+ return 0;
+
+ return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+ struct test_sockmap_progs_query *skel;
+ int err, map_fd, verdict_fd, duration = 0;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+
+ skel = test_sockmap_progs_query__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ if (attach_type == BPF_SK_MSG_VERDICT)
+ verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+ else
+ verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+ err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ if (!ASSERT_OK(err, "bpf_prog_query failed"))
+ goto out;
+
+ if (!ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"))
+ goto out;
+
+ if (!ASSERT_EQ(prog_cnt, 0, "wrong program count on query"))
+ goto out;
+
+ err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+ goto out;
+
+ prog_cnt = 1;
+ err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+
+ ASSERT_OK(err, "bpf_prog_query failed");
+ ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+ ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+ ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+ "wrong prog_ids on query");
+
+ bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+ test_sockmap_progs_query__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -341,4 +405,10 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
BPF_SK_SKB_VERDICT);
}
+ if (test__start_subtest("sockmap progs query")) {
+ test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+ test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
+ }
}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644
index 000000000000..9d58d61c0dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
--
2.27.0


2022-01-13 16:15:53

by Jakub Sitnicki

[permalink] [raw]
Subject: Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap

On Thu, Jan 13, 2022 at 10:00 AM CET, Di Zhu wrote:
> Right now there is no way to query whether BPF programs are
> attached to a sockmap or not.
>
> we can use the standard interface in libbpf to query, such as:
> bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> the mapFd is the fd of sockmap.
>
> Signed-off-by: Di Zhu <[email protected]>
> Acked-by: Yonghong Song <[email protected]>
> ---
> include/linux/bpf.h | 9 +++++
> kernel/bpf/syscall.c | 5 +++
> net/core/sock_map.c | 78 ++++++++++++++++++++++++++++++++++++++++----
> 3 files changed, 85 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 6e947cd91152..c4ca14c9f838 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -2071,6 +2071,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
> int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
> int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
> int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
> +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> + union bpf_attr __user *uattr);
> +
> void sock_map_unhash(struct sock *sk);
> void sock_map_close(struct sock *sk, long timeout);
> #else
> @@ -2124,6 +2127,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
> {
> return -EOPNOTSUPP;
> }
> +
> +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
> + union bpf_attr __user *uattr)
> +{
> + return -EINVAL;
> +}
> #endif /* CONFIG_BPF_SYSCALL */
> #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index fa4505f9b611..9e0631f091a6 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -3318,6 +3318,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
> case BPF_FLOW_DISSECTOR:
> case BPF_SK_LOOKUP:
> return netns_bpf_prog_query(attr, uattr);
> + case BPF_SK_SKB_STREAM_PARSER:
> + case BPF_SK_SKB_STREAM_VERDICT:
> + case BPF_SK_MSG_VERDICT:
> + case BPF_SK_SKB_VERDICT:
> + return sock_map_bpf_prog_query(attr, uattr);
> default:
> return -EINVAL;
> }
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index 1827669eedd6..a424f51041ca 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
> return NULL;
> }
>
> -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> - struct bpf_prog *old, u32 which)
> +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
> + u32 which)
> {
> struct sk_psock_progs *progs = sock_map_progs(map);
> - struct bpf_prog **pprog;
>
> if (!progs)
> return -EOPNOTSUPP;
>
> switch (which) {
> case BPF_SK_MSG_VERDICT:
> - pprog = &progs->msg_parser;
> + *pprog = &progs->msg_parser;
> break;
> #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> case BPF_SK_SKB_STREAM_PARSER:
> - pprog = &progs->stream_parser;
> + *pprog = &progs->stream_parser;
> break;
> #endif
> case BPF_SK_SKB_STREAM_VERDICT:
> if (progs->skb_verdict)
> return -EBUSY;
> - pprog = &progs->stream_verdict;
> + *pprog = &progs->stream_verdict;
> break;
> case BPF_SK_SKB_VERDICT:
> if (progs->stream_verdict)
> return -EBUSY;
> - pprog = &progs->skb_verdict;
> + *pprog = &progs->skb_verdict;
> break;
> default:
> return -EOPNOTSUPP;
> }
>
> + return 0;
> +}
> +
> +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> + struct bpf_prog *old, u32 which)
> +{
> + struct bpf_prog **pprog;
> + int ret;
> +
> + ret = sock_map_prog_lookup(map, &pprog, which);
> + if (ret)
> + return ret;
> +
> if (old)
> return psock_replace_prog(pprog, prog, old);
>
> @@ -1455,6 +1467,58 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> return 0;
> }
>
> +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> + union bpf_attr __user *uattr)
> +{
> + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> + u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> + struct bpf_prog **pprog;
> + struct bpf_prog *prog;
> + struct bpf_map *map;
> + struct fd f;
> + u32 id = 0;
> + int ret;
> +
> + if (attr->query.query_flags)
> + return -EINVAL;
> +
> + f = fdget(ufd);
> + map = __bpf_map_get(f);
> + if (IS_ERR(map))
> + return PTR_ERR(map);
> +
> + rcu_read_lock();
> +
> + ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> + if (ret)
> + goto end;
> +
> + prog = *pprog;
> + prog_cnt = !prog ? 0 : 1;
> +
> + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> + goto end;
> +
> + id = prog->aux->id;

^ This looks like a concurrent read/write.

Would wrap with READ_ONCE() and corresponding WRITE_ONCE() in
bpf_prog_free_id(). See [1] for rationale.

[1] https://github.com/google/kernel-sanitizers/blob/master/other/READ_WRITE_ONCE.md

> +
> + /* we do not hold the refcnt, the bpf prog may be released
> + * asynchronously and the id would be set to 0.
> + */
> + if (id == 0)
> + prog_cnt = 0;
> +
> +end:
> + rcu_read_unlock();
> +
> + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> + (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
> + copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
> + ret = -EFAULT;
> +
> + fdput(f);
> + return ret;
> +}
> +
> static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
> {
> switch (link->map->map_type) {

2022-01-14 05:45:03

by zhudi (E)

[permalink] [raw]
Subject: Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap

> On Thu, Jan 13, 2022 at 10:00 AM CET, Di Zhu wrote:
> > Right now there is no way to query whether BPF programs are
> > attached to a sockmap or not.
> >
> > we can use the standard interface in libbpf to query, such as:
> > bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> > the mapFd is the fd of sockmap.
> >
> > Signed-off-by: Di Zhu <[email protected]>
> > Acked-by: Yonghong Song <[email protected]>
> > ---
> > include/linux/bpf.h | 9 +++++
> > kernel/bpf/syscall.c | 5 +++
> > net/core/sock_map.c | 78
> ++++++++++++++++++++++++++++++++++++++++----
> > 3 files changed, 85 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 6e947cd91152..c4ca14c9f838 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -2071,6 +2071,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog
> *prog,
> > int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog
> *prog);
> > int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type
> ptype);
> > int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
> u64 flags);


.......


> > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > + union bpf_attr __user *uattr)
> > +{
> > + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> > + u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> > + struct bpf_prog **pprog;
> > + struct bpf_prog *prog;
> > + struct bpf_map *map;
> > + struct fd f;
> > + u32 id = 0;
> > + int ret;
> > +
> > + if (attr->query.query_flags)
> > + return -EINVAL;
> > +
> > + f = fdget(ufd);
> > + map = __bpf_map_get(f);
> > + if (IS_ERR(map))
> > + return PTR_ERR(map);
> > +
> > + rcu_read_lock();
> > +
> > + ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> > + if (ret)
> > + goto end;
> > +
> > + prog = *pprog;
> > + prog_cnt = !prog ? 0 : 1;
> > +
> > + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> > + goto end;
> > +
> > + id = prog->aux->id;
>
> ^ This looks like a concurrent read/write.
>
> Would wrap with READ_ONCE() and corresponding WRITE_ONCE() in
> bpf_prog_free_id(). See [1] for rationale.
>
> [1]
> https://github.com/google/kernel-sanitizers/blob/master/other/READ_WRITE_O
> NCE.md


Thanks for your advice, I will modify this code.


> > +
> > + /* we do not hold the refcnt, the bpf prog may be released
> > + * asynchronously and the id would be set to 0.
> > + */
> > + if (id == 0)
> > + prog_cnt = 0;
> > +
> > +end:
> > + rcu_read_unlock();
> > +
> > + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> > + (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
> > + copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
> > + ret = -EFAULT;
> > +
> > + fdput(f);
> > + return ret;
> > +}
> > +
> > static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
> > {
> > switch (link->map->map_type) {

2022-01-15 15:22:53

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [PATCH bpf-next v5 2/2] selftests: bpf: test BPF_PROG_QUERY for progs attached to sockmap

On Thu, Jan 13, 2022 at 1:01 AM Di Zhu <[email protected]> wrote:
>
> Add test for querying progs attached to sockmap. we use an existing
> libbpf query interface to query prog cnt before and after progs
> attaching to sockmap and check whether the queried prog id is right.
>
> Signed-off-by: Di Zhu <[email protected]>
> Acked-by: Yonghong Song <[email protected]>
> ---
> .../selftests/bpf/prog_tests/sockmap_basic.c | 70 +++++++++++++++++++
> .../bpf/progs/test_sockmap_progs_query.c | 24 +++++++
> 2 files changed, 94 insertions(+)
> create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
> index 85db0f4cdd95..06923ea44bad 100644
> --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
> +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
> @@ -8,6 +8,7 @@
> #include "test_sockmap_update.skel.h"
> #include "test_sockmap_invalid_update.skel.h"
> #include "test_sockmap_skb_verdict_attach.skel.h"
> +#include "test_sockmap_progs_query.skel.h"
> #include "bpf_iter_sockmap.skel.h"
>
> #define TCP_REPAIR 19 /* TCP sock is under repair right now */
> @@ -315,6 +316,69 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
> test_sockmap_skb_verdict_attach__destroy(skel);
> }
>
> +static __u32 query_prog_id(int prog_fd)
> +{
> + struct bpf_prog_info info = {};
> + __u32 info_len = sizeof(info);
> + int err;
> +
> + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
> + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
> + !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
> + return 0;
> +
> + return info.id;
> +}
> +
> +static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
> +{
> + struct test_sockmap_progs_query *skel;
> + int err, map_fd, verdict_fd, duration = 0;
> + __u32 attach_flags = 0;
> + __u32 prog_ids[3] = {};
> + __u32 prog_cnt = 3;
> +
> + skel = test_sockmap_progs_query__open_and_load();
> + if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
> + return;
> +
> + map_fd = bpf_map__fd(skel->maps.sock_map);
> +
> + if (attach_type == BPF_SK_MSG_VERDICT)
> + verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
> + else
> + verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
> +
> + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
> + &attach_flags, prog_ids, &prog_cnt);
> + if (!ASSERT_OK(err, "bpf_prog_query failed"))
> + goto out;
> +
> + if (!ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"))
> + goto out;
> +
> + if (!ASSERT_EQ(prog_cnt, 0, "wrong program count on query"))
> + goto out;
> +
> + err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
> + if (!ASSERT_OK(err, "bpf_prog_attach failed"))
> + goto out;
> +
> + prog_cnt = 1;
> + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
> + &attach_flags, prog_ids, &prog_cnt);
> +
> + ASSERT_OK(err, "bpf_prog_query failed");
> + ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
> + ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
> + ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
> + "wrong prog_ids on query");

See how much easier it is to follow these tests, why didn't you do the
same with err, attach_flags and prog above?


> +
> + bpf_prog_detach2(verdict_fd, map_fd, attach_type);
> +out:
> + test_sockmap_progs_query__destroy(skel);
> +}
> +
> void test_sockmap_basic(void)
> {
> if (test__start_subtest("sockmap create_update_free"))
> @@ -341,4 +405,10 @@ void test_sockmap_basic(void)
> test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
> BPF_SK_SKB_VERDICT);
> }
> + if (test__start_subtest("sockmap progs query")) {
> + test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
> + test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
> + test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
> + test_sockmap_progs_query(BPF_SK_SKB_VERDICT);

Why are these not separate subtests? What's the benefit of bundling
them into one subtest?

> + }
> }
> diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
> new file mode 100644
> index 000000000000..9d58d61c0dee
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
> @@ -0,0 +1,24 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +
> +struct {
> + __uint(type, BPF_MAP_TYPE_SOCKMAP);
> + __uint(max_entries, 1);
> + __type(key, __u32);
> + __type(value, __u64);
> +} sock_map SEC(".maps");
> +
> +SEC("sk_skb")
> +int prog_skb_verdict(struct __sk_buff *skb)
> +{
> + return SK_PASS;
> +}
> +
> +SEC("sk_msg")
> +int prog_skmsg_verdict(struct sk_msg_md *msg)
> +{
> + return SK_PASS;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.27.0
>

2022-01-15 15:22:56

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap

On Thu, Jan 13, 2022 at 8:15 AM Jakub Sitnicki <[email protected]> wrote:
>
> On Thu, Jan 13, 2022 at 10:00 AM CET, Di Zhu wrote:
> > Right now there is no way to query whether BPF programs are
> > attached to a sockmap or not.
> >
> > we can use the standard interface in libbpf to query, such as:
> > bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> > the mapFd is the fd of sockmap.
> >
> > Signed-off-by: Di Zhu <[email protected]>
> > Acked-by: Yonghong Song <[email protected]>
> > ---
> > include/linux/bpf.h | 9 +++++
> > kernel/bpf/syscall.c | 5 +++
> > net/core/sock_map.c | 78 ++++++++++++++++++++++++++++++++++++++++----
> > 3 files changed, 85 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 6e947cd91152..c4ca14c9f838 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -2071,6 +2071,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog,
> > int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
> > int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
> > int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
> > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > + union bpf_attr __user *uattr);
> > +
> > void sock_map_unhash(struct sock *sk);
> > void sock_map_close(struct sock *sk, long timeout);
> > #else
> > @@ -2124,6 +2127,12 @@ static inline int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
> > {
> > return -EOPNOTSUPP;
> > }
> > +
> > +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > + union bpf_attr __user *uattr)
> > +{
> > + return -EINVAL;
> > +}
> > #endif /* CONFIG_BPF_SYSCALL */
> > #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
> >
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index fa4505f9b611..9e0631f091a6 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -3318,6 +3318,11 @@ static int bpf_prog_query(const union bpf_attr *attr,
> > case BPF_FLOW_DISSECTOR:
> > case BPF_SK_LOOKUP:
> > return netns_bpf_prog_query(attr, uattr);
> > + case BPF_SK_SKB_STREAM_PARSER:
> > + case BPF_SK_SKB_STREAM_VERDICT:
> > + case BPF_SK_MSG_VERDICT:
> > + case BPF_SK_SKB_VERDICT:
> > + return sock_map_bpf_prog_query(attr, uattr);
> > default:
> > return -EINVAL;
> > }
> > diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> > index 1827669eedd6..a424f51041ca 100644
> > --- a/net/core/sock_map.c
> > +++ b/net/core/sock_map.c
> > @@ -1416,38 +1416,50 @@ static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
> > return NULL;
> > }
> >
> > -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> > - struct bpf_prog *old, u32 which)
> > +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog,
> > + u32 which)
> > {
> > struct sk_psock_progs *progs = sock_map_progs(map);
> > - struct bpf_prog **pprog;
> >
> > if (!progs)
> > return -EOPNOTSUPP;
> >
> > switch (which) {
> > case BPF_SK_MSG_VERDICT:
> > - pprog = &progs->msg_parser;
> > + *pprog = &progs->msg_parser;
> > break;
> > #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> > case BPF_SK_SKB_STREAM_PARSER:
> > - pprog = &progs->stream_parser;
> > + *pprog = &progs->stream_parser;
> > break;
> > #endif
> > case BPF_SK_SKB_STREAM_VERDICT:
> > if (progs->skb_verdict)
> > return -EBUSY;
> > - pprog = &progs->stream_verdict;
> > + *pprog = &progs->stream_verdict;
> > break;
> > case BPF_SK_SKB_VERDICT:
> > if (progs->stream_verdict)
> > return -EBUSY;
> > - pprog = &progs->skb_verdict;
> > + *pprog = &progs->skb_verdict;
> > break;
> > default:
> > return -EOPNOTSUPP;
> > }
> >
> > + return 0;
> > +}
> > +
> > +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> > + struct bpf_prog *old, u32 which)
> > +{
> > + struct bpf_prog **pprog;
> > + int ret;
> > +
> > + ret = sock_map_prog_lookup(map, &pprog, which);
> > + if (ret)
> > + return ret;
> > +
> > if (old)
> > return psock_replace_prog(pprog, prog, old);
> >
> > @@ -1455,6 +1467,58 @@ static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
> > return 0;
> > }
> >
> > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > + union bpf_attr __user *uattr)
> > +{
> > + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> > + u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> > + struct bpf_prog **pprog;
> > + struct bpf_prog *prog;
> > + struct bpf_map *map;
> > + struct fd f;
> > + u32 id = 0;
> > + int ret;
> > +
> > + if (attr->query.query_flags)
> > + return -EINVAL;
> > +
> > + f = fdget(ufd);
> > + map = __bpf_map_get(f);
> > + if (IS_ERR(map))
> > + return PTR_ERR(map);
> > +
> > + rcu_read_lock();
> > +
> > + ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> > + if (ret)
> > + goto end;
> > +
> > + prog = *pprog;
> > + prog_cnt = !prog ? 0 : 1;
> > +
> > + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> > + goto end;
> > +
> > + id = prog->aux->id;
>
> ^ This looks like a concurrent read/write.

You mean that bpf_prog_load() might be setting it in a different
thread? I think ID is allocated and fixed before prog FD is available
to the user-space, so prog->aux->id is set in stone and immutable in
that regard.

>
> Would wrap with READ_ONCE() and corresponding WRITE_ONCE() in
> bpf_prog_free_id(). See [1] for rationale.
>
> [1] https://github.com/google/kernel-sanitizers/blob/master/other/READ_WRITE_ONCE.md
>
> > +
> > + /* we do not hold the refcnt, the bpf prog may be released
> > + * asynchronously and the id would be set to 0.
> > + */
> > + if (id == 0)
> > + prog_cnt = 0;
> > +
> > +end:
> > + rcu_read_unlock();
> > +
> > + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> > + (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
> > + copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
> > + ret = -EFAULT;
> > +
> > + fdput(f);
> > + return ret;
> > +}
> > +
> > static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
> > {
> > switch (link->map->map_type) {

2022-01-15 16:47:20

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [PATCH bpf-next v5 2/2] selftests: bpf: test BPF_PROG_QUERY for progs attached to sockmap

On Fri, Jan 14, 2022 at 6:34 PM zhudi (E) <[email protected]> wrote:
>
> > On Thu, Jan 13, 2022 at 1:01 AM Di Zhu <[email protected]> wrote:
> > >
> > > Add test for querying progs attached to sockmap. we use an existing
> > > libbpf query interface to query prog cnt before and after progs
> > > attaching to sockmap and check whether the queried prog id is right.
> > >
> > > Signed-off-by: Di Zhu <[email protected]>
> > > Acked-by: Yonghong Song <[email protected]>
> > > ---
> > > .../selftests/bpf/prog_tests/sockmap_basic.c | 70 +++++++++++++++++++
> > > .../bpf/progs/test_sockmap_progs_query.c | 24 +++++++
> > > 2 files changed, 94 insertions(+)
> > > create mode 100644
> > tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
> > >
> > > diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
> > b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
> > > index 85db0f4cdd95..06923ea44bad 100644
> > > --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
> > > +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
> > > @@ -8,6 +8,7 @@
> > > #include "test_sockmap_update.skel.h"
> > > #include "test_sockmap_invalid_update.skel.h"
> > > #include "test_sockmap_skb_verdict_attach.skel.h"
> > > +#include "test_sockmap_progs_query.skel.h"
> > > #include "bpf_iter_sockmap.skel.h"
> > >
> > > #define TCP_REPAIR 19 /* TCP sock is under repair
> > right now */
> > > @@ -315,6 +316,69 @@ static void test_sockmap_skb_verdict_attach(enum
> > bpf_attach_type first,
> > > test_sockmap_skb_verdict_attach__destroy(skel);
> > > }
> > >
> > > +static __u32 query_prog_id(int prog_fd)
> > > +{
> > > + struct bpf_prog_info info = {};
> > > + __u32 info_len = sizeof(info);
> > > + int err;
> > > +
> > > + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
> > > + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") ||
> > > + !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd"))
> > > + return 0;
> > > +
> > > + return info.id;
> > > +}
> > > +
> > > +static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
> > > +{
> > > + struct test_sockmap_progs_query *skel;
> > > + int err, map_fd, verdict_fd, duration = 0;
> > > + __u32 attach_flags = 0;
> > > + __u32 prog_ids[3] = {};
> > > + __u32 prog_cnt = 3;
> > > +
> > > + skel = test_sockmap_progs_query__open_and_load();
> > > + if (!ASSERT_OK_PTR(skel,
> > "test_sockmap_progs_query__open_and_load"))
> > > + return;
> > > +
> > > + map_fd = bpf_map__fd(skel->maps.sock_map);
> > > +
> > > + if (attach_type == BPF_SK_MSG_VERDICT)
> > > + verdict_fd =
> > bpf_program__fd(skel->progs.prog_skmsg_verdict);
> > > + else
> > > + verdict_fd =
> > bpf_program__fd(skel->progs.prog_skb_verdict);
> > > +
> > > + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
> > > + &attach_flags, prog_ids, &prog_cnt);
> > > + if (!ASSERT_OK(err, "bpf_prog_query failed"))
> > > + goto out;
> > > +
> > > + if (!ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"))
> > > + goto out;
> > > +
> > > + if (!ASSERT_EQ(prog_cnt, 0, "wrong program count on query"))
> > > + goto out;

What I mean here is that you can just do

ASSERT_OK(err, ...);
ASSERT_EQ(attach_flags, ...);
ASSERT_EQ(prog_cnt, ...);

No if + goto necessary.

> > > +
> > > + err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
> > > + if (!ASSERT_OK(err, "bpf_prog_attach failed"))
> > > + goto out;
> > > +
> > > + prog_cnt = 1;
> > > + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
> > > + &attach_flags, prog_ids, &prog_cnt);
> > > +
> > > + ASSERT_OK(err, "bpf_prog_query failed");
> > > + ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
> > > + ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
> > > + ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
> > > + "wrong prog_ids on query");
> >
> > See how much easier it is to follow these tests, why didn't you do the
> > same with err, attach_flags and prog above?
>
> It is recommended by Yonghong Song to increase the test coverage.

see above

>
> >
> >
> > > +
> > > + bpf_prog_detach2(verdict_fd, map_fd, attach_type);
> > > +out:
> > > + test_sockmap_progs_query__destroy(skel);
> > > +}
> > > +
> > > void test_sockmap_basic(void)
> > > {
> > > if (test__start_subtest("sockmap create_update_free"))
> > > @@ -341,4 +405,10 @@ void test_sockmap_basic(void)
> > >
> > test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
> > >
> > BPF_SK_SKB_VERDICT);
> > > }
> > > + if (test__start_subtest("sockmap progs query")) {
> > > + test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
> > > +
> > test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
> > > +
> > test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
> > > + test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
> >
> > Why are these not separate subtests? What's the benefit of bundling
> > them into one subtest?
> >
>
> These are essentially doing the same thing, just for different program attach types.

Right, so they are independent subtests, no? Not separate tests, but
not one subtest either.

>
> > > + }
> > > }
> > > diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
> > b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
> > > new file mode 100644
> > > index 000000000000..9d58d61c0dee
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
> > > @@ -0,0 +1,24 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +#include "vmlinux.h"
> > > +#include <bpf/bpf_helpers.h>
> > > +
> > > +struct {
> > > + __uint(type, BPF_MAP_TYPE_SOCKMAP);
> > > + __uint(max_entries, 1);
> > > + __type(key, __u32);
> > > + __type(value, __u64);
> > > +} sock_map SEC(".maps");
> > > +
> > > +SEC("sk_skb")
> > > +int prog_skb_verdict(struct __sk_buff *skb)
> > > +{
> > > + return SK_PASS;
> > > +}
> > > +
> > > +SEC("sk_msg")
> > > +int prog_skmsg_verdict(struct sk_msg_md *msg)
> > > +{
> > > + return SK_PASS;
> > > +}
> > > +
> > > +char _license[] SEC("license") = "GPL";
> > > --
> > > 2.27.0
> > >

2022-01-15 16:47:21

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap

On Fri, Jan 14, 2022 at 6:38 PM zhudi (E) <[email protected]> wrote:
>
> > On Thu, Jan 13, 2022 at 8:15 AM Jakub Sitnicki <[email protected]> wrote:
> > >
> > > On Thu, Jan 13, 2022 at 10:00 AM CET, Di Zhu wrote:
> > > > Right now there is no way to query whether BPF programs are
> > > > attached to a sockmap or not.
> > > >
> > > > we can use the standard interface in libbpf to query, such as:
> > > > bpf_prog_query(mapFd, BPF_SK_SKB_STREAM_PARSER, 0, NULL, ...);
> > > > the mapFd is the fd of sockmap.
> > > >
> > > > Signed-off-by: Di Zhu <[email protected]>
> > > > Acked-by: Yonghong Song <[email protected]>
> > > > ---
> > > > include/linux/bpf.h | 9 +++++
> > > > kernel/bpf/syscall.c | 5 +++
> > > > net/core/sock_map.c | 78
> > ++++++++++++++++++++++++++++++++++++++++----
> > > > 3 files changed, 85 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index 6e947cd91152..c4ca14c9f838 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -2071,6 +2071,9 @@ int bpf_prog_test_run_syscall(struct bpf_prog
> > *prog,
> > > > int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog
> > *prog);
> > > > int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type
> > ptype);
> > > > int sock_map_update_elem_sys(struct bpf_map *map, void *key, void
> > *value, u64 flags);
> > > > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > > > + union bpf_attr __user *uattr);
> > > > +
> > > > void sock_map_unhash(struct sock *sk);
> > > > void sock_map_close(struct sock *sk, long timeout);
> > > > #else
> > > > @@ -2124,6 +2127,12 @@ static inline int
> > sock_map_update_elem_sys(struct bpf_map *map, void *key, void
> > > > {
> > > > return -EOPNOTSUPP;
> > > > }
> > > > +
> > > > +static inline int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > > > + union bpf_attr __user
> > *uattr)
> > > > +{
> > > > + return -EINVAL;
> > > > +}
> > > > #endif /* CONFIG_BPF_SYSCALL */
> > > > #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
> > > >
> > > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > > > index fa4505f9b611..9e0631f091a6 100644
> > > > --- a/kernel/bpf/syscall.c
> > > > +++ b/kernel/bpf/syscall.c
> > > > @@ -3318,6 +3318,11 @@ static int bpf_prog_query(const union bpf_attr
> > *attr,
> > > > case BPF_FLOW_DISSECTOR:
> > > > case BPF_SK_LOOKUP:
> > > > return netns_bpf_prog_query(attr, uattr);
> > > > + case BPF_SK_SKB_STREAM_PARSER:
> > > > + case BPF_SK_SKB_STREAM_VERDICT:
> > > > + case BPF_SK_MSG_VERDICT:
> > > > + case BPF_SK_SKB_VERDICT:
> > > > + return sock_map_bpf_prog_query(attr, uattr);
> > > > default:
> > > > return -EINVAL;
> > > > }
> > > > diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> > > > index 1827669eedd6..a424f51041ca 100644
> > > > --- a/net/core/sock_map.c
> > > > +++ b/net/core/sock_map.c
> > > > @@ -1416,38 +1416,50 @@ static struct sk_psock_progs
> > *sock_map_progs(struct bpf_map *map)
> > > > return NULL;
> > > > }
> > > >
> > > > -static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog
> > *prog,
> > > > - struct bpf_prog *old, u32 which)
> > > > +static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog
> > ***pprog,
> > > > + u32 which)
> > > > {
> > > > struct sk_psock_progs *progs = sock_map_progs(map);
> > > > - struct bpf_prog **pprog;
> > > >
> > > > if (!progs)
> > > > return -EOPNOTSUPP;
> > > >
> > > > switch (which) {
> > > > case BPF_SK_MSG_VERDICT:
> > > > - pprog = &progs->msg_parser;
> > > > + *pprog = &progs->msg_parser;
> > > > break;
> > > > #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
> > > > case BPF_SK_SKB_STREAM_PARSER:
> > > > - pprog = &progs->stream_parser;
> > > > + *pprog = &progs->stream_parser;
> > > > break;
> > > > #endif
> > > > case BPF_SK_SKB_STREAM_VERDICT:
> > > > if (progs->skb_verdict)
> > > > return -EBUSY;
> > > > - pprog = &progs->stream_verdict;
> > > > + *pprog = &progs->stream_verdict;
> > > > break;
> > > > case BPF_SK_SKB_VERDICT:
> > > > if (progs->stream_verdict)
> > > > return -EBUSY;
> > > > - pprog = &progs->skb_verdict;
> > > > + *pprog = &progs->skb_verdict;
> > > > break;
> > > > default:
> > > > return -EOPNOTSUPP;
> > > > }
> > > >
> > > > + return 0;
> > > > +}
> > > > +
> > > > +static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog
> > *prog,
> > > > + struct bpf_prog *old, u32 which)
> > > > +{
> > > > + struct bpf_prog **pprog;
> > > > + int ret;
> > > > +
> > > > + ret = sock_map_prog_lookup(map, &pprog, which);
> > > > + if (ret)
> > > > + return ret;
> > > > +
> > > > if (old)
> > > > return psock_replace_prog(pprog, prog, old);
> > > >
> > > > @@ -1455,6 +1467,58 @@ static int sock_map_prog_update(struct bpf_map
> > *map, struct bpf_prog *prog,
> > > > return 0;
> > > > }
> > > >
> > > > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
> > > > + union bpf_attr __user *uattr)
> > > > +{
> > > > + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
> > > > + u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
> > > > + struct bpf_prog **pprog;
> > > > + struct bpf_prog *prog;
> > > > + struct bpf_map *map;
> > > > + struct fd f;
> > > > + u32 id = 0;
> > > > + int ret;
> > > > +
> > > > + if (attr->query.query_flags)
> > > > + return -EINVAL;
> > > > +
> > > > + f = fdget(ufd);
> > > > + map = __bpf_map_get(f);
> > > > + if (IS_ERR(map))
> > > > + return PTR_ERR(map);
> > > > +
> > > > + rcu_read_lock();
> > > > +
> > > > + ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
> > > > + if (ret)
> > > > + goto end;
> > > > +
> > > > + prog = *pprog;
> > > > + prog_cnt = !prog ? 0 : 1;
> > > > +
> > > > + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
> > > > + goto end;
> > > > +
> > > > + id = prog->aux->id;
> > >
> > > ^ This looks like a concurrent read/write.
> >
> > You mean that bpf_prog_load() might be setting it in a different
> > thread? I think ID is allocated and fixed before prog FD is available
> > to the user-space, so prog->aux->id is set in stone and immutable in
> > that regard.
>
> What we're talking about here is that bpf_prog_free_id() will write the id
> identifier synchronously.

Hm.. let's say bpf_prog_free_id() happens right after we read id 123.
It's impossible to distinguish that from reading a valid ID (one that's
not yet freed), returning it to user-space, and then having the program
and its ID freed before user-space can act on it. User-space
either way will get an ID that's not valid anymore. I don't see any
use of READ_ONCE/WRITE_ONCE with prog->aux->id, which is why I was
asking what changed.

>
> >
> > >
> > > Would wrap with READ_ONCE() and corresponding WRITE_ONCE() in
> > > bpf_prog_free_id(). See [1] for rationale.
> > >
> > > [1]
> > https://github.com/google/kernel-sanitizers/blob/master/other/READ_WRITE_O
> > NCE.md
> > >
> > > > +
> > > > + /* we do not hold the refcnt, the bpf prog may be released
> > > > + * asynchronously and the id would be set to 0.
> > > > + */
> > > > + if (id == 0)
> > > > + prog_cnt = 0;
> > > > +
> > > > +end:
> > > > + rcu_read_unlock();
> > > > +
> > > > + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) ||
> > > > + (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) ||
> > > > + copy_to_user(&uattr->query.prog_cnt, &prog_cnt,
> > sizeof(prog_cnt)))
> > > > + ret = -EFAULT;
> > > > +
> > > > + fdput(f);
> > > > + return ret;
> > > > +}
> > > > +
> > > > static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
> > > > {
> > > > switch (link->map->map_type) {

2022-01-16 16:23:08

by Jakub Sitnicki

[permalink] [raw]
Subject: Re: [PATCH bpf-next v5 1/2] bpf: support BPF_PROG_QUERY for progs attached to sockmap

On Sat, Jan 15, 2022 at 03:53 AM CET, Andrii Nakryiko wrote:
> On Fri, Jan 14, 2022 at 6:38 PM zhudi (E) <[email protected]> wrote:
>>
>> > On Thu, Jan 13, 2022 at 8:15 AM Jakub Sitnicki <[email protected]> wrote:
>> > >
>> > > On Thu, Jan 13, 2022 at 10:00 AM CET, Di Zhu wrote:

[...]

>> > > > +int sock_map_bpf_prog_query(const union bpf_attr *attr,
>> > > > + union bpf_attr __user *uattr)
>> > > > +{
>> > > > + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
>> > > > + u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd;
>> > > > + struct bpf_prog **pprog;
>> > > > + struct bpf_prog *prog;
>> > > > + struct bpf_map *map;
>> > > > + struct fd f;
>> > > > + u32 id = 0;
>> > > > + int ret;
>> > > > +
>> > > > + if (attr->query.query_flags)
>> > > > + return -EINVAL;
>> > > > +
>> > > > + f = fdget(ufd);
>> > > > + map = __bpf_map_get(f);
>> > > > + if (IS_ERR(map))
>> > > > + return PTR_ERR(map);
>> > > > +
>> > > > + rcu_read_lock();
>> > > > +
>> > > > + ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type);
>> > > > + if (ret)
>> > > > + goto end;
>> > > > +
>> > > > + prog = *pprog;
>> > > > + prog_cnt = !prog ? 0 : 1;
>> > > > +
>> > > > + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
>> > > > + goto end;
>> > > > +
>> > > > + id = prog->aux->id;
>> > >
>> > > ^ This looks like a concurrent read/write.
>> >
>> > You mean that bpf_prog_load() might be setting it in a different
>> > thread? I think ID is allocated and fixed before prog FD is available
>> > to the user-space, so prog->aux->id is set in stone and immutable in
>> > that regard.
>>
>> What we're talking about here is that bpf_prog_free_id() will write the id
>> identifier synchronously.
>
> Hm.. let's say bpf_prog_free_id() happens right after we read id 123.
> It's impossible to distinguish that from reading valid ID (that's not
> yet freed), returning it to user-space and before user-space can do
> anything about that this program and it's ID are freed. User-space
> either way will get an ID that's not valid anymore. I don't see any
> use of READ_ONCE/WRITE_ONCE with prog->aux->id, which is why I was
> asking what changed.
>

You're right, READ_ONCE/WRITE_ONCE is not improving anything here.

I've suggested it not to make the query op more reliable, but rather to
mark the shared access.

But in this case annotating it with data_race() [1] would be a better
fit, I think, because we don't care if we get the old or the new value.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/memory-model/Documentation/access-marking.txt#n58

>>
>> >
>> > >
>> > > Would wrap with READ_ONCE() and corresponding WRITE_ONCE() in
>> > > bpf_prog_free_id(). See [1] for rationale.
>> > >
>> > > [1]
>> > https://github.com/google/kernel-sanitizers/blob/master/other/READ_WRITE_O
>> > NCE.md
>> > >

[...]