2019-04-12 10:02:40

by Alban Crequy

[permalink] [raw]
Subject: [PATCH bpf-next v1 1/2] bpf: sock ops: add netns in bpf context

From: Alban Crequy <[email protected]>

sockops programs can now access the network namespace inode via
(struct bpf_sock_ops)->netns. This can be useful to apply different
policies on different network namespaces.

In the unlikely case where network namespaces are not compiled in
(CONFIG_NET_NS=n), the verifier will not allow access to ->netns.

Signed-off-by: Alban Crequy <[email protected]>
---
include/uapi/linux/bpf.h | 1 +
net/core/filter.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 41 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 31a27dd337dc..5afaab25f205 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3069,6 +3069,7 @@ struct bpf_sock_ops {
__u32 sk_txhash;
__u64 bytes_received;
__u64 bytes_acked;
+ __u64 netns;
};

/* Definitions for bpf_sock_ops_cb_flags */
diff --git a/net/core/filter.c b/net/core/filter.c
index 22eb2edf5573..f5e75b6fecb2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6758,6 +6758,14 @@ static bool sock_ops_is_valid_access(int off, int size,
}
} else {
switch (off) {
+ case offsetof(struct bpf_sock_ops, netns):
+#ifdef CONFIG_NET_NS
+ if (size != sizeof(__u64))
+ return false;
+#else
+ return false;
+#endif
+ break;
case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
bytes_acked):
if (size != sizeof(__u64))
@@ -7908,6 +7916,38 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
struct sock, type);
break;
+
+ case offsetof(struct bpf_sock_ops, netns):
+#ifdef CONFIG_NET_NS
+ /* Loading: sk_ops->sk->__sk_common.skc_net.net->ns.inum
+ * Type: (struct bpf_sock_ops_kern *)
+ * ->(struct sock *)
+ * ->(struct sock_common)
+ * .possible_net_t
+ * .(struct net *)
+ * ->(struct ns_common)
+ * .(unsigned int)
+ */
+ BUILD_BUG_ON(offsetof(struct sock, __sk_common) != 0);
+ BUILD_BUG_ON(offsetof(possible_net_t, net) != 0);
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ possible_net_t, net),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_net));
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct ns_common, inum),
+ si->dst_reg, si->dst_reg,
+ offsetof(struct net, ns) +
+ offsetof(struct ns_common, inum));
+#else
+ *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
+#endif
+ break;
+
}
return insn - insn_buf;
}
--
2.20.1


2019-04-12 10:03:10

by Alban Crequy

[permalink] [raw]
Subject: [PATCH bpf-next v1 2/2] selftests: bpf: read netns from struct bpf_sock_ops

From: Alban Crequy <[email protected]>

This shows how a sockops program could be restricted to a specific
network namespace. The sockops program looks at the current netns via
(struct bpf_sock_ops)->netns and checks if the value matches the
configuration in the new BPF map "sock_netns".

The test program ./test_sockmap accepts a new parameter "--netns"; the
default value is the current netns found by stat() on /proc/self/ns/net,
so the previous tests still pass:

sudo ./test_sockmap
...
Summary: 412 PASSED 0 FAILED
...
Summary: 824 PASSED 0 FAILED

I run my additional test in the following way:

NETNS=$(readlink /proc/self/ns/net | sed 's/^net:\[\(.*\)\]$/\1/')
CGR=/sys/fs/cgroup/unified/user.slice/user-1000.slice/session-5.scope/
sudo ./test_sockmap --cgroup $CGR --netns $NETNS &

cat /sys/kernel/debug/tracing/trace_pipe

echo foo | nc -l 127.0.0.1 8080 &
echo bar | nc 127.0.0.1 8080

=> the connection goes through the sockmap

When testing with a wrong $NETNS, I get the trace_pipe log:
> not binding connection on netns 4026531992

Signed-off-by: Alban Crequy <[email protected]>
---
tools/include/uapi/linux/bpf.h | 1 +
tools/testing/selftests/bpf/test_sockmap.c | 38 +++++++++++++++++--
.../testing/selftests/bpf/test_sockmap_kern.h | 19 ++++++++++
3 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 31a27dd337dc..5afaab25f205 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3069,6 +3069,7 @@ struct bpf_sock_ops {
__u32 sk_txhash;
__u64 bytes_received;
__u64 bytes_acked;
+ __u64 netns;
};

/* Definitions for bpf_sock_ops_cb_flags */
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 3845144e2c91..5a1b9c96fca1 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -2,6 +2,7 @@
// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/select.h>
@@ -21,6 +22,7 @@
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/sendfile.h>
+#include <sys/stat.h>

#include <linux/netlink.h>
#include <linux/socket.h>
@@ -63,8 +65,8 @@ int s1, s2, c1, c2, p1, p2;
int test_cnt;
int passed;
int failed;
-int map_fd[8];
-struct bpf_map *maps[8];
+int map_fd[9];
+struct bpf_map *maps[9];
int prog_fd[11];

int txmsg_pass;
@@ -84,6 +86,7 @@ int txmsg_ingress;
int txmsg_skb;
int ktls;
int peek_flag;
+uint64_t netns_opt;

static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
@@ -111,6 +114,7 @@ static const struct option long_options[] = {
{"txmsg_skb", no_argument, &txmsg_skb, 1 },
{"ktls", no_argument, &ktls, 1 },
{"peek", no_argument, &peek_flag, 1 },
+ {"netns", required_argument, NULL, 'n'},
{0, 0, NULL, 0 }
};

@@ -1585,6 +1589,7 @@ char *map_names[] = {
"sock_bytes",
"sock_redir_flags",
"sock_skb_opts",
+ "sock_netns",
};

int prog_attach_type[] = {
@@ -1619,6 +1624,8 @@ static int populate_progs(char *bpf_file)
struct bpf_object *obj;
int i = 0;
long err;
+ struct stat netns_sb;
+ uint64_t netns_ino;

obj = bpf_object__open(bpf_file);
err = libbpf_get_error(obj);
@@ -1655,6 +1662,28 @@ static int populate_progs(char *bpf_file)
}
}

+ if (netns_opt == 0) {
+ err = stat("/proc/self/ns/net", &netns_sb);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: cannot stat network namespace: %ld (%s)\n",
+ err, strerror(errno));
+ return -1;
+ }
+ netns_ino = netns_sb.st_ino;
+ } else {
+ netns_ino = netns_opt;
+ }
+ i = 1;
+ err = bpf_map_update_elem(map_fd[8], &netns_ino, &i, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (netns): %ld (%s)\n",
+ err, strerror(errno));
+ return -1;
+ }
+
+
return 0;
}

@@ -1738,7 +1767,7 @@ int main(int argc, char **argv)
if (argc < 2)
return test_suite(-1);

- while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:n:",
long_options, &longindex)) != -1) {
switch (opt) {
case 's':
@@ -1805,6 +1834,9 @@ int main(int argc, char **argv)
return -1;
}
break;
+ case 'n':
+ netns_opt = strtoull(optarg, NULL, 10);
+ break;
case 0:
break;
case 'h':
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
index e7639f66a941..3bad9c70376b 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -91,6 +91,13 @@ struct bpf_map_def SEC("maps") sock_skb_opts = {
.max_entries = 1
};

+struct bpf_map_def SEC("maps") sock_netns = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u64),
+ .value_size = sizeof(int),
+ .max_entries = 16
+};
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
@@ -132,9 +139,21 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
int op, err = 0, index, key, ret;
+ int i = 0;
+ __u64 netns;
+ int *allowed;


op = (int) skops->op;
+ netns = skops->netns;
+ bpf_printk("bpf_sockmap: netns = %lu\n", netns);
+
+ // Only allow sockmap connection on the configured network namespace
+ allowed = bpf_map_lookup_elem(&sock_netns, &netns);
+ if (allowed == NULL || *allowed == 0) {
+ bpf_printk("not binding connection on netns %lu\n", netns);
+ return 0;
+ }

switch (op) {
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
--
2.20.1

2019-04-12 18:17:48

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1 1/2] bpf: sock ops: add netns in bpf context

On Fri, Apr 12, 2019 at 3:02 AM Alban Crequy <[email protected]> wrote:
>
> From: Alban Crequy <[email protected]>
>
> sockops programs can now access the network namespace inode via
> (struct bpf_sock_ops)->netns. This can be useful to apply different
> policies on different network namespaces.
>
> In the unlikely case where network namespaces are not compiled in
> (CONFIG_NET_NS=n), the verifier will not allow access to ->netns.
>
> Signed-off-by: Alban Crequy <[email protected]>

Acked-by: Song Liu <[email protected]>

> ---
> include/uapi/linux/bpf.h | 1 +
> net/core/filter.c | 40 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 41 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 31a27dd337dc..5afaab25f205 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3069,6 +3069,7 @@ struct bpf_sock_ops {
> __u32 sk_txhash;
> __u64 bytes_received;
> __u64 bytes_acked;
> + __u64 netns;
> };
>
> /* Definitions for bpf_sock_ops_cb_flags */
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 22eb2edf5573..f5e75b6fecb2 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6758,6 +6758,14 @@ static bool sock_ops_is_valid_access(int off, int size,
> }
> } else {
> switch (off) {
> + case offsetof(struct bpf_sock_ops, netns):
> +#ifdef CONFIG_NET_NS
> + if (size != sizeof(__u64))
> + return false;
> +#else
> + return false;
> +#endif
> + break;
> case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
> bytes_acked):
> if (size != sizeof(__u64))
> @@ -7908,6 +7916,38 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
> SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
> struct sock, type);
> break;
> +
> + case offsetof(struct bpf_sock_ops, netns):
> +#ifdef CONFIG_NET_NS
> + /* Loading: sk_ops->sk->__sk_common.skc_net.net->ns.inum
> + * Type: (struct bpf_sock_ops_kern *)
> + * ->(struct sock *)
> + * ->(struct sock_common)
> + * .possible_net_t
> + * .(struct net *)
> + * ->(struct ns_common)
> + * .(unsigned int)
> + */
> + BUILD_BUG_ON(offsetof(struct sock, __sk_common) != 0);
> + BUILD_BUG_ON(offsetof(possible_net_t, net) != 0);
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + struct bpf_sock_ops_kern, sk),
> + si->dst_reg, si->src_reg,
> + offsetof(struct bpf_sock_ops_kern, sk));
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + possible_net_t, net),
> + si->dst_reg, si->dst_reg,
> + offsetof(struct sock_common, skc_net));
> + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
> + struct ns_common, inum),
> + si->dst_reg, si->dst_reg,
> + offsetof(struct net, ns) +
> + offsetof(struct ns_common, inum));
> +#else
> + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
> +#endif
> + break;
> +
> }
> return insn - insn_buf;
> }
> --
> 2.20.1
>

2019-04-12 18:22:07

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1 2/2] selftests: bpf: read netns from struct bpf_sock_ops

On Fri, Apr 12, 2019 at 3:02 AM Alban Crequy <[email protected]> wrote:
>
> From: Alban Crequy <[email protected]>
>
> This shows how a sockops program could be restricted to a specific
> network namespace. The sockops program looks at the current netns via
> (struct bpf_sock_ops)->netns and checks if the value matches the
> configuration in the new BPF map "sock_netns".
>
> The test program ./test_sockmap accepts a new parameter "--netns"; the
> default value is the current netns found by stat() on /proc/self/ns/net,
> so the previous tests still pass:
>
> sudo ./test_sockmap
> ...
> Summary: 412 PASSED 0 FAILED
> ...
> Summary: 824 PASSED 0 FAILED
>
> I run my additional test in the following way:
>
> NETNS=$(readlink /proc/self/ns/net | sed 's/^net:\[\(.*\)\]$/\1/')
> CGR=/sys/fs/cgroup/unified/user.slice/user-1000.slice/session-5.scope/
> sudo ./test_sockmap --cgroup $CGR --netns $NETNS &
>
> cat /sys/kernel/debug/tracing/trace_pipe
>
> echo foo | nc -l 127.0.0.1 8080 &
> echo bar | nc 127.0.0.1 8080
>
> => the connection goes through the sockmap
>
> When testing with a wrong $NETNS, I get the trace_pipe log:
> > not binding connection on netns 4026531992
>
> Signed-off-by: Alban Crequy <[email protected]>

Acked-by: Song Liu <[email protected]>

I think we should also add verifier tests for this?

Thanks,
Song

> ---
> tools/include/uapi/linux/bpf.h | 1 +
> tools/testing/selftests/bpf/test_sockmap.c | 38 +++++++++++++++++--
> .../testing/selftests/bpf/test_sockmap_kern.h | 19 ++++++++++
> 3 files changed, 55 insertions(+), 3 deletions(-)
>
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 31a27dd337dc..5afaab25f205 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -3069,6 +3069,7 @@ struct bpf_sock_ops {
> __u32 sk_txhash;
> __u64 bytes_received;
> __u64 bytes_acked;
> + __u64 netns;
> };
>
> /* Definitions for bpf_sock_ops_cb_flags */
> diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
> index 3845144e2c91..5a1b9c96fca1 100644
> --- a/tools/testing/selftests/bpf/test_sockmap.c
> +++ b/tools/testing/selftests/bpf/test_sockmap.c
> @@ -2,6 +2,7 @@
> // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
> #include <stdio.h>
> #include <stdlib.h>
> +#include <stdint.h>
> #include <sys/socket.h>
> #include <sys/ioctl.h>
> #include <sys/select.h>
> @@ -21,6 +22,7 @@
> #include <sys/resource.h>
> #include <sys/types.h>
> #include <sys/sendfile.h>
> +#include <sys/stat.h>
>
> #include <linux/netlink.h>
> #include <linux/socket.h>
> @@ -63,8 +65,8 @@ int s1, s2, c1, c2, p1, p2;
> int test_cnt;
> int passed;
> int failed;
> -int map_fd[8];
> -struct bpf_map *maps[8];
> +int map_fd[9];
> +struct bpf_map *maps[9];
> int prog_fd[11];
>
> int txmsg_pass;
> @@ -84,6 +86,7 @@ int txmsg_ingress;
> int txmsg_skb;
> int ktls;
> int peek_flag;
> +uint64_t netns_opt;
>
> static const struct option long_options[] = {
> {"help", no_argument, NULL, 'h' },
> @@ -111,6 +114,7 @@ static const struct option long_options[] = {
> {"txmsg_skb", no_argument, &txmsg_skb, 1 },
> {"ktls", no_argument, &ktls, 1 },
> {"peek", no_argument, &peek_flag, 1 },
> + {"netns", required_argument, NULL, 'n'},
> {0, 0, NULL, 0 }
> };
>
> @@ -1585,6 +1589,7 @@ char *map_names[] = {
> "sock_bytes",
> "sock_redir_flags",
> "sock_skb_opts",
> + "sock_netns",
> };
>
> int prog_attach_type[] = {
> @@ -1619,6 +1624,8 @@ static int populate_progs(char *bpf_file)
> struct bpf_object *obj;
> int i = 0;
> long err;
> + struct stat netns_sb;
> + uint64_t netns_ino;
>
> obj = bpf_object__open(bpf_file);
> err = libbpf_get_error(obj);
> @@ -1655,6 +1662,28 @@ static int populate_progs(char *bpf_file)
> }
> }
>
> + if (netns_opt == 0) {
> + err = stat("/proc/self/ns/net", &netns_sb);
> + if (err) {
> + fprintf(stderr,
> + "ERROR: cannot stat network namespace: %ld (%s)\n",
> + err, strerror(errno));
> + return -1;
> + }
> + netns_ino = netns_sb.st_ino;
> + } else {
> + netns_ino = netns_opt;
> + }
> + i = 1;
> + err = bpf_map_update_elem(map_fd[8], &netns_ino, &i, BPF_ANY);
> + if (err) {
> + fprintf(stderr,
> + "ERROR: bpf_map_update_elem (netns): %ld (%s)\n",
> + err, strerror(errno));
> + return -1;
> + }
> +
> +
> return 0;
> }
>
> @@ -1738,7 +1767,7 @@ int main(int argc, char **argv)
> if (argc < 2)
> return test_suite(-1);
>
> - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
> + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:n:",
> long_options, &longindex)) != -1) {
> switch (opt) {
> case 's':
> @@ -1805,6 +1834,9 @@ int main(int argc, char **argv)
> return -1;
> }
> break;
> + case 'n':
> + netns_opt = strtoull(optarg, NULL, 10);
> + break;
> case 0:
> break;
> case 'h':
> diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
> index e7639f66a941..3bad9c70376b 100644
> --- a/tools/testing/selftests/bpf/test_sockmap_kern.h
> +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
> @@ -91,6 +91,13 @@ struct bpf_map_def SEC("maps") sock_skb_opts = {
> .max_entries = 1
> };
>
> +struct bpf_map_def SEC("maps") sock_netns = {
> + .type = BPF_MAP_TYPE_HASH,
> + .key_size = sizeof(__u64),
> + .value_size = sizeof(int),
> + .max_entries = 16
> +};
> +
> SEC("sk_skb1")
> int bpf_prog1(struct __sk_buff *skb)
> {
> @@ -132,9 +139,21 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
> {
> __u32 lport, rport;
> int op, err = 0, index, key, ret;
> + int i = 0;
> + __u64 netns;
> + int *allowed;
>
>
> op = (int) skops->op;
> + netns = skops->netns;
> + bpf_printk("bpf_sockmap: netns = %lu\n", netns);
> +
> + // Only allow sockmap connection on the configured network namespace
> + allowed = bpf_map_lookup_elem(&sock_netns, &netns);
> + if (allowed == NULL || *allowed == 0) {
> + bpf_printk("not binding connection on netns %lu\n", netns);
> + return 0;
> + }
>
> switch (op) {
> case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
> --
> 2.20.1
>

2019-04-16 09:08:04

by Alban Crequy

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1 2/2] selftests: bpf: read netns from struct bpf_sock_ops

On Fri, Apr 12, 2019 at 8:21 PM Song Liu <[email protected]> wrote:
>
> On Fri, Apr 12, 2019 at 3:02 AM Alban Crequy <[email protected]> wrote:
> >
> > From: Alban Crequy <[email protected]>
> >
> > This shows how a sockops program could be restricted to a specific
> > network namespace. The sockops program looks at the current netns via
> > (struct bpf_sock_ops)->netns and checks if the value matches the
> > configuration in the new BPF map "sock_netns".
> >
> > The test program ./test_sockmap accepts a new parameter "--netns"; the
> > default value is the current netns found by stat() on /proc/self/ns/net,
> > so the previous tests still pass:
> >
> > sudo ./test_sockmap
> > ...
> > Summary: 412 PASSED 0 FAILED
> > ...
> > Summary: 824 PASSED 0 FAILED
> >
> > I run my additional test in the following way:
> >
> > NETNS=$(readlink /proc/self/ns/net | sed 's/^net:\[\(.*\)\]$/\1/')
> > CGR=/sys/fs/cgroup/unified/user.slice/user-1000.slice/session-5.scope/
> > sudo ./test_sockmap --cgroup $CGR --netns $NETNS &
> >
> > cat /sys/kernel/debug/tracing/trace_pipe
> >
> > echo foo | nc -l 127.0.0.1 8080 &
> > echo bar | nc 127.0.0.1 8080
> >
> > => the connection goes through the sockmap
> >
> > When testing with a wrong $NETNS, I get the trace_pipe log:
> > > not binding connection on netns 4026531992
> >
> > Signed-off-by: Alban Crequy <[email protected]>
>
> Acked-by: Song Liu <[email protected]>
>
> I think we should also add verifier tests for this?

Ok. In tools/testing/selftests/bpf/verifier/var_off.c I could add a
test with expected result = ACCEPT that reads the 'netns' field.

Were you thinking of something else or would that be enough?

Thanks,
Alban


> Thanks,
> Song
>
> > ---
> > tools/include/uapi/linux/bpf.h | 1 +
> > tools/testing/selftests/bpf/test_sockmap.c | 38 +++++++++++++++++--
> > .../testing/selftests/bpf/test_sockmap_kern.h | 19 ++++++++++
> > 3 files changed, 55 insertions(+), 3 deletions(-)
> >
> > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > index 31a27dd337dc..5afaab25f205 100644
> > --- a/tools/include/uapi/linux/bpf.h
> > +++ b/tools/include/uapi/linux/bpf.h
> > @@ -3069,6 +3069,7 @@ struct bpf_sock_ops {
> > __u32 sk_txhash;
> > __u64 bytes_received;
> > __u64 bytes_acked;
> > + __u64 netns;
> > };
> >
> > /* Definitions for bpf_sock_ops_cb_flags */
> > diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
> > index 3845144e2c91..5a1b9c96fca1 100644
> > --- a/tools/testing/selftests/bpf/test_sockmap.c
> > +++ b/tools/testing/selftests/bpf/test_sockmap.c
> > @@ -2,6 +2,7 @@
> > // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
> > #include <stdio.h>
> > #include <stdlib.h>
> > +#include <stdint.h>
> > #include <sys/socket.h>
> > #include <sys/ioctl.h>
> > #include <sys/select.h>
> > @@ -21,6 +22,7 @@
> > #include <sys/resource.h>
> > #include <sys/types.h>
> > #include <sys/sendfile.h>
> > +#include <sys/stat.h>
> >
> > #include <linux/netlink.h>
> > #include <linux/socket.h>
> > @@ -63,8 +65,8 @@ int s1, s2, c1, c2, p1, p2;
> > int test_cnt;
> > int passed;
> > int failed;
> > -int map_fd[8];
> > -struct bpf_map *maps[8];
> > +int map_fd[9];
> > +struct bpf_map *maps[9];
> > int prog_fd[11];
> >
> > int txmsg_pass;
> > @@ -84,6 +86,7 @@ int txmsg_ingress;
> > int txmsg_skb;
> > int ktls;
> > int peek_flag;
> > +uint64_t netns_opt;
> >
> > static const struct option long_options[] = {
> > {"help", no_argument, NULL, 'h' },
> > @@ -111,6 +114,7 @@ static const struct option long_options[] = {
> > {"txmsg_skb", no_argument, &txmsg_skb, 1 },
> > {"ktls", no_argument, &ktls, 1 },
> > {"peek", no_argument, &peek_flag, 1 },
> > + {"netns", required_argument, NULL, 'n'},
> > {0, 0, NULL, 0 }
> > };
> >
> > @@ -1585,6 +1589,7 @@ char *map_names[] = {
> > "sock_bytes",
> > "sock_redir_flags",
> > "sock_skb_opts",
> > + "sock_netns",
> > };
> >
> > int prog_attach_type[] = {
> > @@ -1619,6 +1624,8 @@ static int populate_progs(char *bpf_file)
> > struct bpf_object *obj;
> > int i = 0;
> > long err;
> > + struct stat netns_sb;
> > + uint64_t netns_ino;
> >
> > obj = bpf_object__open(bpf_file);
> > err = libbpf_get_error(obj);
> > @@ -1655,6 +1662,28 @@ static int populate_progs(char *bpf_file)
> > }
> > }
> >
> > + if (netns_opt == 0) {
> > + err = stat("/proc/self/ns/net", &netns_sb);
> > + if (err) {
> > + fprintf(stderr,
> > + "ERROR: cannot stat network namespace: %ld (%s)\n",
> > + err, strerror(errno));
> > + return -1;
> > + }
> > + netns_ino = netns_sb.st_ino;
> > + } else {
> > + netns_ino = netns_opt;
> > + }
> > + i = 1;
> > + err = bpf_map_update_elem(map_fd[8], &netns_ino, &i, BPF_ANY);
> > + if (err) {
> > + fprintf(stderr,
> > + "ERROR: bpf_map_update_elem (netns): %ld (%s)\n",
> > + err, strerror(errno));
> > + return -1;
> > + }
> > +
> > +
> > return 0;
> > }
> >
> > @@ -1738,7 +1767,7 @@ int main(int argc, char **argv)
> > if (argc < 2)
> > return test_suite(-1);
> >
> > - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
> > + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:n:",
> > long_options, &longindex)) != -1) {
> > switch (opt) {
> > case 's':
> > @@ -1805,6 +1834,9 @@ int main(int argc, char **argv)
> > return -1;
> > }
> > break;
> > + case 'n':
> > + netns_opt = strtoull(optarg, NULL, 10);
> > + break;
> > case 0:
> > break;
> > case 'h':
> > diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
> > index e7639f66a941..3bad9c70376b 100644
> > --- a/tools/testing/selftests/bpf/test_sockmap_kern.h
> > +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
> > @@ -91,6 +91,13 @@ struct bpf_map_def SEC("maps") sock_skb_opts = {
> > .max_entries = 1
> > };
> >
> > +struct bpf_map_def SEC("maps") sock_netns = {
> > + .type = BPF_MAP_TYPE_HASH,
> > + .key_size = sizeof(__u64),
> > + .value_size = sizeof(int),
> > + .max_entries = 16
> > +};
> > +
> > SEC("sk_skb1")
> > int bpf_prog1(struct __sk_buff *skb)
> > {
> > @@ -132,9 +139,21 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
> > {
> > __u32 lport, rport;
> > int op, err = 0, index, key, ret;
> > + int i = 0;
> > + __u64 netns;
> > + int *allowed;
> >
> >
> > op = (int) skops->op;
> > + netns = skops->netns;
> > + bpf_printk("bpf_sockmap: netns = %lu\n", netns);
> > +
> > + // Only allow sockmap connection on the configured network namespace
> > + allowed = bpf_map_lookup_elem(&sock_netns, &netns);
> > + if (allowed == NULL || *allowed == 0) {
> > + bpf_printk("not binding connection on netns %lu\n", netns);
> > + return 0;
> > + }
> >
> > switch (op) {
> > case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
> > --
> > 2.20.1
> >

2019-04-16 21:51:54

by Song Liu

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1 2/2] selftests: bpf: read netns from struct bpf_sock_ops

On Tue, Apr 16, 2019 at 2:07 AM Alban Crequy <[email protected]> wrote:
>
> On Fri, Apr 12, 2019 at 8:21 PM Song Liu <[email protected]> wrote:
> >
> > On Fri, Apr 12, 2019 at 3:02 AM Alban Crequy <[email protected]> wrote:
> > >
> > > From: Alban Crequy <[email protected]>
> > >
> > > This shows how a sockops program could be restricted to a specific
> > > network namespace. The sockops program looks at the current netns via
> > > (struct bpf_sock_ops)->netns and checks if the value matches the
> > > configuration in the new BPF map "sock_netns".
> > >
> > > The test program ./test_sockmap accepts a new parameter "--netns"; the
> > > default value is the current netns found by stat() on /proc/self/ns/net,
> > > so the previous tests still pass:
> > >
> > > sudo ./test_sockmap
> > > ...
> > > Summary: 412 PASSED 0 FAILED
> > > ...
> > > Summary: 824 PASSED 0 FAILED
> > >
> > > I run my additional test in the following way:
> > >
> > > NETNS=$(readlink /proc/self/ns/net | sed 's/^net:\[\(.*\)\]$/\1/')
> > > CGR=/sys/fs/cgroup/unified/user.slice/user-1000.slice/session-5.scope/
> > > sudo ./test_sockmap --cgroup $CGR --netns $NETNS &
> > >
> > > cat /sys/kernel/debug/tracing/trace_pipe
> > >
> > > echo foo | nc -l 127.0.0.1 8080 &
> > > echo bar | nc 127.0.0.1 8080
> > >
> > > => the connection goes through the sockmap
> > >
> > > When testing with a wrong $NETNS, I get the trace_pipe log:
> > > > not binding connection on netns 4026531992
> > >
> > > Signed-off-by: Alban Crequy <[email protected]>
> >
> > Acked-by: Song Liu <[email protected]>
> >
> > I think we should also add verifier tests for this?
>
> Ok. In tools/testing/selftests/bpf/verifier/var_off.c I could add a
> test with expected result = ACCEPT that reads the 'netns' field.

That sounds good.

Thanks,
Song

>
> Were you thinking of something else or would that be enough?
>
> Thanks,
> Alban
>
>
> > Thanks,
> > Song
> >
> > > ---
> > > tools/include/uapi/linux/bpf.h | 1 +
> > > tools/testing/selftests/bpf/test_sockmap.c | 38 +++++++++++++++++--
> > > .../testing/selftests/bpf/test_sockmap_kern.h | 19 ++++++++++
> > > 3 files changed, 55 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > > index 31a27dd337dc..5afaab25f205 100644
> > > --- a/tools/include/uapi/linux/bpf.h
> > > +++ b/tools/include/uapi/linux/bpf.h
> > > @@ -3069,6 +3069,7 @@ struct bpf_sock_ops {
> > > __u32 sk_txhash;
> > > __u64 bytes_received;
> > > __u64 bytes_acked;
> > > + __u64 netns;
> > > };
> > >
> > > /* Definitions for bpf_sock_ops_cb_flags */
> > > diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
> > > index 3845144e2c91..5a1b9c96fca1 100644
> > > --- a/tools/testing/selftests/bpf/test_sockmap.c
> > > +++ b/tools/testing/selftests/bpf/test_sockmap.c
> > > @@ -2,6 +2,7 @@
> > > // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
> > > #include <stdio.h>
> > > #include <stdlib.h>
> > > +#include <stdint.h>
> > > #include <sys/socket.h>
> > > #include <sys/ioctl.h>
> > > #include <sys/select.h>
> > > @@ -21,6 +22,7 @@
> > > #include <sys/resource.h>
> > > #include <sys/types.h>
> > > #include <sys/sendfile.h>
> > > +#include <sys/stat.h>
> > >
> > > #include <linux/netlink.h>
> > > #include <linux/socket.h>
> > > @@ -63,8 +65,8 @@ int s1, s2, c1, c2, p1, p2;
> > > int test_cnt;
> > > int passed;
> > > int failed;
> > > -int map_fd[8];
> > > -struct bpf_map *maps[8];
> > > +int map_fd[9];
> > > +struct bpf_map *maps[9];
> > > int prog_fd[11];
> > >
> > > int txmsg_pass;
> > > @@ -84,6 +86,7 @@ int txmsg_ingress;
> > > int txmsg_skb;
> > > int ktls;
> > > int peek_flag;
> > > +uint64_t netns_opt;
> > >
> > > static const struct option long_options[] = {
> > > {"help", no_argument, NULL, 'h' },
> > > @@ -111,6 +114,7 @@ static const struct option long_options[] = {
> > > {"txmsg_skb", no_argument, &txmsg_skb, 1 },
> > > {"ktls", no_argument, &ktls, 1 },
> > > {"peek", no_argument, &peek_flag, 1 },
> > > + {"netns", required_argument, NULL, 'n'},
> > > {0, 0, NULL, 0 }
> > > };
> > >
> > > @@ -1585,6 +1589,7 @@ char *map_names[] = {
> > > "sock_bytes",
> > > "sock_redir_flags",
> > > "sock_skb_opts",
> > > + "sock_netns",
> > > };
> > >
> > > int prog_attach_type[] = {
> > > @@ -1619,6 +1624,8 @@ static int populate_progs(char *bpf_file)
> > > struct bpf_object *obj;
> > > int i = 0;
> > > long err;
> > > + struct stat netns_sb;
> > > + uint64_t netns_ino;
> > >
> > > obj = bpf_object__open(bpf_file);
> > > err = libbpf_get_error(obj);
> > > @@ -1655,6 +1662,28 @@ static int populate_progs(char *bpf_file)
> > > }
> > > }
> > >
> > > + if (netns_opt == 0) {
> > > + err = stat("/proc/self/ns/net", &netns_sb);
> > > + if (err) {
> > > + fprintf(stderr,
> > > + "ERROR: cannot stat network namespace: %ld (%s)\n",
> > > + err, strerror(errno));
> > > + return -1;
> > > + }
> > > + netns_ino = netns_sb.st_ino;
> > > + } else {
> > > + netns_ino = netns_opt;
> > > + }
> > > + i = 1;
> > > + err = bpf_map_update_elem(map_fd[8], &netns_ino, &i, BPF_ANY);
> > > + if (err) {
> > > + fprintf(stderr,
> > > + "ERROR: bpf_map_update_elem (netns): %ld (%s)\n",
> > > + err, strerror(errno));
> > > + return -1;
> > > + }
> > > +
> > > +
> > > return 0;
> > > }
> > >
> > > @@ -1738,7 +1767,7 @@ int main(int argc, char **argv)
> > > if (argc < 2)
> > > return test_suite(-1);
> > >
> > > - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
> > > + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:n:",
> > > long_options, &longindex)) != -1) {
> > > switch (opt) {
> > > case 's':
> > > @@ -1805,6 +1834,9 @@ int main(int argc, char **argv)
> > > return -1;
> > > }
> > > break;
> > > + case 'n':
> > > + netns_opt = strtoull(optarg, NULL, 10);
> > > + break;
> > > case 0:
> > > break;
> > > case 'h':
> > > diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
> > > index e7639f66a941..3bad9c70376b 100644
> > > --- a/tools/testing/selftests/bpf/test_sockmap_kern.h
> > > +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
> > > @@ -91,6 +91,13 @@ struct bpf_map_def SEC("maps") sock_skb_opts = {
> > > .max_entries = 1
> > > };
> > >
> > > +struct bpf_map_def SEC("maps") sock_netns = {
> > > + .type = BPF_MAP_TYPE_HASH,
> > > + .key_size = sizeof(__u64),
> > > + .value_size = sizeof(int),
> > > + .max_entries = 16
> > > +};
> > > +
> > > SEC("sk_skb1")
> > > int bpf_prog1(struct __sk_buff *skb)
> > > {
> > > @@ -132,9 +139,21 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
> > > {
> > > __u32 lport, rport;
> > > int op, err = 0, index, key, ret;
> > > + int i = 0;
> > > + __u64 netns;
> > > + int *allowed;
> > >
> > >
> > > op = (int) skops->op;
> > > + netns = skops->netns;
> > > + bpf_printk("bpf_sockmap: netns = %lu\n", netns);
> > > +
> > > + // Only allow sockmap connection on the configured network namespace
> > > + allowed = bpf_map_lookup_elem(&sock_netns, &netns);
> > > + if (allowed == NULL || *allowed == 0) {
> > > + bpf_printk("not binding connection on netns %lu\n", netns);
> > > + return 0;
> > > + }
> > >
> > > switch (op) {
> > > case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
> > > --
> > > 2.20.1
> > >

2019-04-17 03:01:42

by Alexei Starovoitov

[permalink] [raw]
Subject: Re: [PATCH bpf-next v1 1/2] bpf: sock ops: add netns in bpf context

On Fri, Apr 12, 2019 at 3:01 AM Alban Crequy <[email protected]> wrote:
>
> From: Alban Crequy <[email protected]>
>
> sockops programs can now access the network namespace inode via
> (struct bpf_sock_ops)->netns. This can be useful to apply different
> policies on different network namespaces.
>
> In the unlikely case where network namespaces are not compiled in
> (CONFIG_NET_NS=n), the verifier will not allow access to ->netns.
>
> Signed-off-by: Alban Crequy <[email protected]>
> ---
> include/uapi/linux/bpf.h | 1 +
> net/core/filter.c | 40 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 41 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 31a27dd337dc..5afaab25f205 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3069,6 +3069,7 @@ struct bpf_sock_ops {
> __u32 sk_txhash;
> __u64 bytes_received;
> __u64 bytes_acked;
> + __u64 netns;
> };

stat /proc/self/ns/net
returns both an inode number and a device.
The inum alone is not enough to identify a netns.
Both need to be exposed. See netns_dev/netns_ino in bpf.h.