This option makes it possible to programatically bind sockets
to netdevices. With the help of this option sockets
of VRF unaware applications could be distributed between
multiple VRFs with an eBPF program. This lets the applications
benefit from multiple possible routes.
v2:
- splitting up the patch to three parts
- lock_sk parameter for optional locking in sock_bindtoindex - Stanislav Fomichev
- testing the SO_BINDTODEVICE option - Andrii Nakryiko
Ferenc Fejes (3):
net: Make locking in sock_bindtoindex optional
bpf: Allow SO_BINDTODEVICE opt in bpf_setsockopt
selftests/bpf: Add test for SO_BINDTODEVICE opt of bpf_setsockopt
include/net/sock.h | 2 +-
net/core/filter.c | 27 ++++++++++++++-
net/core/sock.c | 10 +++---
net/ipv4/udp_tunnel.c | 2 +-
net/ipv6/ip6_udp_tunnel.c | 2 +-
.../selftests/bpf/progs/connect4_prog.c | 33 +++++++++++++++++++
6 files changed, 68 insertions(+), 8 deletions(-)
--
2.17.1
Extending the supported sockopts in bpf_setsockopt with
SO_BINDTODEVICE. We call sock_bindtoindex with parameter
lock_sk = false in this context because we already owning
the socket.
Signed-off-by: Ferenc Fejes <[email protected]>
---
net/core/filter.c | 27 ++++++++++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index bd2853d23b50..a0958c5ef127 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4248,6 +4248,9 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
{
+ char devname[IFNAMSIZ];
+ struct net *net;
+ int ifindex;
int ret = 0;
int val;
@@ -4257,7 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sock_owned_by_me(sk);
if (level == SOL_SOCKET) {
- if (optlen != sizeof(int))
+ if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
val = *((int *)optval);
@@ -4298,6 +4301,29 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sk_dst_reset(sk);
}
break;
+ case SO_BINDTODEVICE:
+ ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ optlen = min_t(long, optlen, IFNAMSIZ - 1);
+ strncpy(devname, optval, optlen);
+ devname[optlen] = 0;
+
+ ifindex = 0;
+ if (devname[0] != '\0') {
+ struct net_device *dev;
+
+ ret = -ENODEV;
+
+ net = sock_net(sk);
+ dev = dev_get_by_name(net, devname);
+ if (!dev)
+ break;
+ ifindex = dev->ifindex;
+ dev_put(dev);
+ }
+ ret = sock_bindtoindex(sk, ifindex, false);
+#endif
+ break;
default:
ret = -EINVAL;
}
--
2.17.1
The sock_bindtoindex intended for kernel wide usage however
it will lock the socket regardless of the context. This modification
relax this behavior optionally: locking the socket will be optional
by calling the sock_bindtoindex with lock_sk = true.
The modification applied to all users of the sock_bindtoindex.
Signed-off-by: Ferenc Fejes <[email protected]>
---
include/net/sock.h | 2 +-
net/core/sock.c | 10 ++++++----
net/ipv4/udp_tunnel.c | 2 +-
net/ipv6/ip6_udp_tunnel.c | 2 +-
4 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index d994daa418ec..098d97d9fe72 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,7 +2688,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
void sock_def_readable(struct sock *sk);
-int sock_bindtoindex(struct sock *sk, int ifindex);
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk);
void sock_enable_timestamps(struct sock *sk);
void sock_no_linger(struct sock *sk);
void sock_set_keepalive(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 2ca3425b519c..c7ee722cfbe9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -594,13 +594,15 @@ static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
return ret;
}
-int sock_bindtoindex(struct sock *sk, int ifindex)
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
{
int ret;
- lock_sock(sk);
+ if (lock_sk)
+ lock_sock(sk);
ret = sock_bindtoindex_locked(sk, ifindex);
- release_sock(sk);
+ if (lock_sk)
+ release_sock(sk);
return ret;
}
@@ -646,7 +648,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
goto out;
}
- return sock_bindtoindex(sk, index);
+ return sock_bindtoindex(sk, index, true);
out:
#endif
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 2158e8bddf41..3eecba0874aa 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -22,7 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
goto error;
if (cfg->bind_ifindex) {
- err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true);
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 2e0ad1bc84a8..cdc4d4ee2420 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -30,7 +30,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
}
if (cfg->bind_ifindex) {
- err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
+ err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true);
if (err < 0)
goto error;
}
--
2.17.1
This test intended to verify if SO_BINDTODEVICE option works in
bpf_setsockopt. Because we already in the SOL_SOCKET level in this
connect bpf prog its safe to verify the sanity in the beginning of
the connect_v4_prog by calling the bind_to_device test helper.
The testing environment already created by the test_sock_addr.sh
script so this test assume that two netdevices already existing in
the system: veth pair with names test_sock_addr1 and test_sock_addr2.
The test will try to bind the socket to those devices first.
Then the test assume there are no netdevice with "nonexistent_dev"
name so the bpf_setsockopt will give use ENODEV error.
At the end the test remove the device binding from the socket
by binding it to an empty name.
Signed-off-by: Ferenc Fejes <[email protected]>
---
.../selftests/bpf/progs/connect4_prog.c | 33 +++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index c2c85c31cffd..1ab2c5eba86c 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -9,6 +9,8 @@
#include <linux/in6.h>
#include <sys/socket.h>
#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
@@ -21,6 +23,10 @@
#define TCP_CA_NAME_MAX 16
#endif
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
int _version SEC("version") = 1;
__attribute__ ((noinline))
@@ -75,6 +81,29 @@ static __inline int set_cc(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+ char veth1[IFNAMSIZ] = "test_sock_addr1";
+ char veth2[IFNAMSIZ] = "test_sock_addr2";
+ char missing[IFNAMSIZ] = "nonexistent_dev";
+ char del_bind[IFNAMSIZ] = "";
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth1, sizeof(veth1)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth2, sizeof(veth2)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &missing, sizeof(missing)) != -ENODEV)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &del_bind, sizeof(del_bind)))
+ return 1;
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -88,6 +117,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
+ /* Bind to device and unbind it. */
+ if (bind_to_device(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
--
2.17.1
On Sat, May 30, 2020 at 2:11 PM Ferenc Fejes <[email protected]> wrote:
>
> This option makes it possible to programatically bind sockets
> to netdevices. With the help of this option sockets
> of VRF unaware applications could be distributed between
> multiple VRFs with an eBPF program. This lets the applications
> benefit from multiple possible routes.
>
> v2:
> - splitting up the patch to three parts
> - lock_sk parameter for optional locking in sock_bindtoindex - Stanislav Fomichev
> - testing the SO_BINDTODEVICE option - Andrii Nakryiko
Applied. Thanks