This option makes it possible to programmatically bind sockets to netdevices.
With the help of this option, sockets of VRF-unaware applications
can be distributed between multiple VRFs with an eBPF sock_ops program.
This lets the applications benefit from the multiple possible routes.
Signed-off-by: Ferenc Fejes <[email protected]>
---
net/core/filter.c | 39 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 38 insertions(+), 1 deletion(-)
diff --git a/net/core/filter.c b/net/core/filter.c
index 822d662f97ef..25dac75bfc5d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4248,6 +4248,9 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
static int _bpf_setsockopt(struct sock *sk, int level, int optname,
char *optval, int optlen, u32 flags)
{
+ char devname[IFNAMSIZ];
+ struct net *net;
+ int ifindex;
int ret = 0;
int val;
@@ -4257,7 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sock_owned_by_me(sk);
if (level == SOL_SOCKET) {
- if (optlen != sizeof(int))
+ if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
return -EINVAL;
val = *((int *)optval);
@@ -4298,6 +4301,40 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
sk_dst_reset(sk);
}
break;
+ case SO_BINDTODEVICE:
+ ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ net = sock_net(sk);
+ strncpy(devname, optval,
+ min_t(long, optlen, IFNAMSIZ-1));
+ devname[IFNAMSIZ-1] = 0;
+ ifindex = 0;
+ if (devname[0] != '\0') {
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(net, devname);
+ if (dev)
+ ifindex = dev->ifindex;
+ rcu_read_unlock();
+ ret = -ENODEV;
+ if (!dev)
+ break;
+ }
+ ret = -EPERM;
+ if (sk->sk_bound_dev_if &&
+ !ns_capable(net->user_ns, CAP_NET_RAW))
+ break;
+ ret = -EINVAL;
+ if (ifindex < 0)
+ break;
+ sk->sk_bound_dev_if = ifindex;
+ if (sk->sk_prot->rehash)
+ sk->sk_prot->rehash(sk);
+ sk_dst_reset(sk);
+ ret = 0;
+#endif
+ break;
default:
ret = -EINVAL;
}
--
2.17.1
On Thu, May 21, 2020 at 5:54 AM Ferenc Fejes <[email protected]> wrote:
>
> This option makes it possible to programmatically bind sockets to netdevices.
> With the help of this option, sockets of VRF-unaware applications
> can be distributed between multiple VRFs with an eBPF sock_ops program.
> This lets the applications benefit from the multiple possible routes.
>
> Signed-off-by: Ferenc Fejes <[email protected]>
> ---
> net/core/filter.c | 39 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 38 insertions(+), 1 deletion(-)
>
I'll let more networking-familiar folks comment on the functionality,
but features like this need tests in selftests/bpf.
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 822d662f97ef..25dac75bfc5d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
[...]
On 05/21, Ferenc Fejes wrote:
> This option makes it possible to programmatically bind sockets to netdevices.
> With the help of this option, sockets of VRF-unaware applications
> can be distributed between multiple VRFs with an eBPF sock_ops program.
> This lets the applications benefit from the multiple possible routes.
> Signed-off-by: Ferenc Fejes <[email protected]>
> ---
> net/core/filter.c | 39 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 38 insertions(+), 1 deletion(-)
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 822d662f97ef..25dac75bfc5d 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4248,6 +4248,9 @@ static const struct bpf_func_proto
> bpf_get_socket_uid_proto = {
> static int _bpf_setsockopt(struct sock *sk, int level, int optname,
> char *optval, int optlen, u32 flags)
> {
> + char devname[IFNAMSIZ];
> + struct net *net;
> + int ifindex;
> int ret = 0;
> int val;
> @@ -4257,7 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int
> level, int optname,
> sock_owned_by_me(sk);
> if (level == SOL_SOCKET) {
> - if (optlen != sizeof(int))
> + if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
> return -EINVAL;
> val = *((int *)optval);
> @@ -4298,6 +4301,40 @@ static int _bpf_setsockopt(struct sock *sk, int
> level, int optname,
> sk_dst_reset(sk);
> }
> break;
> + case SO_BINDTODEVICE:
> + ret = -ENOPROTOOPT;
> +#ifdef CONFIG_NETDEVICES
Any specific reason you're not reusing sock_setbindtodevice or at least
sock_setbindtodevice_locked here? I think, historically, we've
reimplemented some of the sockopts because they were 'easy' (i.e.
were just setting a flag in the socket), this one looks more involved.
I'd suggest adding an optional 'lock_sk' argument to sock_setbindtodevice,
calling it with 'true' from the real setsockopt, and calling it with 'false'
here.
And, as Andrii pointed out, it would be nice to have a selftest
that exercises this new option.
> Any specific reason you're not reusing sock_setbindtodevice or at least
> sock_setbindtodevice_locked here? I think, historically, we've
> reimplemented some of the sockopts because they were 'easy' (i.e.
> were just setting a flag in the socket), this one looks more involved.
Yes, there is a copy_from_user in sock_setbindtodevice for copying
the ioctl netdev name from the user, which (I think) is not necessary
here. However, sock_setbindtodevice_locked is the way to go, but I was
afraid to forward declare it in sock.h, change the linkage and export
it in sock.c (I find that a little bit too intrusive).
> I'd suggest adding an optional 'lock_sk' argument to sock_setbindtodevice,
> calling it with 'true' from the real setsockopt, and calling it with 'false'
> here.
Thanks for the advice. However, I think I'll wait to see what happens with
this patch: https://lore.kernel.org/netdev/[email protected]/T/#u
It is a very strange coincidence that that patch was submitted a few hours
before mine (I only noticed it just now), and it refactors sock_setbindtodevice
in a way that will be useful in my case (it also declares it in sock.h).
> And, as Andrii pointed out, it would be nice to have a selftest
> that exercises this new option.
Thanks, I will implement them in the next iteration.