2022-10-10 17:50:42

by Kuniyuki Iwashima

[permalink] [raw]
Subject: [PATCH v1 net 1/3] udp: Update reuse->has_conns under reuseport_lock.

When we call connect() for a UDP socket in a reuseport group, we have
to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel
could select a unconnected socket wrongly for packets sent to the
connected socket.

However, the current way to set has_conns is illegal and possible to
trigger that problem. reuseport_has_conns() changes has_conns under
rcu_read_lock(), which upgrades the RCU reader to the updater. Then,
it must do the update under the updater's lock, reuseport_lock, but
it doesn't for now.

For this reason, there is a race below where we fail to set has_conns
resulting in the wrong socket selection. To avoid the race, let's split
the reader and updater with proper locking.

cpu1 cpu2
+----+ +----+

__ip[46]_datagram_connect() reuseport_grow()
. .
|- reuseport_has_conns(sk, true) |- more_reuse = __reuseport_alloc(more_socks_size)
| . |
| |- rcu_read_lock()
| |- reuse = rcu_dereference(sk->sk_reuseport_cb)
| |
| | | /* reuse->has_conns == 0 here */
| | |- more_reuse->has_conns = reuse->has_conns
| |- reuse->has_conns = 1 | /* more_reuse->has_conns SHOULD BE 1 HERE */
| | |
| | |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
| | | more_reuse)
| `- rcu_read_unlock() `- kfree_rcu(reuse, rcu)
|
|- sk->sk_state = TCP_ESTABLISHED

Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
Signed-off-by: Kuniyuki Iwashima <[email protected]>
---
include/net/sock_reuseport.h | 23 +++++++++++++++++------
net/ipv4/datagram.c | 2 +-
net/ipv4/udp.c | 2 +-
net/ipv6/datagram.c | 2 +-
net/ipv6/udp.c | 2 +-
5 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 473b0b0fa4ab..fe9779e6d90f 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -43,21 +43,32 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
extern int reuseport_detach_prog(struct sock *sk);

-static inline bool reuseport_has_conns(struct sock *sk, bool set)
+static inline bool reuseport_has_conns(struct sock *sk)
{
struct sock_reuseport *reuse;
bool ret = false;

rcu_read_lock();
reuse = rcu_dereference(sk->sk_reuseport_cb);
- if (reuse) {
- if (set)
- reuse->has_conns = 1;
- ret = reuse->has_conns;
- }
+ if (reuse && reuse->has_conns)
+ ret = true;
rcu_read_unlock();

return ret;
}

+static inline void reuseport_has_conns_set(struct sock *sk)
+{
+ struct sock_reuseport *reuse;
+
+ if (!rcu_access_pointer(sk->sk_reuseport_cb))
+ return;
+
+ spin_lock(&reuseport_lock);
+ reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock));
+ reuse->has_conns = 1;
+ spin_unlock(&reuseport_lock);
+}
+
#endif /* _SOCK_REUSEPORT_H */
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 405a8c2aea64..5e66add7befa 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
}
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
- reuseport_has_conns(sk, true);
+ reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
inet->inet_id = prandom_u32();
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d63118ce5900..29228231b058 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk, false))
+ if (result && !reuseport_has_conns(sk))
return result;

result = result ? : sk;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index df665d4e8f0f..5ecb56522f9d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
goto out;
}

- reuseport_has_conns(sk, true);
+ reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 91e795bb9ade..56e4523a3004 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -182,7 +182,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
/* Fall back to scoring if group has connections */
- if (result && !reuseport_has_conns(sk, false))
+ if (result && !reuseport_has_conns(sk))
return result;

result = result ? : sk;
--
2.30.2


2022-10-11 02:16:49

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v1 net 1/3] udp: Update reuse->has_conns under reuseport_lock.

Hi Kuniyuki,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on net/master]

url: https://github.com/intel-lab-lkp/linux/commits/Kuniyuki-Iwashima/soreuseport-Fix-issues-related-to-the-faster-selection-algorithm/20221011-015227
base: https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git af7d23f9d96a3e9647cff8619a6860d73b109b5f
config: powerpc-allmodconfig
compiler: powerpc-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/bb1587d84fea0a2b0b6b84691a92def51c5a7e89
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Kuniyuki-Iwashima/soreuseport-Fix-issues-related-to-the-faster-selection-algorithm/20221011-015227
git checkout bb1587d84fea0a2b0b6b84691a92def51c5a7e89
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>, old ones prefixed by <<):

>> ERROR: modpost: "reuseport_lock" [net/ipv6/ipv6.ko] undefined!

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (1.48 kB)
config (325.92 kB)
Download all attachments

2022-10-11 11:33:23

by Paolo Abeni

[permalink] [raw]
Subject: Re: [PATCH v1 net 1/3] udp: Update reuse->has_conns under reuseport_lock.

On Mon, 2022-10-10 at 10:43 -0700, Kuniyuki Iwashima wrote:
> When we call connect() for a UDP socket in a reuseport group, we have
> to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel
> could select a unconnected socket wrongly for packets sent to the
> connected socket.
>
> However, the current way to set has_conns is illegal and possible to
> trigger that problem. reuseport_has_conns() changes has_conns under
> rcu_read_lock(), which upgrades the RCU reader to the updater. Then,
> it must do the update under the updater's lock, reuseport_lock, but
> it doesn't for now.
>
> For this reason, there is a race below where we fail to set has_conns
> resulting in the wrong socket selection. To avoid the race, let's split
> the reader and updater with proper locking.
>
> cpu1 cpu2
> +----+ +----+
>
> __ip[46]_datagram_connect() reuseport_grow()
> . .
> > - reuseport_has_conns(sk, true) |- more_reuse = __reuseport_alloc(more_socks_size)
> > . |
> > |- rcu_read_lock()
> > |- reuse = rcu_dereference(sk->sk_reuseport_cb)
> > |
> > | | /* reuse->has_conns == 0 here */
> > | |- more_reuse->has_conns = reuse->has_conns
> > |- reuse->has_conns = 1 | /* more_reuse->has_conns SHOULD BE 1 HERE */
> > | |
> > | |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
> > | | more_reuse)
> > `- rcu_read_unlock() `- kfree_rcu(reuse, rcu)
> >
> > - sk->sk_state = TCP_ESTABLISHED
>
> Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
> Signed-off-by: Kuniyuki Iwashima <[email protected]>
> ---
> include/net/sock_reuseport.h | 23 +++++++++++++++++------
> net/ipv4/datagram.c | 2 +-
> net/ipv4/udp.c | 2 +-
> net/ipv6/datagram.c | 2 +-
> net/ipv6/udp.c | 2 +-
> 5 files changed, 21 insertions(+), 10 deletions(-)
>
> diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> index 473b0b0fa4ab..fe9779e6d90f 100644
> --- a/include/net/sock_reuseport.h
> +++ b/include/net/sock_reuseport.h
> @@ -43,21 +43,32 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
> extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
> extern int reuseport_detach_prog(struct sock *sk);
>
> -static inline bool reuseport_has_conns(struct sock *sk, bool set)
> +static inline bool reuseport_has_conns(struct sock *sk)
> {
> struct sock_reuseport *reuse;
> bool ret = false;
>
> rcu_read_lock();
> reuse = rcu_dereference(sk->sk_reuseport_cb);
> - if (reuse) {
> - if (set)
> - reuse->has_conns = 1;
> - ret = reuse->has_conns;
> - }
> + if (reuse && reuse->has_conns)
> + ret = true;
> rcu_read_unlock();
>
> return ret;
> }
>
> +static inline void reuseport_has_conns_set(struct sock *sk)
> +{
> + struct sock_reuseport *reuse;
> +
> + if (!rcu_access_pointer(sk->sk_reuseport_cb))
> + return;
> +
> + spin_lock(&reuseport_lock);
> + reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
> + lockdep_is_held(&reuseport_lock));
> + reuse->has_conns = 1;
> + spin_unlock(&reuseport_lock);
> +}

Since the above is not super critical, it's probably better move it
into sock_reuseport.c file and export it (to fix the build issue)

Cheers,

Paolo

2022-10-11 16:50:46

by Kuniyuki Iwashima

[permalink] [raw]
Subject: Re: [PATCH v1 net 1/3] udp: Update reuse->has_conns under reuseport_lock.

From: Paolo Abeni <[email protected]>
Date: Tue, 11 Oct 2022 12:50:52 +0200
> On Mon, 2022-10-10 at 10:43 -0700, Kuniyuki Iwashima wrote:
> > When we call connect() for a UDP socket in a reuseport group, we have
> > to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel
> > could select a unconnected socket wrongly for packets sent to the
> > connected socket.
> >
> > However, the current way to set has_conns is illegal and possible to
> > trigger that problem. reuseport_has_conns() changes has_conns under
> > rcu_read_lock(), which upgrades the RCU reader to the updater. Then,
> > it must do the update under the updater's lock, reuseport_lock, but
> > it doesn't for now.
> >
> > For this reason, there is a race below where we fail to set has_conns
> > resulting in the wrong socket selection. To avoid the race, let's split
> > the reader and updater with proper locking.
> >
> > cpu1 cpu2
> > +----+ +----+
> >
> > __ip[46]_datagram_connect() reuseport_grow()
> > . .
> > > - reuseport_has_conns(sk, true) |- more_reuse = __reuseport_alloc(more_socks_size)
> > > . |
> > > |- rcu_read_lock()
> > > |- reuse = rcu_dereference(sk->sk_reuseport_cb)
> > > |
> > > | | /* reuse->has_conns == 0 here */
> > > | |- more_reuse->has_conns = reuse->has_conns
> > > |- reuse->has_conns = 1 | /* more_reuse->has_conns SHOULD BE 1 HERE */
> > > | |
> > > | |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
> > > | | more_reuse)
> > > `- rcu_read_unlock() `- kfree_rcu(reuse, rcu)
> > >
> > > - sk->sk_state = TCP_ESTABLISHED
> >
> > Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
> > Signed-off-by: Kuniyuki Iwashima <[email protected]>
> > ---
> > include/net/sock_reuseport.h | 23 +++++++++++++++++------
> > net/ipv4/datagram.c | 2 +-
> > net/ipv4/udp.c | 2 +-
> > net/ipv6/datagram.c | 2 +-
> > net/ipv6/udp.c | 2 +-
> > 5 files changed, 21 insertions(+), 10 deletions(-)
> >
> > diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> > index 473b0b0fa4ab..fe9779e6d90f 100644
> > --- a/include/net/sock_reuseport.h
> > +++ b/include/net/sock_reuseport.h
> > @@ -43,21 +43,32 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
> > extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
> > extern int reuseport_detach_prog(struct sock *sk);
> >
> > -static inline bool reuseport_has_conns(struct sock *sk, bool set)
> > +static inline bool reuseport_has_conns(struct sock *sk)
> > {
> > struct sock_reuseport *reuse;
> > bool ret = false;
> >
> > rcu_read_lock();
> > reuse = rcu_dereference(sk->sk_reuseport_cb);
> > - if (reuse) {
> > - if (set)
> > - reuse->has_conns = 1;
> > - ret = reuse->has_conns;
> > - }
> > + if (reuse && reuse->has_conns)
> > + ret = true;
> > rcu_read_unlock();
> >
> > return ret;
> > }
> >
> > +static inline void reuseport_has_conns_set(struct sock *sk)
> > +{
> > + struct sock_reuseport *reuse;
> > +
> > + if (!rcu_access_pointer(sk->sk_reuseport_cb))
> > + return;
> > +
> > + spin_lock(&reuseport_lock);
> > + reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
> > + lockdep_is_held(&reuseport_lock));
> > + reuse->has_conns = 1;
> > + spin_unlock(&reuseport_lock);
> > +}
>
> Since the above is not super critical, it's probably better move it
> into sock_reuseport.c file and export it (to fix the build issue)

I'll fix the CONFIG_IPV6=m build failure.
Thank you.