2023-12-19 21:38:32

by Matthieu Baerts (NGI0)

[permalink] [raw]
Subject: [PATCH net-next 0/4] mptcp: cleanup and support more ephemeral ports sockopts

Patch 1 is a cleanup one: mptcp_is_tcpsk() helper was modifying sock_ops
in some cases which is unexpected with that name.

Patch 2 to 4 add support for two socket options: IP_LOCAL_PORT_RANGE and
IP_BIND_ADDRESS_NO_PORT. The first one is a preparation patch, the
second one adds the support while the last one modifies an existing
selftest to validate the new features.

Signed-off-by: Matthieu Baerts <[email protected]>
---
Davide Caratti (1):
mptcp: don't overwrite sock_ops in mptcp_is_tcpsk()

Maxim Galaganov (3):
mptcp: rename mptcp_setsockopt_sol_ip_set_transparent()
mptcp: sockopt: support IP_LOCAL_PORT_RANGE and IP_BIND_ADDRESS_NO_PORT
selftests/net: add MPTCP coverage for IP_LOCAL_PORT_RANGE

net/mptcp/protocol.c | 108 +++++++++-------------
net/mptcp/sockopt.c | 27 +++++-
tools/testing/selftests/net/ip_local_port_range.c | 12 +++
3 files changed, 79 insertions(+), 68 deletions(-)
---
base-commit: 62ed78f3baff396bd928ee77077580c5aa940149
change-id: 20231219-upstream-net-next-20231219-mptcp-sockopts-ephemeral-ports-645522e83161

Best regards,
--
Matthieu Baerts <[email protected]>



2023-12-19 21:38:55

by Matthieu Baerts (NGI0)

[permalink] [raw]
Subject: [PATCH net-next 1/4] mptcp: don't overwrite sock_ops in mptcp_is_tcpsk()

From: Davide Caratti <[email protected]>

Eric Dumazet suggests:

> The fact that mptcp_is_tcpsk() was able to write over sock->ops was a
> bit strange to me.
> mptcp_is_tcpsk() should answer a question, with a read-only argument.

re-factor code to avoid overwriting sock_ops inside that function. Also,
change the helper name to reflect the semantics and to disambiguate from
its dual, sk_is_mptcp(). While at it, collapse mptcp_stream_accept() and
mptcp_accept() into a single function, where fallback / non-fallback are
separated into a single sk_is_mptcp() conditional.

Link: https://github.com/multipath-tcp/mptcp_net-next/issues/432
Suggested-by: Eric Dumazet <[email protected]>
Signed-off-by: Davide Caratti <[email protected]>
Acked-by: Paolo Abeni <[email protected]>
Signed-off-by: Matthieu Baerts <[email protected]>
---
net/mptcp/protocol.c | 108 +++++++++++++++++++++------------------------------
1 file changed, 44 insertions(+), 64 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index bc81ea53a049..5cbbd1221030 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
return READ_ONCE(msk->wnd_end);
}

-static bool mptcp_is_tcpsk(struct sock *sk)
+static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
- struct socket *sock = sk->sk_socket;
-
- if (unlikely(sk->sk_prot == &tcp_prot)) {
- /* we are being invoked after mptcp_accept() has
- * accepted a non-mp-capable flow: sk is a tcp_sk,
- * not an mptcp one.
- *
- * Hand the socket over to tcp so all further socket ops
- * bypass mptcp.
- */
- WRITE_ONCE(sock->ops, &inet_stream_ops);
- return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- } else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
- WRITE_ONCE(sock->ops, &inet6_stream_ops);
- return true;
+ if (sk->sk_prot == &tcpv6_prot)
+ return &inet6_stream_ops;
#endif
- }
-
- return false;
+ WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
+ return &inet_stream_ops;
}

static int __mptcp_socket_create(struct mptcp_sock *msk)
@@ -3258,44 +3244,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
}

-static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
- bool kern)
-{
- struct sock *newsk;
-
- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
- newsk = inet_csk_accept(ssk, flags, err, kern);
- if (!newsk)
- return NULL;
-
- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
- if (sk_is_mptcp(newsk)) {
- struct mptcp_subflow_context *subflow;
- struct sock *new_mptcp_sock;
-
- subflow = mptcp_subflow_ctx(newsk);
- new_mptcp_sock = subflow->conn;
-
- /* is_mptcp should be false if subflow->conn is missing, see
- * subflow_syn_recv_sock()
- */
- if (WARN_ON_ONCE(!new_mptcp_sock)) {
- tcp_sk(newsk)->is_mptcp = 0;
- goto out;
- }
-
- newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
- } else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
- }
-
-out:
- newsk->sk_kern_sock = kern;
- return newsk;
-}
-
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
{
struct mptcp_subflow_context *subflow, *tmp;
@@ -3739,7 +3687,6 @@ static struct proto mptcp_prot = {
.connect = mptcp_connect,
.disconnect = mptcp_disconnect,
.close = mptcp_close,
- .accept = mptcp_accept,
.setsockopt = mptcp_setsockopt,
.getsockopt = mptcp_getsockopt,
.shutdown = mptcp_shutdown,
@@ -3849,18 +3796,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk)
return -EINVAL;

- newsk = mptcp_accept(ssk, flags, &err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, &err, kern);
if (!newsk)
return err;

- lock_sock(newsk);
-
- __inet_accept(sock, newsock, newsk);
- if (!mptcp_is_tcpsk(newsock->sk)) {
- struct mptcp_sock *msk = mptcp_sk(newsk);
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
+ if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
+ struct sock *new_mptcp_sock;
+
+ subflow = mptcp_subflow_ctx(newsk);
+ new_mptcp_sock = subflow->conn;
+
+ /* is_mptcp should be false if subflow->conn is missing, see
+ * subflow_syn_recv_sock()
+ */
+ if (WARN_ON_ONCE(!new_mptcp_sock)) {
+ tcp_sk(newsk)->is_mptcp = 0;
+ goto tcpfallback;
+ }
+
+ newsk = new_mptcp_sock;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);

set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk = mptcp_sk(newsk);
msk->in_accept_queue = 0;

/* set ssk->sk_socket of accept()ed flows to mptcp socket.
@@ -3882,6 +3847,21 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (unlikely(list_is_singular(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
+ } else {
+ MPTCP_INC_STATS(sock_net(ssk),
+ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
+tcpfallback:
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);
+ /* we are being invoked after accepting a non-mp-capable
+ * flow: sk is a tcp_sk, not an mptcp one.
+ *
+ * Hand the socket over to tcp so all further socket ops
+ * bypass mptcp.
+ */
+ WRITE_ONCE(newsock->sk->sk_socket->ops,
+ mptcp_fallback_tcp_ops(newsock->sk));
}
release_sock(newsk);


--
2.40.1


2023-12-19 21:39:39

by Matthieu Baerts (NGI0)

[permalink] [raw]
Subject: [PATCH net-next 3/4] mptcp: sockopt: support IP_LOCAL_PORT_RANGE and IP_BIND_ADDRESS_NO_PORT

From: Maxim Galaganov <[email protected]>

Support for IP_BIND_ADDRESS_NO_PORT sockopt was introduced in [1].
Recently [2] allowed its value to be accessed without locking the
socket.

Support for (newer) IP_LOCAL_PORT_RANGE sockopt was introduced in [3].
In the same series a selftest was added in [4]. This selftest also
covers the IP_BIND_ADDRESS_NO_PORT sockopt.

This patch enables getsockopt()/setsockopt() on MPTCP sockets for these
socket options, syncing set values to subflows in sync_socket_options().
Ephemeral port range is synced to subflows, enabling NAT usecase
described in [3].

[1] commit 90c337da1524 ("inet: add IP_BIND_ADDRESS_NO_PORT to overcome
bind(0) limitations")
[2] commit ca571e2eb7eb ("inet: move inet->bind_address_no_port to
inet->inet_flags")
[3] commit 91d0b78c5177 ("inet: Add IP_LOCAL_PORT_RANGE socket option")
[4] commit ae5439658cce ("selftests/net: Cover the IP_LOCAL_PORT_RANGE
socket option")

Signed-off-by: Maxim Galaganov <[email protected]>
Reviewed-by: Mat Martineau <[email protected]>
Signed-off-by: Matthieu Baerts <[email protected]>
---
net/mptcp/sockopt.c | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index a4bf337e6f77..c40f1428e602 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* should work fine */
case IP_FREEBIND:
case IP_TRANSPARENT:
+ case IP_BIND_ADDRESS_NO_PORT:
+ case IP_LOCAL_PORT_RANGE:

/* the following are control cmsg related */
case IP_PKTINFO:
@@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* common stuff that need some love */
case IP_TOS:
case IP_TTL:
- case IP_BIND_ADDRESS_NO_PORT:
case IP_MTU_DISCOVER:
case IP_RECVERR:

@@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
inet_assign_bit(TRANSPARENT, ssk,
inet_test_bit(TRANSPARENT, sk));
break;
+ case IP_BIND_ADDRESS_NO_PORT:
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
+ inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ break;
+ case IP_LOCAL_PORT_RANGE:
+ WRITE_ONCE(inet_sk(ssk)->local_port_range,
+ READ_ONCE(inet_sk(sk)->local_port_range));
+ break;
default:
release_sock(sk);
WARN_ON_ONCE(1);
@@ -755,6 +764,8 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_FREEBIND:
case IP_TRANSPARENT:
+ case IP_BIND_ADDRESS_NO_PORT:
+ case IP_LOCAL_PORT_RANGE:
return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
case IP_TOS:
return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
@@ -1350,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_TOS:
return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
+ case IP_BIND_ADDRESS_NO_PORT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ case IP_LOCAL_PORT_RANGE:
+ return mptcp_put_int_option(msk, optval, optlen,
+ READ_ONCE(inet_sk(sk)->local_port_range));
}

return -EOPNOTSUPP;
@@ -1450,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)

inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
}

void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)

--
2.40.1


2023-12-19 21:40:01

by Matthieu Baerts (NGI0)

[permalink] [raw]
Subject: [PATCH net-next 4/4] selftests/net: add MPTCP coverage for IP_LOCAL_PORT_RANGE

From: Maxim Galaganov <[email protected]>

Since previous commit, MPTCP has support for IP_BIND_ADDRESS_NO_PORT and
IP_LOCAL_PORT_RANGE sockopts.

Add ip4_mptcp and ip6_mptcp fixture variants to ip_local_port_range
selftest to provide selftest coverage for these sockopts.

Acked-by: Mat Martineau <[email protected]>
Signed-off-by: Maxim Galaganov <[email protected]>
Signed-off-by: Matthieu Baerts <[email protected]>
---
tools/testing/selftests/net/ip_local_port_range.c | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 75e3fdacdf73..0f217a1cc837 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -146,6 +146,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) {
.so_protocol = IPPROTO_SCTP,
};

+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) {
+ .so_domain = AF_INET,
+ .so_type = SOCK_STREAM,
+ .so_protocol = IPPROTO_MPTCP,
+};
+
FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) {
.so_domain = AF_INET6,
.so_type = SOCK_STREAM,
@@ -164,6 +170,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) {
.so_protocol = IPPROTO_SCTP,
};

+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) {
+ .so_domain = AF_INET6,
+ .so_type = SOCK_STREAM,
+ .so_protocol = IPPROTO_MPTCP,
+};
+
TEST_F(ip_local_port_range, invalid_option_value)
{
__u16 val16;

--
2.40.1


2023-12-19 21:49:05

by Matthieu Baerts (NGI0)

[permalink] [raw]
Subject: [PATCH net-next 2/4] mptcp: rename mptcp_setsockopt_sol_ip_set_transparent()

From: Maxim Galaganov <[email protected]>

Next patch extends this function so that it's not specific to
IP_TRANSPARENT. Change function name to mptcp_setsockopt_sol_ip_set().

Reviewed-by: Mat Martineau <[email protected]>
Signed-off-by: Maxim Galaganov <[email protected]>
Signed-off-by: Matthieu Baerts <[email protected]>
---
net/mptcp/sockopt.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index cabe856b2a45..a4bf337e6f77 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -683,8 +683,8 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op
return 0;
}

-static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
- sockptr_t optval, unsigned int optlen)
+static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
struct sock *ssk;
@@ -755,7 +755,7 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_FREEBIND:
case IP_TRANSPARENT:
- return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
+ return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
case IP_TOS:
return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
}

--
2.40.1


2023-12-26 22:40:43

by patchwork-bot+netdevbpf

[permalink] [raw]
Subject: Re: [PATCH net-next 0/4] mptcp: cleanup and support more ephemeral ports sockopts

Hello:

This series was applied to netdev/net-next.git (main)
by David S. Miller <[email protected]>:

On Tue, 19 Dec 2023 22:31:03 +0100 you wrote:
> Patch 1 is a cleanup one: mptcp_is_tcpsk() helper was modifying sock_ops
> in some cases which is unexpected with that name.
>
> Patch 2 to 4 add support for two socket options: IP_LOCAL_PORT_RANGE and
> IP_BIND_ADDRESS_NO_PORT. The first one is a preparation patch, the
> second one adds the support while the last one modifies an existing
> selftest to validate the new features.
>
> [...]

Here is the summary with links:
- [net-next,1/4] mptcp: don't overwrite sock_ops in mptcp_is_tcpsk()
https://git.kernel.org/netdev/net-next/c/8e2b8a9fa512
- [net-next,2/4] mptcp: rename mptcp_setsockopt_sol_ip_set_transparent()
https://git.kernel.org/netdev/net-next/c/57d3117ca80f
- [net-next,3/4] mptcp: sockopt: support IP_LOCAL_PORT_RANGE and IP_BIND_ADDRESS_NO_PORT
https://git.kernel.org/netdev/net-next/c/c85636a29264
- [net-next,4/4] selftests/net: add MPTCP coverage for IP_LOCAL_PORT_RANGE
https://git.kernel.org/netdev/net-next/c/122db5e3634b

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html