This patch set includes two fixes for using smc with io_uring.
Guangguan Wang (2):
net/smc: non blocking recvmsg() return -EAGAIN when no data and
signal_pending
net/smc: align the connect behaviour with TCP
net/smc/af_smc.c | 53 ++++++++++++++++++++++++++++++++++++++++++++----
net/smc/smc_rx.c | 4 ++--
2 files changed, 51 insertions(+), 6 deletions(-)
--
2.24.3 (Apple Git-128)
Non blocking sendmsg will return -EAGAIN when any signal pending
and no send space left, while non blocking recvmsg return -EINTR
when signal pending and no data received. This may makes confused.
As TCP returns -EAGAIN in the conditions descriped above. Align the
behavior of smc with TCP.
Signed-off-by: Guangguan Wang <[email protected]>
---
net/smc/smc_rx.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 51e8eb2933ff..338b9ef806e8 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
}
break;
}
+ if (!timeo)
+ return -EAGAIN;
if (signal_pending(current)) {
read_done = sock_intr_errno(timeo);
break;
}
- if (!timeo)
- return -EAGAIN;
}
if (!smc_rx_data_available(conn)) {
--
2.24.3 (Apple Git-128)
Connect with O_NONBLOCK will not be completed immediately
and returns -EINPROGRESS. It is possible to use selector/poll
for completion by selecting the socket for writing. After select
indicates writability, a second connect function call will return
0 to indicate connected successfully as TCP does, but smc returns
-EISCONN. Use socket state for smc to indicate connect state, which
can help smc aligning the connect behaviour with TCP.
Signed-off-by: Guangguan Wang <[email protected]>
---
net/smc/af_smc.c | 53 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 49 insertions(+), 4 deletions(-)
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index fce16b9d6e1a..45f9f7c6e776 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1544,9 +1544,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out_err;
lock_sock(sk);
+ switch (sock->state) {
+ default:
+ rc = -EINVAL;
+ goto out;
+ case SS_CONNECTED:
+ rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
+ goto out;
+ case SS_CONNECTING:
+ if (sk->sk_state == SMC_ACTIVE) {
+ sock->state = SS_CONNECTED;
+ rc = 0;
+ goto out;
+ }
+ break;
+ case SS_UNCONNECTED:
+ sock->state = SS_CONNECTING;
+ break;
+ }
+
switch (sk->sk_state) {
default:
goto out;
+ case SMC_CLOSED:
+ rc = sock_error(sk) ? : -ECONNABORTED;
+ sock->state = SS_UNCONNECTED;
+ goto out;
case SMC_ACTIVE:
rc = -EISCONN;
goto out;
@@ -1565,18 +1588,22 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out;
sock_hold(&smc->sk); /* sock put in passive closing */
- if (smc->use_fallback)
+ if (smc->use_fallback) {
+ sock->state = SS_CONNECTED;
goto out;
+ }
if (flags & O_NONBLOCK) {
if (queue_work(smc_hs_wq, &smc->connect_work))
smc->connect_nonblock = 1;
rc = -EINPROGRESS;
} else {
rc = __smc_connect(smc);
- if (rc < 0)
+ if (rc < 0) {
goto out;
- else
+ } else {
rc = 0; /* success cases including fallback */
+ sock->state = SS_CONNECTED;
+ }
}
out:
@@ -1693,6 +1720,7 @@ struct sock *smc_accept_dequeue(struct sock *parent,
}
if (new_sock) {
sock_graft(new_sk, new_sock);
+ new_sock->state = SS_CONNECTED;
if (isk->use_fallback) {
smc_sk(new_sk)->clcsock->file = new_sock->file;
isk->clcsock->file->private_data = isk->clcsock;
@@ -2424,7 +2452,7 @@ static int smc_listen(struct socket *sock, int backlog)
rc = -EINVAL;
if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
- smc->connect_nonblock)
+ smc->connect_nonblock || sock->state != SS_UNCONNECTED)
goto out;
rc = 0;
@@ -2716,6 +2744,17 @@ static int smc_shutdown(struct socket *sock, int how)
lock_sock(sk);
+ if (sock->state == SS_CONNECTING) {
+ if (sk->sk_state == SMC_ACTIVE)
+ sock->state = SS_CONNECTED;
+ else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
+ sk->sk_state == SMC_PEERCLOSEWAIT2 ||
+ sk->sk_state == SMC_APPCLOSEWAIT1 ||
+ sk->sk_state == SMC_APPCLOSEWAIT2 ||
+ sk->sk_state == SMC_APPFINCLOSEWAIT)
+ sock->state = SS_DISCONNECTING;
+ }
+
rc = -ENOTCONN;
if ((sk->sk_state != SMC_ACTIVE) &&
(sk->sk_state != SMC_PEERCLOSEWAIT1) &&
@@ -2729,6 +2768,7 @@ static int smc_shutdown(struct socket *sock, int how)
sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
if (sk->sk_shutdown == SHUTDOWN_MASK) {
sk->sk_state = SMC_CLOSED;
+ sk->sk_socket->state = SS_UNCONNECTED;
sock_put(sk);
}
goto out;
@@ -2754,6 +2794,10 @@ static int smc_shutdown(struct socket *sock, int how)
/* map sock_shutdown_cmd constants to sk_shutdown value range */
sk->sk_shutdown |= how + 1;
+ if (sk->sk_state == SMC_CLOSED)
+ sock->state = SS_UNCONNECTED;
+ else
+ sock->state = SS_DISCONNECTING;
out:
release_sock(sk);
return rc ? rc : rc1;
@@ -3139,6 +3183,7 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
rc = -ENOBUFS;
sock->ops = &smc_sock_ops;
+ sock->state = SS_UNCONNECTED;
sk = smc_sock_alloc(net, sock, protocol);
if (!sk)
goto out;
--
2.24.3 (Apple Git-128)
On Mon, May 09, 2022 at 07:58:36PM +0800, Guangguan Wang wrote:
> Non blocking sendmsg will return -EAGAIN when any signal pending
> and no send space left, while non blocking recvmsg return -EINTR
> when signal pending and no data received. This may makes confused.
> As TCP returns -EAGAIN in the conditions descriped above. Align the
A little typo descriped -> described.
> behavior of smc with TCP.
I also agree with the behavior of aligning TCP.
Fixes tag is preferred:
Fixes: 846e344eb722 ("net/smc: add receive timeout check")
> Signed-off-by: Guangguan Wang <[email protected]>
Reviewed-by: Tony Lu <[email protected]>
Thanks,
Tony Lu
> ---
> net/smc/smc_rx.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
> index 51e8eb2933ff..338b9ef806e8 100644
> --- a/net/smc/smc_rx.c
> +++ b/net/smc/smc_rx.c
> @@ -355,12 +355,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
> }
> break;
> }
> + if (!timeo)
> + return -EAGAIN;
> if (signal_pending(current)) {
> read_done = sock_intr_errno(timeo);
> break;
> }
> - if (!timeo)
> - return -EAGAIN;
> }
>
> if (!smc_rx_data_available(conn)) {
> --
> 2.24.3 (Apple Git-128)
On Mon, 2022-05-09 at 19:58 +0800, Guangguan Wang wrote:
> Connect with O_NONBLOCK will not be completed immediately
> and returns -EINPROGRESS. It is possible to use selector/poll
> for completion by selecting the socket for writing. After select
> indicates writability, a second connect function call will return
> 0 to indicate connected successfully as TCP does, but smc returns
> -EISCONN. Use socket state for smc to indicate connect state, which
> can help smc aligning the connect behaviour with TCP.
>
> Signed-off-by: Guangguan Wang <[email protected]>
> ---
> net/smc/af_smc.c | 53 ++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 49 insertions(+), 4 deletions(-)
>
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index fce16b9d6e1a..45f9f7c6e776 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -1544,9 +1544,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
> goto out_err;
>
> lock_sock(sk);
> + switch (sock->state) {
> + default:
> + rc = -EINVAL;
> + goto out;
> + case SS_CONNECTED:
> + rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
> + goto out;
> + case SS_CONNECTING:
> + if (sk->sk_state == SMC_ACTIVE) {
> + sock->state = SS_CONNECTED;
> + rc = 0;
> + goto out;
> + }
> + break;
> + case SS_UNCONNECTED:
> + sock->state = SS_CONNECTING;
> + break;
> + }
> +
> switch (sk->sk_state) {
> default:
> goto out;
> + case SMC_CLOSED:
> + rc = sock_error(sk) ? : -ECONNABORTED;
> + sock->state = SS_UNCONNECTED;
> + goto out;
> case SMC_ACTIVE:
> rc = -EISCONN;
> goto out;
> @@ -1565,18 +1588,22 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
> goto out;
>
> sock_hold(&smc->sk); /* sock put in passive closing */
> - if (smc->use_fallback)
> + if (smc->use_fallback) {
> + sock->state = SS_CONNECTED;
> goto out;
> + }
> if (flags & O_NONBLOCK) {
> if (queue_work(smc_hs_wq, &smc->connect_work))
> smc->connect_nonblock = 1;
> rc = -EINPROGRESS;
> } else {
> rc = __smc_connect(smc);
> - if (rc < 0)
> + if (rc < 0) {
> goto out;
> - else
> + } else {
> rc = 0; /* success cases including fallback */
> + sock->state = SS_CONNECTED;
'else' is not needed here, you can keep the above 2 statements dropping
an indentation level.
> + }
> }
>
You can avoid a little code duplication adding here the following:
connected:
sock->state = SS_CONNECTED;
and using the new label where appropriate.
> out:
> @@ -1693,6 +1720,7 @@ struct sock *smc_accept_dequeue(struct sock *parent,
> }
> if (new_sock) {
> sock_graft(new_sk, new_sock);
> + new_sock->state = SS_CONNECTED;
> if (isk->use_fallback) {
> smc_sk(new_sk)->clcsock->file = new_sock->file;
> isk->clcsock->file->private_data = isk->clcsock;
> @@ -2424,7 +2452,7 @@ static int smc_listen(struct socket *sock, int backlog)
>
> rc = -EINVAL;
> if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
> - smc->connect_nonblock)
> + smc->connect_nonblock || sock->state != SS_UNCONNECTED)
> goto out;
>
> rc = 0;
> @@ -2716,6 +2744,17 @@ static int smc_shutdown(struct socket *sock, int how)
>
> lock_sock(sk);
>
> + if (sock->state == SS_CONNECTING) {
> + if (sk->sk_state == SMC_ACTIVE)
> + sock->state = SS_CONNECTED;
> + else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
> + sk->sk_state == SMC_PEERCLOSEWAIT2 ||
> + sk->sk_state == SMC_APPCLOSEWAIT1 ||
> + sk->sk_state == SMC_APPCLOSEWAIT2 ||
> + sk->sk_state == SMC_APPFINCLOSEWAIT)
> + sock->state = SS_DISCONNECTING;
> + }
> +
> rc = -ENOTCONN;
> if ((sk->sk_state != SMC_ACTIVE) &&
> (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
> @@ -2729,6 +2768,7 @@ static int smc_shutdown(struct socket *sock, int how)
> sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
> if (sk->sk_shutdown == SHUTDOWN_MASK) {
> sk->sk_state = SMC_CLOSED;
> + sk->sk_socket->state = SS_UNCONNECTED;
> sock_put(sk);
> }
> goto out;
> @@ -2754,6 +2794,10 @@ static int smc_shutdown(struct socket *sock, int how)
> /* map sock_shutdown_cmd constants to sk_shutdown value range */
> sk->sk_shutdown |= how + 1;
>
> + if (sk->sk_state == SMC_CLOSED)
> + sock->state = SS_UNCONNECTED;
> + else
> + sock->state = SS_DISCONNECTING;
> out:
> release_sock(sk);
> return rc ? rc : rc1;
> @@ -3139,6 +3183,7 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
>
> rc = -ENOBUFS;
> sock->ops = &smc_sock_ops;
> + sock->state = SS_UNCONNECTED;
> sk = smc_sock_alloc(net, sock, protocol);
> if (!sk)
> goto out;
On Mon, May 09, 2022 at 07:58:37PM +0800, Guangguan Wang wrote:
> Connect with O_NONBLOCK will not be completed immediately
> and returns -EINPROGRESS. It is possible to use selector/poll
> for completion by selecting the socket for writing. After select
> indicates writability, a second connect function call will return
> 0 to indicate connected successfully as TCP does, but smc returns
If the connection is established successfully, the following up call of
connect() returns -EISCONN (SS_CONNECTED), which is expected and SMC
does it, same as TCP.
In case of misunderstanding, could you append more detailed information?
Thanks,
Tony Lu
> -EISCONN. Use socket state for smc to indicate connect state, which
> can help smc aligning the connect behaviour with TCP.
>
> Signed-off-by: Guangguan Wang <[email protected]>
> ---
> net/smc/af_smc.c | 53 ++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 49 insertions(+), 4 deletions(-)
>
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index fce16b9d6e1a..45f9f7c6e776 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -1544,9 +1544,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
> goto out_err;
>
> lock_sock(sk);
> + switch (sock->state) {
> + default:
> + rc = -EINVAL;
> + goto out;
> + case SS_CONNECTED:
> + rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
> + goto out;
> + case SS_CONNECTING:
> + if (sk->sk_state == SMC_ACTIVE) {
> + sock->state = SS_CONNECTED;
> + rc = 0;
> + goto out;
> + }
> + break;
> + case SS_UNCONNECTED:
> + sock->state = SS_CONNECTING;
> + break;
> + }
> +
> switch (sk->sk_state) {
> default:
> goto out;
> + case SMC_CLOSED:
> + rc = sock_error(sk) ? : -ECONNABORTED;
> + sock->state = SS_UNCONNECTED;
> + goto out;
> case SMC_ACTIVE:
> rc = -EISCONN;
> goto out;
> @@ -1565,18 +1588,22 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
> goto out;
>
> sock_hold(&smc->sk); /* sock put in passive closing */
> - if (smc->use_fallback)
> + if (smc->use_fallback) {
> + sock->state = SS_CONNECTED;
> goto out;
> + }
> if (flags & O_NONBLOCK) {
> if (queue_work(smc_hs_wq, &smc->connect_work))
> smc->connect_nonblock = 1;
> rc = -EINPROGRESS;
> } else {
> rc = __smc_connect(smc);
> - if (rc < 0)
> + if (rc < 0) {
> goto out;
> - else
> + } else {
> rc = 0; /* success cases including fallback */
> + sock->state = SS_CONNECTED;
> + }
> }
>
> out:
> @@ -1693,6 +1720,7 @@ struct sock *smc_accept_dequeue(struct sock *parent,
> }
> if (new_sock) {
> sock_graft(new_sk, new_sock);
> + new_sock->state = SS_CONNECTED;
> if (isk->use_fallback) {
> smc_sk(new_sk)->clcsock->file = new_sock->file;
> isk->clcsock->file->private_data = isk->clcsock;
> @@ -2424,7 +2452,7 @@ static int smc_listen(struct socket *sock, int backlog)
>
> rc = -EINVAL;
> if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
> - smc->connect_nonblock)
> + smc->connect_nonblock || sock->state != SS_UNCONNECTED)
> goto out;
>
> rc = 0;
> @@ -2716,6 +2744,17 @@ static int smc_shutdown(struct socket *sock, int how)
>
> lock_sock(sk);
>
> + if (sock->state == SS_CONNECTING) {
> + if (sk->sk_state == SMC_ACTIVE)
> + sock->state = SS_CONNECTED;
> + else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
> + sk->sk_state == SMC_PEERCLOSEWAIT2 ||
> + sk->sk_state == SMC_APPCLOSEWAIT1 ||
> + sk->sk_state == SMC_APPCLOSEWAIT2 ||
> + sk->sk_state == SMC_APPFINCLOSEWAIT)
> + sock->state = SS_DISCONNECTING;
> + }
> +
> rc = -ENOTCONN;
> if ((sk->sk_state != SMC_ACTIVE) &&
> (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
> @@ -2729,6 +2768,7 @@ static int smc_shutdown(struct socket *sock, int how)
> sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
> if (sk->sk_shutdown == SHUTDOWN_MASK) {
> sk->sk_state = SMC_CLOSED;
> + sk->sk_socket->state = SS_UNCONNECTED;
> sock_put(sk);
> }
> goto out;
> @@ -2754,6 +2794,10 @@ static int smc_shutdown(struct socket *sock, int how)
> /* map sock_shutdown_cmd constants to sk_shutdown value range */
> sk->sk_shutdown |= how + 1;
>
> + if (sk->sk_state == SMC_CLOSED)
> + sock->state = SS_UNCONNECTED;
> + else
> + sock->state = SS_DISCONNECTING;
> out:
> release_sock(sk);
> return rc ? rc : rc1;
> @@ -3139,6 +3183,7 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol,
>
> rc = -ENOBUFS;
> sock->ops = &smc_sock_ops;
> + sock->state = SS_UNCONNECTED;
> sk = smc_sock_alloc(net, sock, protocol);
> if (!sk)
> goto out;
> --
> 2.24.3 (Apple Git-128)
On 2022/5/10 17:30, Tony Lu wrote:
> On Mon, May 09, 2022 at 07:58:37PM +0800, Guangguan Wang wrote:
>> Connect with O_NONBLOCK will not be completed immediately
>> and returns -EINPROGRESS. It is possible to use selector/poll
>> for completion by selecting the socket for writing. After select
>> indicates writability, a second connect function call will return
>> 0 to indicate connected successfully as TCP does, but smc returns
>
> If the connection is established successfully, the following up call of
> connect() returns -EISCONN (SS_CONNECTED), which is expected and SMC
> does it, same as TCP.
>
> In case of misunderstanding, could you append more detailed information?
>
> Thanks,
> Tony Lu
>
io_uring uses nonblocking connect as follow steps:
1) call connect with nonblocking
2) wait for selector/poll to indicate writability
3) call connect to confirm connection's state
In the third step, tcp changes the socket state from SS_CONNECTING to
SS_CONNECTED and returns 0 if the connection is established successfully,
but smc returns -EISCONN.
On 2022/5/10 19:05, Paolo Abeni wrote:
>> } else {
>> rc = __smc_connect(smc);
>> - if (rc < 0)
>> + if (rc < 0) {
>> goto out;
>> - else
>> + } else {
>> rc = 0; /* success cases including fallback */
>> + sock->state = SS_CONNECTED;
>
> 'else' is not needed here, you can keep the above 2 statements dropping
> an indentation level.
>
>> + }
>> }
>>
>
> You can avoid a little code duplication adding here the following:
>
> connected:
> sock->state = SS_CONNECTED;
>
> and using the new label where appropriate.
>
Got it, I will modify it in the next version.
Thanks.
On Tue, May 10, 2022 at 08:58:38PM +0800, Guangguan Wang wrote:
>
>
> On 2022/5/10 17:30, Tony Lu wrote:
> > On Mon, May 09, 2022 at 07:58:37PM +0800, Guangguan Wang wrote:
> >> Connect with O_NONBLOCK will not be completed immediately
> >> and returns -EINPROGRESS. It is possible to use selector/poll
> >> for completion by selecting the socket for writing. After select
> >> indicates writability, a second connect function call will return
> >> 0 to indicate connected successfully as TCP does, but smc returns
> >
> > If the connection is established successfully, the following up call of
> > connect() returns -EISCONN (SS_CONNECTED), which is expected and SMC
> > does it, same as TCP.
> >
> > In case of misunderstanding, could you append more detailed information?
> >
> > Thanks,
> > Tony Lu
> >
>
> io_uring uses nonblocking connect as follow steps:
> 1) call connect with nonblocking
> 2) wait for selector/poll to indicate writability
> 3) call connect to confirm connection's state
>
> In the third step, tcp changes the socket state from SS_CONNECTING to
> SS_CONNECTED and returns 0 if the connection is established successfully,
> but smc returns -EISCONN.
Based on the steps you list, I am wondering if it is finished in the
step #1, the call of connect() in step #3 would return -EISCONN. Should
we check 0 and -EISCONN in step #3?
To fix this issue, I think we should be careful about adding and
handling sock state, maybe we could push it to net-next and take
advantage of sock state. And I will test this patch later in our test
cases.
Thanks,
Tony Lu