2022-03-16 14:24:39

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 1/3] SUNRPC: Fix socket waits for write buffer space

From: Trond Myklebust <[email protected]>

The socket layer requires that we use the socket lock to protect changes
to the sock->sk_write_pending field and others.

Reported-by: Chuck Lever <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 54 +++++++++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7e39f87cde2d..786df8c0cda3 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -763,12 +763,12 @@ xs_stream_start_connect(struct sock_xprt *transport)
/**
* xs_nospace - handle transmit was incomplete
* @req: pointer to RPC request
+ * @transport: pointer to struct sock_xprt
*
*/
-static int xs_nospace(struct rpc_rqst *req)
+static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
{
- struct rpc_xprt *xprt = req->rq_xprt;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_xprt *xprt = &transport->xprt;
struct sock *sk = transport->inet;
int ret = -EAGAIN;

@@ -779,25 +779,49 @@ static int xs_nospace(struct rpc_rqst *req)

/* Don't race with disconnect */
if (xprt_connected(xprt)) {
+ struct socket_wq *wq;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
+ rcu_read_unlock();
+
/* wait for more buffer space */
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
xprt_wait_for_buffer_space(xprt);
} else
ret = -ENOTCONN;

spin_unlock(&xprt->transport_lock);
+ return ret;
+}

- /* Race breaker in case memory is freed before above code is called */
- if (ret == -EAGAIN) {
- struct socket_wq *wq;
+static int xs_sock_nospace(struct rpc_rqst *req)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;

- rcu_read_lock();
- wq = rcu_dereference(sk->sk_wq);
- set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
- rcu_read_unlock();
+ lock_sock(sk);
+ if (!sock_writeable(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
+ return ret;
+}

- sk->sk_write_space(sk);
- }
+static int xs_stream_nospace(struct rpc_rqst *req)
+{
+ struct sock_xprt *transport =
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
+ int ret = -EAGAIN;
+
+ lock_sock(sk);
+ if (!sk_stream_memory_free(sk))
+ ret = xs_nospace(req, transport);
+ release_sock(sk);
return ret;
}

@@ -887,7 +911,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
case -ENOBUFS:
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
@@ -963,7 +987,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_sock_nospace(req);
break;
case -ENETUNREACH:
case -ENOBUFS:
@@ -1083,7 +1107,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(req);
+ status = xs_stream_nospace(req);
break;
case -ECONNRESET:
case -ECONNREFUSED:
--
2.35.1


2022-03-17 06:10:20

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH 1/3] SUNRPC: Fix socket waits for write buffer space



> On Mar 15, 2022, at 12:28 PM, [email protected] wrote:
>
> From: Trond Myklebust <[email protected]>
>
> The socket layer requires that we use the socket lock to protect changes
> to the sock->sk_write_pending field and others.
>
> Reported-by: Chuck Lever <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>

Applied the three patches in this series on top of my RPC-with-TLS
prototype. Works nicely!


> ---
> net/sunrpc/xprtsock.c | 54 +++++++++++++++++++++++++++++++------------
> 1 file changed, 39 insertions(+), 15 deletions(-)
>
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 7e39f87cde2d..786df8c0cda3 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -763,12 +763,12 @@ xs_stream_start_connect(struct sock_xprt *transport)
> /**
> * xs_nospace - handle transmit was incomplete
> * @req: pointer to RPC request
> + * @transport: pointer to struct sock_xprt
> *
> */
> -static int xs_nospace(struct rpc_rqst *req)
> +static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport)
> {
> - struct rpc_xprt *xprt = req->rq_xprt;
> - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
> + struct rpc_xprt *xprt = &transport->xprt;
> struct sock *sk = transport->inet;
> int ret = -EAGAIN;
>
> @@ -779,25 +779,49 @@ static int xs_nospace(struct rpc_rqst *req)
>
> /* Don't race with disconnect */
> if (xprt_connected(xprt)) {
> + struct socket_wq *wq;
> +
> + rcu_read_lock();
> + wq = rcu_dereference(sk->sk_wq);
> + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
> + rcu_read_unlock();
> +
> /* wait for more buffer space */
> + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
> sk->sk_write_pending++;
> xprt_wait_for_buffer_space(xprt);
> } else
> ret = -ENOTCONN;
>
> spin_unlock(&xprt->transport_lock);
> + return ret;
> +}
>
> - /* Race breaker in case memory is freed before above code is called */
> - if (ret == -EAGAIN) {
> - struct socket_wq *wq;
> +static int xs_sock_nospace(struct rpc_rqst *req)
> +{
> + struct sock_xprt *transport =
> + container_of(req->rq_xprt, struct sock_xprt, xprt);
> + struct sock *sk = transport->inet;
> + int ret = -EAGAIN;
>
> - rcu_read_lock();
> - wq = rcu_dereference(sk->sk_wq);
> - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags);
> - rcu_read_unlock();
> + lock_sock(sk);
> + if (!sock_writeable(sk))
> + ret = xs_nospace(req, transport);
> + release_sock(sk);
> + return ret;
> +}
>
> - sk->sk_write_space(sk);
> - }
> +static int xs_stream_nospace(struct rpc_rqst *req)
> +{
> + struct sock_xprt *transport =
> + container_of(req->rq_xprt, struct sock_xprt, xprt);
> + struct sock *sk = transport->inet;
> + int ret = -EAGAIN;
> +
> + lock_sock(sk);
> + if (!sk_stream_memory_free(sk))
> + ret = xs_nospace(req, transport);
> + release_sock(sk);
> return ret;
> }
>
> @@ -887,7 +911,7 @@ static int xs_local_send_request(struct rpc_rqst *req)
> case -ENOBUFS:
> break;
> case -EAGAIN:
> - status = xs_nospace(req);
> + status = xs_stream_nospace(req);
> break;
> default:
> dprintk("RPC: sendmsg returned unrecognized error %d\n",
> @@ -963,7 +987,7 @@ static int xs_udp_send_request(struct rpc_rqst *req)
> /* Should we call xs_close() here? */
> break;
> case -EAGAIN:
> - status = xs_nospace(req);
> + status = xs_sock_nospace(req);
> break;
> case -ENETUNREACH:
> case -ENOBUFS:
> @@ -1083,7 +1107,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
> /* Should we call xs_close() here? */
> break;
> case -EAGAIN:
> - status = xs_nospace(req);
> + status = xs_stream_nospace(req);
> break;
> case -ECONNRESET:
> case -ECONNREFUSED:
> --
> 2.35.1
>

--
Chuck Lever