When the socket is closed, we need to call xprt_disconnect_done() in order
to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.
However, we also want to ensure that we don't wake them up before the socket
is closed, since that would cause thundering herd issues with everyone
piling up to retransmit before the TCP shutdown dance has completed.
Only the task that holds XPRT_LOCKED needs to wake up early in order to
allow the close to complete.
Reported-by: Dave Wysochanski <[email protected]>
Reported-by: Scott Mayhew <[email protected]>
Cc: Chuck Lever <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/clnt.c | 1 +
net/sunrpc/xprt.c | 5 ++++-
net/sunrpc/xprtsock.c | 6 ++----
3 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c6782aa47525..24cbddc44c88 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1952,6 +1952,7 @@ call_connect_status(struct rpc_task *task)
/* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ);
/* fall through */
+ case -ENOTCONN:
case -EAGAIN:
/* Check for timeouts before looping back to call_bind */
case -ETIMEDOUT:
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ce927002862a..3fb001dff670 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -EAGAIN);
+ else if (xprt->snd_task)
+ rpc_wake_up_queued_task_set_status(&xprt->pending,
+ xprt->snd_task, -ENOTCONN);
spin_unlock_bh(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
@@ -852,6 +854,7 @@ static void xprt_connect_status(struct rpc_task *task)
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EPIPE:
+ case -ENOTCONN:
case -EAGAIN:
dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8a5e823e0b33..4c471b4235ba 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
trace_rpc_socket_close(xprt, sock);
sock_release(sock);
+
+ xprt_disconnect_done(xprt);
}
/**
@@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
-
- xprt_disconnect_done(xprt);
}
static void xs_inject_disconnect(struct rpc_xprt *xprt)
@@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
&transport->sock_state))
xprt_clear_connecting(xprt);
clear_bit(XPRT_CLOSING, &xprt->state);
- if (sk->sk_err)
- xprt_wake_pending_tasks(xprt, -sk->sk_err);
/* Trigger the socket release */
xs_tcp_force_close(xprt);
}
--
2.19.2
Ensure that we clear XPRT_CONNECTING before releasing the XPRT_LOCK so that
we don't have races between the (asynchronous) socket setup code and
tasks in xprt_connect().
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4c471b4235ba..f0b3700cec95 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2090,8 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
trace_rpc_socket_connect(xprt, sock, 0);
status = 0;
out:
- xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
+ xprt_unlock_connect(xprt, transport);
xprt_wake_pending_tasks(xprt, status);
}
@@ -2327,8 +2327,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
}
status = -EAGAIN;
out:
- xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
+ xprt_unlock_connect(xprt, transport);
xprt_wake_pending_tasks(xprt, status);
}
--
2.19.2
Over the years, xprt_connect_status() has been superseded by
call_connect_status(), which now handles all the errors that
xprt_connect_status() does and more. Since the latter converts
all errors that it doesn't recognise to EIO, then it is time
for it to be retired.
Reported-by: Chuck Lever <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprt.c | 32 +-------------------------------
1 file changed, 1 insertion(+), 31 deletions(-)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3fb001dff670..73547d17d3c6 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -67,7 +67,6 @@
*/
static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
-static void xprt_connect_status(struct rpc_task *task);
static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
@@ -822,7 +821,7 @@ void xprt_connect(struct rpc_task *task)
if (!xprt_connected(xprt)) {
task->tk_timeout = task->tk_rqstp->rq_timeout;
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
- rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
+ rpc_sleep_on(&xprt->pending, task, NULL);
if (test_bit(XPRT_CLOSING, &xprt->state))
return;
@@ -841,35 +840,6 @@ void xprt_connect(struct rpc_task *task)
xprt_release_write(xprt, task);
}
-static void xprt_connect_status(struct rpc_task *task)
-{
- switch (task->tk_status) {
- case 0:
- dprintk("RPC: %5u xprt_connect_status: connection established\n",
- task->tk_pid);
- break;
- case -ECONNREFUSED:
- case -ECONNRESET:
- case -ECONNABORTED:
- case -ENETUNREACH:
- case -EHOSTUNREACH:
- case -EPIPE:
- case -ENOTCONN:
- case -EAGAIN:
- dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
- break;
- case -ETIMEDOUT:
- dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
- "out\n", task->tk_pid);
- break;
- default:
- dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
- "server %s\n", task->tk_pid, -task->tk_status,
- task->tk_rqstp->rq_xprt->servername);
- task->tk_status = -EIO;
- }
-}
-
enum xprt_xid_rb_cmp {
XID_RB_EQUAL,
XID_RB_LEFT,
--
2.19.2
I ran Dave's test in a loop over night with these 3 patches on top of
v4.20-rc7 and didn't see any more of the XPRT_WRITE_SPACE hangs.
-Scott
On Mon, 17 Dec 2018, Trond Myklebust wrote:
> When the socket is closed, we need to call xprt_disconnect_done() in order
> to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.
>
> However, we also want to ensure that we don't wake them up before the socket
> is closed, since that would cause thundering herd issues with everyone
> piling up to retransmit before the TCP shutdown dance has completed.
> Only the task that holds XPRT_LOCKED needs to wake up early in order to
> allow the close to complete.
>
> Reported-by: Dave Wysochanski <[email protected]>
> Reported-by: Scott Mayhew <[email protected]>
> Cc: Chuck Lever <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> net/sunrpc/clnt.c | 1 +
> net/sunrpc/xprt.c | 5 ++++-
> net/sunrpc/xprtsock.c | 6 ++----
> 3 files changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index c6782aa47525..24cbddc44c88 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -1952,6 +1952,7 @@ call_connect_status(struct rpc_task *task)
> /* retry with existing socket, after a delay */
> rpc_delay(task, 3*HZ);
> /* fall through */
> + case -ENOTCONN:
> case -EAGAIN:
> /* Check for timeouts before looping back to call_bind */
> case -ETIMEDOUT:
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index ce927002862a..3fb001dff670 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
> /* Try to schedule an autoclose RPC call */
> if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
> queue_work(xprtiod_workqueue, &xprt->task_cleanup);
> - xprt_wake_pending_tasks(xprt, -EAGAIN);
> + else if (xprt->snd_task)
> + rpc_wake_up_queued_task_set_status(&xprt->pending,
> + xprt->snd_task, -ENOTCONN);
> spin_unlock_bh(&xprt->transport_lock);
> }
> EXPORT_SYMBOL_GPL(xprt_force_disconnect);
> @@ -852,6 +854,7 @@ static void xprt_connect_status(struct rpc_task *task)
> case -ENETUNREACH:
> case -EHOSTUNREACH:
> case -EPIPE:
> + case -ENOTCONN:
> case -EAGAIN:
> dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
> break;
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 8a5e823e0b33..4c471b4235ba 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
>
> trace_rpc_socket_close(xprt, sock);
> sock_release(sock);
> +
> + xprt_disconnect_done(xprt);
> }
>
> /**
> @@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
>
> xs_reset_transport(transport);
> xprt->reestablish_timeout = 0;
> -
> - xprt_disconnect_done(xprt);
> }
>
> static void xs_inject_disconnect(struct rpc_xprt *xprt)
> @@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
> &transport->sock_state))
> xprt_clear_connecting(xprt);
> clear_bit(XPRT_CLOSING, &xprt->state);
> - if (sk->sk_err)
> - xprt_wake_pending_tasks(xprt, -sk->sk_err);
> /* Trigger the socket release */
> xs_tcp_force_close(xprt);
> }
> --
> 2.19.2
>
On Tue, 2018-12-18 at 09:20 -0500, Scott Mayhew wrote:
> I ran Dave's test in a loop over night with these 3 patches on top of
> v4.20-rc7 and didn't see any more of the XPRT_WRITE_SPACE hangs.
>
Thanks for verifying that!
Cheers
Trond
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]
> On Dec 18, 2018, at 10:55 AM, Trond Myklebust <[email protected]> wrote:
>
> On Tue, 2018-12-18 at 09:20 -0500, Scott Mayhew wrote:
>> I ran Dave's test in a loop over night with these 3 patches on top of
>> v4.20-rc7 and didn't see any more of the XPRT_WRITE_SPACE hangs.
>>
>
> Thanks for verifying that!
With these three patches and my RDMA-related disconnect fixes, fio8 on krb5i
now works fine, even though the connect count was 508. No stalls or deadlocks.
Tested-by: Chuck Lever <[email protected]>
--
Chuck Lever
On Tue, 2018-12-18 at 10:59 -0500, Chuck Lever wrote:
> > On Dec 18, 2018, at 10:55 AM, Trond Myklebust <
> > [email protected]> wrote:
> >
> > On Tue, 2018-12-18 at 09:20 -0500, Scott Mayhew wrote:
> > > I ran Dave's test in a loop over night with these 3 patches on
> > > top of
> > > v4.20-rc7 and didn't see any more of the XPRT_WRITE_SPACE hangs.
> > >
> >
> > Thanks for verifying that!
>
> With these three patches and my RDMA-related disconnect fixes, fio8
> on krb5i
> now works fine, even though the connect count was 508. No stalls or
> deadlocks.
>
> Tested-by: Chuck Lever <[email protected]>
Yay! Thanks Chuck.
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]
> On Dec 18, 2018, at 11:02 AM, Trond Myklebust <[email protected]> wrote:
>
> On Tue, 2018-12-18 at 10:59 -0500, Chuck Lever wrote:
>>> On Dec 18, 2018, at 10:55 AM, Trond Myklebust <
>>> [email protected]> wrote:
>>>
>>> On Tue, 2018-12-18 at 09:20 -0500, Scott Mayhew wrote:
>>>> I ran Dave's test in a loop over night with these 3 patches on
>>>> top of
>>>> v4.20-rc7 and didn't see any more of the XPRT_WRITE_SPACE hangs.
>>>>
>>>
>>> Thanks for verifying that!
>>
>> With these three patches and my RDMA-related disconnect fixes, fio8
>> on krb5i
>> now works fine, even though the connect count was 508. No stalls or
>> deadlocks.
>>
>> Tested-by: Chuck Lever <[email protected]>
>
> Yay! Thanks Chuck.
I am highly in favor of fewer thundering herd wake-ups ! ;-)
--
Chuck Lever