2018-12-17 20:06:35

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH] SUNRPC: Fix disconnection races

When the socket is closed, we need to call xprt_disconnect_done() in order
to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.

However, we also want to ensure that we don't wake them up before the socket
is closed, since that would cause thundering herd issues with everyone
piling up to retransmit before the TCP shutdown dance has completed.
Only the task that holds XPRT_LOCKED needs to wake up early in order to
allow the close to complete.

Reported-by: Dave Wysochanski <[email protected]>
Reported-by: Scott Mayhew <[email protected]>
Cc: Chuck Lever <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprt.c | 4 +++-
net/sunrpc/xprtsock.c | 10 ++++------
2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ce927002862a..089e53a87379 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -EAGAIN);
+ else if (xprt->snd_task)
+ rpc_wake_up_queued_task_set_status(&xprt->pending,
+ xprt->snd_task, -ENOTCONN);
spin_unlock_bh(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8a5e823e0b33..f0b3700cec95 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)

trace_rpc_socket_close(xprt, sock);
sock_release(sock);
+
+ xprt_disconnect_done(xprt);
}

/**
@@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)

xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
-
- xprt_disconnect_done(xprt);
}

static void xs_inject_disconnect(struct rpc_xprt *xprt)
@@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
&transport->sock_state))
xprt_clear_connecting(xprt);
clear_bit(XPRT_CLOSING, &xprt->state);
- if (sk->sk_err)
- xprt_wake_pending_tasks(xprt, -sk->sk_err);
/* Trigger the socket release */
xs_tcp_force_close(xprt);
}
@@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
trace_rpc_socket_connect(xprt, sock, 0);
status = 0;
out:
- xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
+ xprt_unlock_connect(xprt, transport);
xprt_wake_pending_tasks(xprt, status);
}

@@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
}
status = -EAGAIN;
out:
- xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
+ xprt_unlock_connect(xprt, transport);
xprt_wake_pending_tasks(xprt, status);
}

--
2.19.2



2018-12-17 22:06:45

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH] SUNRPC: Fix disconnection races



> On Dec 17, 2018, at 3:05 PM, Trond Myklebust <[email protected]> wrote:
>
> When the socket is closed, we need to call xprt_disconnect_done() in order
> to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping tasks.
>
> However, we also want to ensure that we don't wake them up before the socket
> is closed, since that would cause thundering herd issues with everyone
> piling up to retransmit before the TCP shutdown dance has completed.
> Only the task that holds XPRT_LOCKED needs to wake up early in order to
> allow the close to complete.
>
> Reported-by: Dave Wysochanski <[email protected]>
> Reported-by: Scott Mayhew <[email protected]>
> Cc: Chuck Lever <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> net/sunrpc/xprt.c | 4 +++-
> net/sunrpc/xprtsock.c | 10 ++++------
> 2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index ce927002862a..089e53a87379 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
> /* Try to schedule an autoclose RPC call */
> if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
> queue_work(xprtiod_workqueue, &xprt->task_cleanup);
> - xprt_wake_pending_tasks(xprt, -EAGAIN);
> + else if (xprt->snd_task)
> + rpc_wake_up_queued_task_set_status(&xprt->pending,
> + xprt->snd_task, -ENOTCONN);
> spin_unlock_bh(&xprt->transport_lock);
> }
> EXPORT_SYMBOL_GPL(xprt_force_disconnect);

Hrm, well I'm not sure xprt_connect_status is ready to deal with -ENOTCONN.

kworker/2:3-2342 [002] 3706.246289: rpc_task_wakeup: task:39992@5 flags=0001 state=001e status=-107 timeout=120000 queue=xprt_pending
kworker/0:7-2424 [000] 3706.246296: xprtrdma_reconnect: peer=[192.168.2.55]:20049 r_xprt=0xffff88886c465000
kworker/u25:28-2270 [004] 3706.246321: rpc_task_run_action: task:39992@5 flags=0001 state=001d status=-107 action=xprt_connect_status
kworker/u25:28-2270 [004] 3706.246323: rpc_task_run_action: task:39992@5 flags=0001 state=001d status=-5 action=call_connect_status
kworker/u25:28-2270 [004] 3706.246323: rpc_connect_status: task:39992@5 status=-5
kworker/u25:28-2270 [004] 3706.246324: rpc_task_run_action: task:39992@5 flags=0001 state=001d status=-5 action=rpc_exit_task
kworker/u25:28-2270 [004] 3706.246327: rpc_stats_latency: task:39992@5 xid=0x2f1ad350 nfsv3 READ backlog=0 rtt=0 execute=13028
kworker/u25:28-2270 [004] 3706.246333: xprtrdma_rpc_done: task:39992@5 req=0xffff88886a438000 rep=(nil)

static void xprt_connect_status(struct rpc_task *task)
{
switch (task->tk_status) {
case 0:
dprintk("RPC: %5u xprt_connect_status: connection established\n",
task->tk_pid);
break;
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
case -ENETUNREACH:
case -EHOSTUNREACH:
case -EPIPE:
case -EAGAIN:
dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break;
case -ETIMEDOUT:
dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
"out\n", task->tk_pid);
break;
default:
dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
"server %s\n", task->tk_pid, -task->tk_status,
task->tk_rqstp->rq_xprt->servername);
task->tk_status = -EIO;
}
}

Maybe the wake-up status should remain -EAGAIN ?


> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 8a5e823e0b33..f0b3700cec95 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct sock_xprt *transport)
>
> trace_rpc_socket_close(xprt, sock);
> sock_release(sock);
> +
> + xprt_disconnect_done(xprt);
> }
>
> /**
> @@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
>
> xs_reset_transport(transport);
> xprt->reestablish_timeout = 0;
> -
> - xprt_disconnect_done(xprt);
> }
>
> static void xs_inject_disconnect(struct rpc_xprt *xprt)
> @@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock *sk)
> &transport->sock_state))
> xprt_clear_connecting(xprt);
> clear_bit(XPRT_CLOSING, &xprt->state);
> - if (sk->sk_err)
> - xprt_wake_pending_tasks(xprt, -sk->sk_err);
> /* Trigger the socket release */
> xs_tcp_force_close(xprt);
> }
> @@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
> trace_rpc_socket_connect(xprt, sock, 0);
> status = 0;
> out:
> - xprt_unlock_connect(xprt, transport);
> xprt_clear_connecting(xprt);
> + xprt_unlock_connect(xprt, transport);
> xprt_wake_pending_tasks(xprt, status);
> }
>
> @@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
> }
> status = -EAGAIN;
> out:
> - xprt_unlock_connect(xprt, transport);
> xprt_clear_connecting(xprt);
> + xprt_unlock_connect(xprt, transport);
> xprt_wake_pending_tasks(xprt, status);
> }
>
> --
> 2.19.2
>

--
Chuck Lever




2018-12-17 22:24:40

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] SUNRPC: Fix disconnection races

On Mon, 2018-12-17 at 17:06 -0500, Chuck Lever wrote:
> > On Dec 17, 2018, at 3:05 PM, Trond Myklebust <[email protected]>
> > wrote:
> >
> > When the socket is closed, we need to call xprt_disconnect_done()
> > in order
> > to clean up the XPRT_WRITE_SPACE flag, and wake up the sleeping
> > tasks.
> >
> > However, we also want to ensure that we don't wake them up before
> > the socket
> > is closed, since that would cause thundering herd issues with
> > everyone
> > piling up to retransmit before the TCP shutdown dance has
> > completed.
> > Only the task that holds XPRT_LOCKED needs to wake up early in
> > order to
> > allow the close to complete.
> >
> > Reported-by: Dave Wysochanski <[email protected]>
> > Reported-by: Scott Mayhew <[email protected]>
> > Cc: Chuck Lever <[email protected]>
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> > net/sunrpc/xprt.c | 4 +++-
> > net/sunrpc/xprtsock.c | 10 ++++------
> > 2 files changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> > index ce927002862a..089e53a87379 100644
> > --- a/net/sunrpc/xprt.c
> > +++ b/net/sunrpc/xprt.c
> > @@ -680,7 +680,9 @@ void xprt_force_disconnect(struct rpc_xprt
> > *xprt)
> > /* Try to schedule an autoclose RPC call */
> > if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
> > queue_work(xprtiod_workqueue, &xprt->task_cleanup);
> > - xprt_wake_pending_tasks(xprt, -EAGAIN);
> > + else if (xprt->snd_task)
> > + rpc_wake_up_queued_task_set_status(&xprt->pending,
> > + xprt->snd_task, -ENOTCONN);
> > spin_unlock_bh(&xprt->transport_lock);
> > }
> > EXPORT_SYMBOL_GPL(xprt_force_disconnect);
>
> Hrm, well I'm not sure xprt_connect_status is ready to deal with
> -ENOTCONN.
>
> kworker/2:3-2342 [002] 3706.246289:
> rpc_task_wakeup: task:39992@5 flags=0001 state=001e status=-107
> timeout=120000 queue=xprt_pending
> kworker/0:7-2424 [000] 3706.246296:
> xprtrdma_reconnect: peer=[192.168.2.55]:20049
> r_xprt=0xffff88886c465000
> kworker/u25:28-2270 [004] 3706.246321:
> rpc_task_run_action: task:39992@5 flags=0001 state=001d status=-107
> action=xprt_connect_status
> kworker/u25:28-2270 [004] 3706.246323:
> rpc_task_run_action: task:39992@5 flags=0001 state=001d status=-5
> action=call_connect_status
> kworker/u25:28-2270 [004] 3706.246323:
> rpc_connect_status: task:39992@5 status=-5
> kworker/u25:28-2270 [004] 3706.246324:
> rpc_task_run_action: task:39992@5 flags=0001 state=001d status=-5
> action=rpc_exit_task
> kworker/u25:28-2270 [004] 3706.246327:
> rpc_stats_latency: task:39992@5 xid=0x2f1ad350 nfsv3 READ
> backlog=0 rtt=0 execute=13028
> kworker/u25:28-2270 [004] 3706.246333:
> xprtrdma_rpc_done: task:39992@5 req=0xffff88886a438000 rep=(nil)
>
> static void xprt_connect_status(struct rpc_task *task)
> {
> switch (task->tk_status) {
> case 0:
> dprintk("RPC: %5u xprt_connect_status: connection
> established\n",
> task->tk_pid);
> break;
> case -ECONNREFUSED:
> case -ECONNRESET:
> case -ECONNABORTED:
> case -ENETUNREACH:
> case -EHOSTUNREACH:
> case -EPIPE:
> case -EAGAIN:
> dprintk("RPC: %5u xprt_connect_status: retrying\n",
> task->tk_pid);
> break;
> case -ETIMEDOUT:
> dprintk("RPC: %5u xprt_connect_status: connect
> attempt timed "
> "out\n", task->tk_pid);
> break;
> default:
> dprintk("RPC: %5u xprt_connect_status: error %d
> connecting to "
> "server %s\n", task->tk_pid, -task-
> >tk_status,
> task->tk_rqstp->rq_xprt->servername);
> task->tk_status = -EIO;
> }
> }
>
> Maybe the wake-up status should remain -EAGAIN ?


Actually, that brings up a very good point. Given that a number of
things can wake up tasks on xprt->pending, and set an error code, why
do we want xprt_connect_status() to change the error codes on us? That
looks like it can go wrong in quite a few different ways.

Furthermore, call_connect_status() is supposed to perform more or less
the same service, and already knows how to deal with more errors.

>
> > diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> > index 8a5e823e0b33..f0b3700cec95 100644
> > --- a/net/sunrpc/xprtsock.c
> > +++ b/net/sunrpc/xprtsock.c
> > @@ -1217,6 +1217,8 @@ static void xs_reset_transport(struct
> > sock_xprt *transport)
> >
> > trace_rpc_socket_close(xprt, sock);
> > sock_release(sock);
> > +
> > + xprt_disconnect_done(xprt);
> > }
> >
> > /**
> > @@ -1237,8 +1239,6 @@ static void xs_close(struct rpc_xprt *xprt)
> >
> > xs_reset_transport(transport);
> > xprt->reestablish_timeout = 0;
> > -
> > - xprt_disconnect_done(xprt);
> > }
> >
> > static void xs_inject_disconnect(struct rpc_xprt *xprt)
> > @@ -1489,8 +1489,6 @@ static void xs_tcp_state_change(struct sock
> > *sk)
> > &transport->sock_state))
> > xprt_clear_connecting(xprt);
> > clear_bit(XPRT_CLOSING, &xprt->state);
> > - if (sk->sk_err)
> > - xprt_wake_pending_tasks(xprt, -sk->sk_err);
> > /* Trigger the socket release */
> > xs_tcp_force_close(xprt);
> > }
> > @@ -2092,8 +2090,8 @@ static void xs_udp_setup_socket(struct
> > work_struct *work)
> > trace_rpc_socket_connect(xprt, sock, 0);
> > status = 0;
> > out:
> > - xprt_unlock_connect(xprt, transport);
> > xprt_clear_connecting(xprt);
> > + xprt_unlock_connect(xprt, transport);
> > xprt_wake_pending_tasks(xprt, status);
> > }
> >
> > @@ -2329,8 +2327,8 @@ static void xs_tcp_setup_socket(struct
> > work_struct *work)
> > }
> > status = -EAGAIN;
> > out:
> > - xprt_unlock_connect(xprt, transport);
> > xprt_clear_connecting(xprt);
> > + xprt_unlock_connect(xprt, transport);
> > xprt_wake_pending_tasks(xprt, status);
> > }
> >
> > --
> > 2.19.2
> >
>
> --
> Chuck Lever
>
>
>
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]