Over the years, the code that manages the TCP connections has accumulated
a lot of cargo cult code, largely in due to the fact that the RPC duplicate
replay caches do a lot of stuff that is not really sanctioned by the IETF
spec; the main unsanctioned thing being port reuse.
This patchset is an attempt to clean up the mess, and replace our current
hacky code to disconnect sockets using the TCP RST mechanism with a more
robust version that uses the SO_REUSEPORT socket option to ensure that
we can reuse the socket ports even if they are stuck in TIME_WAIT.
V2: Move close code to use shutdown() instead of sock_release() so that
we always can monitor the close process through xs_tcp_state_change().
Trond Myklebust (14):
SUNRPC: Set SO_REUSEPORT socket option for TCP connections
SUNRPC: Handle EADDRINUSE on connect
SUNRPC: Do not clear the source port in xs_reset_transport
SUNRPC: Ensure xs_reset_transport() resets the close connection flags
SUNRPC: Add helpers to prevent socket create from racing
SUNRPC: TCP/UDP always close the old socket before reconnecting
SUNRPC: Remove TCP client connection reset hack
SUNRPC: Remove TCP socket linger code
SUNRPC: Cleanup to remove remaining uses of XPRT_CONNECTION_ABORT
SUNRPC: Ensure xs_tcp_shutdown() requests a full close of the
connection
SUNRPC: Make xs_tcp_close() do a socket shutdown rather than a
sock_release
SUNRPC: Remove the redundant XPRT_CONNECTION_CLOSE flag
SUNRPC: Handle connection reset more efficiently.
SUNRPC: Define xs_tcp_fin_timeout only if CONFIG_SUNRPC_DEBUG
include/linux/sunrpc/xprt.h | 6 +-
net/sunrpc/clnt.c | 3 +
net/sunrpc/xprt.c | 38 +++++++-
net/sunrpc/xprtsock.c | 232 ++++++++++++++++----------------------------
4 files changed, 125 insertions(+), 154 deletions(-)
--
2.1.0
When using TCP, we need the ability to reuse port numbers after
a disconnection, so that the NFSv3 server knows that we're the same
client. Currently we use a hack to work around the TCP socket's
TIME_WAIT: we send an RST instead of closing, which doesn't
always work...
The SO_REUSEPORT option added in Linux 3.9 allows us to bind multiple
TCP connections to the same source address+port combination, and thus
to use ordinary TCP close() instead of the current hack.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 49 insertions(+), 4 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 87ce7e8bb8dc..484c5040436a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1667,6 +1667,39 @@ static unsigned short xs_get_random_port(void)
}
/**
+ * xs_set_reuseaddr_port - set the socket's port and address reuse options
+ * @sock: socket
+ *
+ * Note that this function has to be called on all sockets that share the
+ * same port, and it must be called before binding.
+ */
+static void xs_sock_set_reuseport(struct socket *sock)
+{
+ char opt = 1;
+
+ kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+}
+
+static unsigned short xs_sock_getport(struct socket *sock)
+{
+ struct sockaddr_storage buf;
+ int buflen;
+ unsigned short port = 0;
+
+ if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0)
+ goto out;
+ switch (buf.ss_family) {
+ case AF_INET6:
+ port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port);
+ break;
+ case AF_INET:
+ port = ntohs(((struct sockaddr_in *)&buf)->sin_port);
+ }
+out:
+ return port;
+}
+
+/**
* xs_set_port - reset the port number in the remote endpoint address
* @xprt: generic transport
* @port: new port number
@@ -1680,6 +1713,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
xs_update_peer_port(xprt);
}
+static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock)
+{
+ if (transport->srcport == 0)
+ transport->srcport = xs_sock_getport(sock);
+}
+
static unsigned short xs_get_srcport(struct sock_xprt *transport)
{
unsigned short port = transport->srcport;
@@ -1833,7 +1872,8 @@ static void xs_dummy_setup_socket(struct work_struct *work)
}
static struct socket *xs_create_sock(struct rpc_xprt *xprt,
- struct sock_xprt *transport, int family, int type, int protocol)
+ struct sock_xprt *transport, int family, int type,
+ int protocol, bool reuseport)
{
struct socket *sock;
int err;
@@ -1846,6 +1886,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
}
xs_reclassify_socket(family, sock);
+ if (reuseport)
+ xs_sock_set_reuseport(sock);
+
err = xs_bind(transport, sock);
if (err) {
sock_release(sock);
@@ -2047,7 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work)
/* Start by resetting any existing state */
xs_reset_transport(transport);
sock = xs_create_sock(xprt, transport,
- xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP);
+ xs_addr(xprt)->sa_family, SOCK_DGRAM,
+ IPPROTO_UDP, false);
if (IS_ERR(sock))
goto out;
@@ -2149,7 +2193,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_allocation = GFP_ATOMIC;
/* socket options */
- sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
sock_reset_flag(sk, SOCK_LINGER);
tcp_sk(sk)->linger2 = 0;
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
@@ -2174,6 +2217,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
switch (ret) {
case 0:
+ xs_set_srcport(transport, sock);
case -EINPROGRESS:
/* SYN_SENT! */
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
@@ -2202,7 +2246,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
if (!sock) {
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
sock = xs_create_sock(xprt, transport,
- xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP);
+ xs_addr(xprt)->sa_family, SOCK_STREAM,
+ IPPROTO_TCP, true);
if (IS_ERR(sock)) {
status = PTR_ERR(sock);
goto out;
--
2.1.0
Now that we're setting SO_REUSEPORT, we still need to handle the
case where a connect() is attempted, but the old socket is still
lingering.
Essentially, all we want to do here is handle the error by waiting
a few seconds and then retrying.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/clnt.c | 3 +++
net/sunrpc/xprtsock.c | 2 ++
2 files changed, 5 insertions(+)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 3f5d4d48f0cb..612aa73bbc60 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1826,6 +1826,7 @@ call_connect_status(struct rpc_task *task)
case -ECONNABORTED:
case -ENETUNREACH:
case -EHOSTUNREACH:
+ case -EADDRINUSE:
case -ENOBUFS:
case -EPIPE:
if (RPC_IS_SOFTCONN(task))
@@ -1934,6 +1935,7 @@ call_transmit_status(struct rpc_task *task)
}
case -ECONNRESET:
case -ECONNABORTED:
+ case -EADDRINUSE:
case -ENOTCONN:
case -ENOBUFS:
case -EPIPE:
@@ -2053,6 +2055,7 @@ call_status(struct rpc_task *task)
case -ECONNRESET:
case -ECONNABORTED:
rpc_force_rebind(clnt);
+ case -EADDRINUSE:
case -ENOBUFS:
rpc_delay(task, 3*HZ);
case -EPIPE:
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 484c5040436a..20f25a837e06 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -721,6 +721,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
xs_tcp_shutdown(xprt);
case -ECONNREFUSED:
case -ENOTCONN:
+ case -EADDRINUSE:
case -EPIPE:
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
}
@@ -2299,6 +2300,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -ECONNREFUSED:
case -ECONNRESET:
case -ENETUNREACH:
+ case -EADDRINUSE:
case -ENOBUFS:
/* retry with existing socket, after a delay */
goto out;
--
2.1.0
Now that we can reuse bound ports after a close, we never really want to
clear the transport's source port after it has been set. Doing so really
messes up the NFSv3 DRC on the server.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 20f25a837e06..ea1882f97912 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -811,8 +811,6 @@ static void xs_reset_transport(struct sock_xprt *transport)
if (sk == NULL)
return;
- transport->srcport = 0;
-
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
transport->sock = NULL;
--
2.1.0
Otherwise, we may end up looping.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 29 +++++++++++++----------------
1 file changed, 13 insertions(+), 16 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ea1882f97912..0fa7ed93dc20 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -803,10 +803,21 @@ static void xs_error_report(struct sock *sk)
read_unlock_bh(&sk->sk_callback_lock);
}
+static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
+{
+ smp_mb__before_atomic();
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+ clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ clear_bit(XPRT_CLOSING, &xprt->state);
+ smp_mb__after_atomic();
+}
+
static void xs_reset_transport(struct sock_xprt *transport)
{
struct socket *sock = transport->sock;
struct sock *sk = transport->inet;
+ struct rpc_xprt *xprt = &transport->xprt;
if (sk == NULL)
return;
@@ -819,8 +830,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
xs_restore_old_callbacks(transport, sk);
write_unlock_bh(&sk->sk_callback_lock);
+ xs_sock_reset_connection_flags(xprt);
- trace_rpc_socket_close(&transport->xprt, sock);
+ trace_rpc_socket_close(xprt, sock);
sock_release(sock);
}
@@ -845,11 +857,6 @@ static void xs_close(struct rpc_xprt *xprt)
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
- smp_mb__before_atomic();
- clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
- clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
- clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_atomic();
xprt_disconnect_done(xprt);
}
@@ -1455,16 +1462,6 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
xprt_clear_connecting(xprt);
}
-static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
-{
- smp_mb__before_atomic();
- clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
- clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
- clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
- clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_atomic();
-}
-
static void xs_sock_mark_closed(struct rpc_xprt *xprt)
{
xs_sock_reset_connection_flags(xprt);
--
2.1.0
The socket lock is currently held by the task that is requesting the
connection be established. While that is efficient in the case where
the connection happens quickly, it is racy in the case where it doesn't.
What we really want is for the connect helper to be able to block access
to the socket while it is being set up.
This patch does so by arranging to transfer the socket lock from the
task that is requesting the connect attempt, and then releasing that
lock once everything is done.
This scheme also gives us automatic protection against collisions with
the RPC close code, so we can kill the cancel_delayed_work_sync()
call in xs_close().
Signed-off-by: Trond Myklebust <[email protected]>
---
include/linux/sunrpc/xprt.h | 3 +++
net/sunrpc/xprt.c | 37 +++++++++++++++++++++++++++++++++----
net/sunrpc/xprtsock.c | 7 +++++--
3 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 9d27ac45b909..2926e618dbc6 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -347,6 +347,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt);
void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
int xs_swapper(struct rpc_xprt *xprt, int enable);
+bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *);
+void xprt_unlock_connect(struct rpc_xprt *, void *);
+
/*
* Reserved bit positions in xprt->state
*/
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ebbefad21a37..ff3574df8344 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -690,6 +690,37 @@ out_abort:
spin_unlock(&xprt->transport_lock);
}
+bool xprt_lock_connect(struct rpc_xprt *xprt,
+ struct rpc_task *task,
+ void *cookie)
+{
+ bool ret = false;
+
+ spin_lock_bh(&xprt->transport_lock);
+ if (!test_bit(XPRT_LOCKED, &xprt->state))
+ goto out;
+ if (xprt->snd_task != task)
+ goto out;
+ xprt->snd_task = cookie;
+ ret = true;
+out:
+ spin_unlock_bh(&xprt->transport_lock);
+ return ret;
+}
+
+void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
+{
+ spin_lock_bh(&xprt->transport_lock);
+ if (xprt->snd_task != cookie)
+ goto out;
+ if (!test_bit(XPRT_LOCKED, &xprt->state))
+ goto out;
+ xprt->snd_task =NULL;
+ xprt->ops->release_xprt(xprt, NULL);
+out:
+ spin_unlock_bh(&xprt->transport_lock);
+}
+
/**
* xprt_connect - schedule a transport connect operation
* @task: RPC task that is requesting the connect
@@ -712,9 +743,7 @@ void xprt_connect(struct rpc_task *task)
if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state))
xprt->ops->close(xprt);
- if (xprt_connected(xprt))
- xprt_release_write(xprt, task);
- else {
+ if (!xprt_connected(xprt)) {
task->tk_rqstp->rq_bytes_sent = 0;
task->tk_timeout = task->tk_rqstp->rq_timeout;
rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
@@ -726,6 +755,7 @@ void xprt_connect(struct rpc_task *task)
xprt->stat.connect_start = jiffies;
xprt->ops->connect(xprt, task);
}
+ xprt_release_write(xprt, task);
}
static void xprt_connect_status(struct rpc_task *task)
@@ -758,7 +788,6 @@ static void xprt_connect_status(struct rpc_task *task)
dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
"server %s\n", task->tk_pid, -task->tk_status,
xprt->servername);
- xprt_release_write(xprt, task);
task->tk_status = -EIO;
}
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0fa7ed93dc20..e57d8ed2c4d8 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -852,8 +852,6 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
- cancel_delayed_work_sync(&transport->connect_worker);
-
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
@@ -2101,6 +2099,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
trace_rpc_socket_connect(xprt, sock, 0);
status = 0;
out:
+ xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
}
@@ -2286,6 +2285,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case 0:
case -EINPROGRESS:
case -EALREADY:
+ xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
return;
case -EINVAL:
@@ -2303,6 +2303,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
out_eagain:
status = -EAGAIN;
out:
+ xprt_unlock_connect(xprt, transport);
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
}
@@ -2325,6 +2326,8 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
+
if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
dprintk("RPC: xs_connect delayed xprt %p for %lu "
"seconds\n",
--
2.1.0
It is not safe to call xs_reset_transport() from inside xs_udp_setup_socket()
or xs_tcp_setup_socket(), since they do not own the correct locks. Instead,
do it in xs_connect().
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e57d8ed2c4d8..e53a5ca03daf 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2081,8 +2081,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
struct socket *sock = transport->sock;
int status = -EIO;
- /* Start by resetting any existing state */
- xs_reset_transport(transport);
sock = xs_create_sock(xprt, transport,
xs_addr(xprt)->sa_family, SOCK_DGRAM,
IPPROTO_UDP, false);
@@ -2328,6 +2326,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
+ /* Start by resetting any existing state */
+ xs_reset_transport(transport);
+
if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
dprintk("RPC: xs_connect delayed xprt %p for %lu "
"seconds\n",
--
2.1.0
Instead we rely on SO_REUSEPORT to provide the reconnection semantics
that we need for NFSv2/v3.
Signed-off-by: Trond Myklebust <[email protected]>
---
include/linux/sunrpc/xprt.h | 1 -
net/sunrpc/xprtsock.c | 67 +--------------------------------------------
2 files changed, 1 insertion(+), 67 deletions(-)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 2926e618dbc6..86af854338b5 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -363,7 +363,6 @@ void xprt_unlock_connect(struct rpc_xprt *, void *);
#define XPRT_CONNECTION_ABORT (7)
#define XPRT_CONNECTION_CLOSE (8)
#define XPRT_CONGESTED (9)
-#define XPRT_CONNECTION_REUSE (10)
static inline void xprt_set_connected(struct rpc_xprt *xprt)
{
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index e53a5ca03daf..dbf279cd4494 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -796,8 +796,6 @@ static void xs_error_report(struct sock *sk)
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -err);
trace_rpc_socket_error(xprt, sk->sk_socket, err);
- if (test_bit(XPRT_CONNECTION_REUSE, &xprt->state))
- goto out;
xprt_wake_pending_tasks(xprt, err);
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -2102,57 +2100,6 @@ out:
xprt_wake_pending_tasks(xprt, status);
}
-/*
- * We need to preserve the port number so the reply cache on the server can
- * find our cached RPC replies when we get around to reconnecting.
- */
-static void xs_abort_connection(struct sock_xprt *transport)
-{
- int result;
- struct sockaddr any;
-
- dprintk("RPC: disconnecting xprt %p to reuse port\n", transport);
-
- /*
- * Disconnect the transport socket by doing a connect operation
- * with AF_UNSPEC. This should return immediately...
- */
- memset(&any, 0, sizeof(any));
- any.sa_family = AF_UNSPEC;
- result = kernel_connect(transport->sock, &any, sizeof(any), 0);
- trace_rpc_socket_reset_connection(&transport->xprt,
- transport->sock, result);
- if (!result)
- xs_sock_reset_connection_flags(&transport->xprt);
- dprintk("RPC: AF_UNSPEC connect return code %d\n", result);
-}
-
-static void xs_tcp_reuse_connection(struct sock_xprt *transport)
-{
- unsigned int state = transport->inet->sk_state;
-
- if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) {
- /* we don't need to abort the connection if the socket
- * hasn't undergone a shutdown
- */
- if (transport->inet->sk_shutdown == 0)
- return;
- dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n",
- __func__, transport->inet->sk_shutdown);
- }
- if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) {
- /* we don't need to abort the connection if the socket
- * hasn't undergone a shutdown
- */
- if (transport->inet->sk_shutdown == 0)
- return;
- dprintk("RPC: %s: ESTABLISHED/SYN_SENT "
- "sk_shutdown set to %d\n",
- __func__, transport->inet->sk_shutdown);
- }
- xs_abort_connection(transport);
-}
-
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2245,18 +2192,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
status = PTR_ERR(sock);
goto out;
}
- } else {
- int abort_and_exit;
-
- abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
- &xprt->state);
- /* "close" the socket, preserving the local port */
- set_bit(XPRT_CONNECTION_REUSE, &xprt->state);
- xs_tcp_reuse_connection(transport);
- clear_bit(XPRT_CONNECTION_REUSE, &xprt->state);
-
- if (abort_and_exit)
- goto out_eagain;
}
dprintk("RPC: worker connecting xprt %p via %s to "
@@ -2296,9 +2231,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EADDRINUSE:
case -ENOBUFS:
/* retry with existing socket, after a delay */
+ xs_tcp_force_close(xprt);
goto out;
}
-out_eagain:
status = -EAGAIN;
out:
xprt_unlock_connect(xprt, transport);
--
2.1.0
Now that we no longer use the partial shutdown code when closing the
socket, we no longer need to worry about the TCP linger2 state.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 35 -----------------------------------
1 file changed, 35 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index dbf279cd4494..c65f74019288 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1427,37 +1427,6 @@ out:
read_unlock_bh(&sk->sk_callback_lock);
}
-/*
- * Do the equivalent of linger/linger2 handling for dealing with
- * broken servers that don't close the socket in a timely
- * fashion
- */
-static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
- unsigned long timeout)
-{
- struct sock_xprt *transport;
-
- if (xprt_test_and_set_connecting(xprt))
- return;
- set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
- transport = container_of(xprt, struct sock_xprt, xprt);
- queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
- timeout);
-}
-
-static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
-{
- struct sock_xprt *transport;
-
- transport = container_of(xprt, struct sock_xprt, xprt);
-
- if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
- !cancel_delayed_work(&transport->connect_worker))
- return;
- clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
- xprt_clear_connecting(xprt);
-}
-
static void xs_sock_mark_closed(struct rpc_xprt *xprt)
{
xs_sock_reset_connection_flags(xprt);
@@ -1513,7 +1482,6 @@ static void xs_tcp_state_change(struct sock *sk)
clear_bit(XPRT_CONNECTED, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
smp_mb__after_atomic();
- xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
break;
case TCP_CLOSE_WAIT:
/* The server initiated a shutdown of the socket */
@@ -1530,13 +1498,11 @@ static void xs_tcp_state_change(struct sock *sk)
break;
case TCP_LAST_ACK:
set_bit(XPRT_CLOSING, &xprt->state);
- xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
smp_mb__before_atomic();
clear_bit(XPRT_CONNECTED, &xprt->state);
smp_mb__after_atomic();
break;
case TCP_CLOSE:
- xs_tcp_cancel_linger_timeout(xprt);
xs_sock_mark_closed(xprt);
}
out:
@@ -2134,7 +2100,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
- tcp_sk(sk)->linger2 = 0;
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
xprt_clear_connected(xprt);
--
2.1.0
Signed-off-by: Trond Myklebust <[email protected]>
---
include/linux/sunrpc/xprt.h | 1 -
net/sunrpc/xprtsock.c | 3 ---
2 files changed, 4 deletions(-)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 86af854338b5..ae39d478a272 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -360,7 +360,6 @@ void xprt_unlock_connect(struct rpc_xprt *, void *);
#define XPRT_BOUND (4)
#define XPRT_BINDING (5)
#define XPRT_CLOSING (6)
-#define XPRT_CONNECTION_ABORT (7)
#define XPRT_CONNECTION_CLOSE (8)
#define XPRT_CONGESTED (9)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c65f74019288..2f8db3499a17 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -804,7 +804,6 @@ static void xs_error_report(struct sock *sk)
static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
{
smp_mb__before_atomic();
- clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
@@ -1904,7 +1903,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
struct socket *sock;
int status = -EIO;
- clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
status = __sock_create(xprt->xprt_net, AF_LOCAL,
SOCK_STREAM, 0, &sock, 1);
if (status < 0) {
@@ -2149,7 +2147,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
int status = -EIO;
if (!sock) {
- clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
sock = xs_create_sock(xprt, transport,
xs_addr(xprt)->sa_family, SOCK_STREAM,
IPPROTO_TCP, true);
--
2.1.0
The previous behaviour left the connection half-open in order to try
to scrape the last replies from the socket. Now that we have more reliable
reconnection, change the behaviour to close down the socket faster.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2f8db3499a17..3d83cbd32ef2 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -627,7 +627,7 @@ process_status:
* @xprt: transport
*
* Initiates a graceful shutdown of the TCP socket by calling the
- * equivalent of shutdown(SHUT_WR);
+ * equivalent of shutdown(SHUT_RDWR);
*/
static void xs_tcp_shutdown(struct rpc_xprt *xprt)
{
@@ -635,7 +635,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
struct socket *sock = transport->sock;
if (sock != NULL) {
- kernel_sock_shutdown(sock, SHUT_WR);
+ kernel_sock_shutdown(sock, SHUT_RDWR);
trace_rpc_socket_shutdown(xprt, sock);
}
}
--
2.1.0
Use of socket shutdown() means that we monitor the shutdown process
through the xs_tcp_state_change() callback, so it is preferable to
a full close in all cases unless we're destroying the transport.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3d83cbd32ef2..0279e8ffb14a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -857,10 +857,7 @@ static void xs_close(struct rpc_xprt *xprt)
static void xs_tcp_close(struct rpc_xprt *xprt)
{
- if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
- xs_close(xprt);
- else
- xs_tcp_shutdown(xprt);
+ xs_tcp_shutdown(xprt);
}
static void xs_xprt_free(struct rpc_xprt *xprt)
@@ -1033,7 +1030,6 @@ static void xs_udp_data_ready(struct sock *sk)
*/
static void xs_tcp_force_close(struct rpc_xprt *xprt)
{
- set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
xprt_force_disconnect(xprt);
}
--
2.1.0
Signed-off-by: Trond Myklebust <[email protected]>
---
include/linux/sunrpc/xprt.h | 1 -
net/sunrpc/xprt.c | 1 -
net/sunrpc/xprtsock.c | 1 -
3 files changed, 3 deletions(-)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index ae39d478a272..8b93ef53df3c 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -360,7 +360,6 @@ void xprt_unlock_connect(struct rpc_xprt *, void *);
#define XPRT_BOUND (4)
#define XPRT_BINDING (5)
#define XPRT_CLOSING (6)
-#define XPRT_CONNECTION_CLOSE (8)
#define XPRT_CONGESTED (9)
static inline void xprt_set_connected(struct rpc_xprt *xprt)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ff3574df8344..e3015aede0d9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -683,7 +683,6 @@ xprt_init_autodisconnect(unsigned long data)
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
goto out_abort;
spin_unlock(&xprt->transport_lock);
- set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
queue_work(rpciod_workqueue, &xprt->task_cleanup);
return;
out_abort:
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0279e8ffb14a..c72b13e2bdf5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -804,7 +804,6 @@ static void xs_error_report(struct sock *sk)
static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
{
smp_mb__before_atomic();
- clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_atomic();
--
2.1.0
If the connection reset is due to an active call on our side, then
the state change is sometimes not reported. Catch those instances
using xs_error_report() instead.
Also remove the xs_tcp_shutdown() call in xs_tcp_send_request() as
the change in behaviour makes it redundant.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c72b13e2bdf5..540d542d85e5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -718,7 +718,6 @@ static int xs_tcp_send_request(struct rpc_task *task)
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
case -ECONNRESET:
- xs_tcp_shutdown(xprt);
case -ECONNREFUSED:
case -ENOTCONN:
case -EADDRINUSE:
@@ -774,6 +773,21 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
sk->sk_error_report = transport->old_error_report;
}
+static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
+{
+ smp_mb__before_atomic();
+ clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ clear_bit(XPRT_CLOSING, &xprt->state);
+ smp_mb__after_atomic();
+}
+
+static void xs_sock_mark_closed(struct rpc_xprt *xprt)
+{
+ xs_sock_reset_connection_flags(xprt);
+ /* Mark transport as closed and wake up all pending tasks */
+ xprt_disconnect_done(xprt);
+}
+
/**
* xs_error_report - callback to handle TCP socket state errors
* @sk: socket
@@ -793,6 +807,9 @@ static void xs_error_report(struct sock *sk)
err = -sk->sk_err;
if (err == 0)
goto out;
+ /* Is this a reset event? */
+ if (sk->sk_state == TCP_CLOSE)
+ xs_sock_mark_closed(xprt);
dprintk("RPC: xs_error_report client %p, error=%d...\n",
xprt, -err);
trace_rpc_socket_error(xprt, sk->sk_socket, err);
@@ -801,14 +818,6 @@ static void xs_error_report(struct sock *sk)
read_unlock_bh(&sk->sk_callback_lock);
}
-static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
-{
- smp_mb__before_atomic();
- clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
- clear_bit(XPRT_CLOSING, &xprt->state);
- smp_mb__after_atomic();
-}
-
static void xs_reset_transport(struct sock_xprt *transport)
{
struct socket *sock = transport->sock;
@@ -1421,13 +1430,6 @@ out:
read_unlock_bh(&sk->sk_callback_lock);
}
-static void xs_sock_mark_closed(struct rpc_xprt *xprt)
-{
- xs_sock_reset_connection_flags(xprt);
- /* Mark transport as closed and wake up all pending tasks */
- xprt_disconnect_done(xprt);
-}
-
/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
--
2.1.0
Now that the linger code is gone, the xs_tcp_fin_timeout variable has
no real function. Keep it for now, since it is part of the /proc
interface, but only define it if that /proc interface is enabled.
Suggested-by: Anna Schumaker <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 540d542d85e5..8ab02262c761 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -63,6 +63,8 @@ static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+
#define XS_TCP_LINGER_TO (15U * HZ)
static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
@@ -75,8 +77,6 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
* someone else's file names!
*/
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-
static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT;
--
2.1.0