2021-07-19 14:52:46

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v2 2/6] SUNRPC: Unset RPC_TASK_NO_RETRANS_TIMEOUT for NULL RPCs

In some rare failure modes, the server is actually reading the
transport, but then just dropping the requests on the floor.
TCP_USER_TIMEOUT cannot detect that case.

Prevent such a stuck server from pinning client resources
indefinitely by ensuring that certain idempotent requests
(such as NULL) can time out even if the connection is still
operational.

Otherwise rpc_bind_new_program(), gss_destroy_cred(), or
rpc_clnt_test_and_add_xprt() can wait forever.

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/clnt.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ca2000d8cf64..d34737a8a68a 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2694,6 +2694,18 @@ static const struct rpc_procinfo rpcproc_null = {
.p_decode = rpcproc_decode_null,
};

+static void
+rpc_null_call_prepare(struct rpc_task *task, void *data)
+{
+ task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
+ rpc_call_start(task);
+}
+
+static const struct rpc_call_ops rpc_null_ops = {
+ .rpc_call_prepare = rpc_null_call_prepare,
+ .rpc_call_done = rpc_default_callback,
+};
+
static
struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
struct rpc_xprt *xprt, struct rpc_cred *cred, int flags,
@@ -2707,7 +2719,7 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
.rpc_xprt = xprt,
.rpc_message = &msg,
.rpc_op_cred = cred,
- .callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
+ .callback_ops = ops ?: &rpc_null_ops,
.callback_data = data,
.flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
RPC_TASK_NULLCREDS,
@@ -2758,6 +2770,7 @@ static void rpc_cb_add_xprt_release(void *calldata)
}

static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
+ .rpc_call_prepare = rpc_null_call_prepare,
.rpc_call_done = rpc_cb_add_xprt_done,
.rpc_release = rpc_cb_add_xprt_release,
};