2020-04-23 19:43:42

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH RFC2 1/2] SUNRPC: Set SOFTCONN when destroying GSS contexts

Prevent a (temporary) hang when shutting down an rpc_clnt that has
used one or more GSS creds.

I noticed that callers of rpc_call_null_helper() use a common set of
flags, so I collected them all in that helper function.

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/auth_gss/auth_gss.c | 2 +-
net/sunrpc/clnt.c | 10 ++++------
2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index ac5cac0dd24b..16cec9755b86 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1285,7 +1285,7 @@ static void gss_pipe_free(struct gss_pipe *p)
ctx->gc_proc = RPC_GSS_PROC_DESTROY;

task = rpc_call_null(gss_auth->client, &new->gc_base,
- RPC_TASK_ASYNC|RPC_TASK_SOFT);
+ RPC_TASK_ASYNC);
if (!IS_ERR(task))
rpc_put_task(task);

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 325a0858700f..ddc98b97c170 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2742,7 +2742,8 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
.rpc_op_cred = cred,
.callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
.callback_data = data,
- .flags = flags | RPC_TASK_NULLCREDS,
+ .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
+ RPC_TASK_NULLCREDS,
};

return rpc_run_task(&task_setup_data);
@@ -2805,8 +2806,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
goto success;
}

- task = rpc_call_null_helper(clnt, xprt, NULL,
- RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
+ task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
&rpc_cb_add_xprt_call_ops, data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -2850,9 +2850,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
goto out_err;

/* Test the connection */
- task = rpc_call_null_helper(clnt, xprt, NULL,
- RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
- NULL, NULL);
+ task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
if (IS_ERR(task)) {
status = PTR_ERR(task);
goto out_err;


2020-04-23 19:43:46

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH RFC2 2/2] sunrpc: Ensure signalled RPC tasks exit

If an RPC task is signaled while it is running and the transport is
not connected, it will never sleep and never be terminated. This can
happen when a RPC transport is shut down: the remaining tasks are
signalled, but the transport is disconnected.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/sched.h | 3 ++-
net/sunrpc/sched.c | 14 ++++++--------
2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index df696efdd675..9f5e48f154c5 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -170,7 +170,8 @@ struct rpc_task_setup {

#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)

-#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
+#define RPC_SIGNALLED(t) \
+ unlikely(test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate) != 0)

/*
* Task priorities.
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 7eba20a88438..99b7b834a110 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -912,6 +912,12 @@ static void __rpc_execute(struct rpc_task *task)
trace_rpc_task_run_action(task, do_action);
do_action(task);

+ if (RPC_SIGNALLED(task)) {
+ task->tk_rpc_status = -ERESTARTSYS;
+ rpc_exit(task, -ERESTARTSYS);
+ break;
+ }
+
/*
* Lockless check for whether task is sleeping or not.
*/
@@ -919,14 +925,6 @@ static void __rpc_execute(struct rpc_task *task)
continue;

/*
- * Signalled tasks should exit rather than sleep.
- */
- if (RPC_SIGNALLED(task)) {
- task->tk_rpc_status = -ERESTARTSYS;
- rpc_exit(task, -ERESTARTSYS);
- }
-
- /*
* The queue->lock protects against races with
* rpc_make_runnable().
*

2020-04-23 19:44:26

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH RFC2 2/2] sunrpc: Ensure signalled RPC tasks exit

Sigh.

I forgot to set the command line flag to compose a cover letter.
Let me get to that.


> On Apr 23, 2020, at 3:43 PM, Chuck Lever <[email protected]> wrote:
>
> If an RPC task is signaled while it is running and the transport is
> not connected, it will never sleep and never be terminated. This can
> happen when a RPC transport is shut down: the remaining tasks are
> signalled, but the transport is disconnected.
>
> Signed-off-by: Chuck Lever <[email protected]>
> ---
> include/linux/sunrpc/sched.h | 3 ++-
> net/sunrpc/sched.c | 14 ++++++--------
> 2 files changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index df696efdd675..9f5e48f154c5 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -170,7 +170,8 @@ struct rpc_task_setup {
>
> #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
>
> -#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
> +#define RPC_SIGNALLED(t) \
> + unlikely(test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate) != 0)
>
> /*
> * Task priorities.
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index 7eba20a88438..99b7b834a110 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -912,6 +912,12 @@ static void __rpc_execute(struct rpc_task *task)
> trace_rpc_task_run_action(task, do_action);
> do_action(task);
>
> + if (RPC_SIGNALLED(task)) {
> + task->tk_rpc_status = -ERESTARTSYS;
> + rpc_exit(task, -ERESTARTSYS);
> + break;
> + }
> +
> /*
> * Lockless check for whether task is sleeping or not.
> */
> @@ -919,14 +925,6 @@ static void __rpc_execute(struct rpc_task *task)
> continue;
>
> /*
> - * Signalled tasks should exit rather than sleep.
> - */
> - if (RPC_SIGNALLED(task)) {
> - task->tk_rpc_status = -ERESTARTSYS;
> - rpc_exit(task, -ERESTARTSYS);
> - }
> -
> - /*
> * The queue->lock protects against races with
> * rpc_make_runnable().
> *
>

--
Chuck Lever



2020-04-23 19:51:39

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH RFC2 1/2] SUNRPC: Set SOFTCONN when destroying GSS contexts

On Thu, 2020-04-23 at 15:43 -0400, Chuck Lever wrote:
> Prevent a (temporary) hang when shutting down an rpc_clnt that has
> used one or more GSS creds.
>
> I noticed that callers of rpc_call_null_helper() use a common set of
> flags, so I collected them all in that helper function.
>
> Signed-off-by: Chuck Lever <[email protected]>
> ---
> net/sunrpc/auth_gss/auth_gss.c | 2 +-
> net/sunrpc/clnt.c | 10 ++++------
> 2 files changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/net/sunrpc/auth_gss/auth_gss.c
> b/net/sunrpc/auth_gss/auth_gss.c
> index ac5cac0dd24b..16cec9755b86 100644
> --- a/net/sunrpc/auth_gss/auth_gss.c
> +++ b/net/sunrpc/auth_gss/auth_gss.c
> @@ -1285,7 +1285,7 @@ static void gss_pipe_free(struct gss_pipe *p)
> ctx->gc_proc = RPC_GSS_PROC_DESTROY;
>
> task = rpc_call_null(gss_auth->client, &new->gc_base,
> - RPC_TASK_ASYNC|RPC_TASK_SOFT);
> + RPC_TASK_ASYNC);

No. This means we can end with silently hanging clients that never go
away. Besides, this function is destroying the creds, so there should
be no problem with them timing out.

> if (!IS_ERR(task))
> rpc_put_task(task);
>
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 325a0858700f..ddc98b97c170 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -2742,7 +2742,8 @@ struct rpc_task *rpc_call_null_helper(struct
> rpc_clnt *clnt,
> .rpc_op_cred = cred,
> .callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
> .callback_data = data,
> - .flags = flags | RPC_TASK_NULLCREDS,
> + .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
> + RPC_TASK_NULLCREDS,
> };
>
> return rpc_run_task(&task_setup_data);
> @@ -2805,8 +2806,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt
> *clnt,
> goto success;
> }
>
> - task = rpc_call_null_helper(clnt, xprt, NULL,
> - RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|
> RPC_TASK_NULLCREDS,
> + task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
> &rpc_cb_add_xprt_call_ops, data);
> if (IS_ERR(task))
> return PTR_ERR(task);
> @@ -2850,9 +2850,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct
> rpc_clnt *clnt,
> goto out_err;
>
> /* Test the connection */
> - task = rpc_call_null_helper(clnt, xprt, NULL,
> - RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
> RPC_TASK_NULLCREDS,
> - NULL, NULL);
> + task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
> if (IS_ERR(task)) {
> status = PTR_ERR(task);
> goto out_err;
>
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2020-04-23 20:04:27

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH RFC2 1/2] SUNRPC: Set SOFTCONN when destroying GSS contexts



> On Apr 23, 2020, at 3:50 PM, Trond Myklebust <[email protected]> wrote:
>
> On Thu, 2020-04-23 at 15:43 -0400, Chuck Lever wrote:
>> Prevent a (temporary) hang when shutting down an rpc_clnt that has
>> used one or more GSS creds.
>>
>> I noticed that callers of rpc_call_null_helper() use a common set of
>> flags, so I collected them all in that helper function.
>>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> net/sunrpc/auth_gss/auth_gss.c | 2 +-
>> net/sunrpc/clnt.c | 10 ++++------
>> 2 files changed, 5 insertions(+), 7 deletions(-)
>>
>> diff --git a/net/sunrpc/auth_gss/auth_gss.c
>> b/net/sunrpc/auth_gss/auth_gss.c
>> index ac5cac0dd24b..16cec9755b86 100644
>> --- a/net/sunrpc/auth_gss/auth_gss.c
>> +++ b/net/sunrpc/auth_gss/auth_gss.c
>> @@ -1285,7 +1285,7 @@ static void gss_pipe_free(struct gss_pipe *p)
>> ctx->gc_proc = RPC_GSS_PROC_DESTROY;
>>
>> task = rpc_call_null(gss_auth->client, &new->gc_base,
>> - RPC_TASK_ASYNC|RPC_TASK_SOFT);
>> + RPC_TASK_ASYNC);
>
> No. This means we can end with silently hanging clients that never go
> away. Besides, this function is destroying the creds, so there should
> be no problem with them timing out.

Agreed. RPC_TASK_SOFT is still set, but it's been moved to
rpc_call_null_helper() because every one of its callers is a soft
caller.


>> if (!IS_ERR(task))
>> rpc_put_task(task);
>>
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index 325a0858700f..ddc98b97c170 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -2742,7 +2742,8 @@ struct rpc_task *rpc_call_null_helper(struct
>> rpc_clnt *clnt,
>> .rpc_op_cred = cred,
>> .callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
>> .callback_data = data,
>> - .flags = flags | RPC_TASK_NULLCREDS,
>> + .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
>> + RPC_TASK_NULLCREDS,
>> };
>>
>> return rpc_run_task(&task_setup_data);
>> @@ -2805,8 +2806,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt
>> *clnt,
>> goto success;
>> }
>>
>> - task = rpc_call_null_helper(clnt, xprt, NULL,
>> - RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|
>> RPC_TASK_NULLCREDS,
>> + task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
>> &rpc_cb_add_xprt_call_ops, data);
>> if (IS_ERR(task))
>> return PTR_ERR(task);
>> @@ -2850,9 +2850,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct
>> rpc_clnt *clnt,
>> goto out_err;
>>
>> /* Test the connection */
>> - task = rpc_call_null_helper(clnt, xprt, NULL,
>> - RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
>> RPC_TASK_NULLCREDS,
>> - NULL, NULL);
>> + task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
>> if (IS_ERR(task)) {
>> status = PTR_ERR(task);
>> goto out_err;
>>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> [email protected]

--
Chuck Lever