2023-12-08 16:34:05

by Benjamin Coddington

[permalink] [raw]
Subject: [PATCH] SUNRPC: Fixup v4.1 backchannel request timeouts

After commit 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on
the sending list"), any 4.1 backchannel tasks placed on the sending queue
would immediately return with -ETIMEDOUT since their req timers are zero.
We can fix this by keeping a copy of the rpc_clnt's timeout params on the
transport and using them to properly set up the timeouts on the v4.1
backchannel tasks' req.

Fixes: 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on the sending list")
Signed-off-by: Benjamin Coddington <[email protected]>
---
include/linux/sunrpc/xprt.h | 1 +
net/sunrpc/clnt.c | 3 +++
net/sunrpc/xprt.c | 15 +++++++++++++--
3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index f85d3a0daca2..7565902053f3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -285,6 +285,7 @@ struct rpc_xprt {
* items */
struct list_head bc_pa_list; /* List of preallocated
* backchannel rpc_rqst's */
+ struct rpc_timeout bc_timeout; /* backchannel timeout params */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */

struct rb_root recv_queue; /* Receive queue */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d6805c1268a7..5891757c88b1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -279,6 +279,9 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
clnt->cl_autobind = 1;

clnt->cl_timeout = timeout;
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+ memcpy(&xprt->bc_timeout, timeout, sizeof(struct rpc_timeout));
+#endif
rcu_assign_pointer(clnt->cl_xprt, xprt);
spin_unlock(&clnt->cl_lock);

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 92301e32cda4..d9cbe0814fd8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -655,9 +655,14 @@ static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)

static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
{
- const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
+ const struct rpc_timeout *to;
unsigned long majortimeo = req->rq_timeout;

+ if (req->rq_task->tk_client)
+ to = req->rq_task->tk_client->cl_timeout;
+ else
+ to = &req->rq_xprt->bc_timeout;
+
if (to->to_exponential)
majortimeo <<= to->to_retries;
else
@@ -686,7 +691,11 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
time_init = jiffies;
else
time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
- req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+
+ if (task->tk_client)
+ req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+ else
+ req->rq_timeout = req->rq_xprt->bc_timeout.to_initval;
req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
req->rq_minortimeo = time_init + req->rq_timeout;
}
@@ -1998,6 +2007,8 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
*/
xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
xbufp->tail[0].iov_len;
+
+ xprt_init_majortimeo(task, req);
}
#endif

--
2.43.0



2023-12-08 18:17:09

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] SUNRPC: Fixup v4.1 backchannel request timeouts

On Fri, 2023-12-08 at 11:33 -0500, Benjamin Coddington wrote:
> After commit 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out
> when on
> the sending list"), any 4.1 backchannel tasks placed on the sending
> queue
> would immediately return with -ETIMEDOUT since their req timers are
> zero.
> We can fix this by keeping a copy of the rpc_clnt's timeout params on
> the
> transport and using them to properly setup the timeouts on the v4.1
> backchannel tasks' req.
>
> Fixes: 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on
> the sending list")
> Signed-off-by: Benjamin Coddington <[email protected]>
> ---
>  include/linux/sunrpc/xprt.h |  1 +
>  net/sunrpc/clnt.c           |  3 +++
>  net/sunrpc/xprt.c           | 15 +++++++++++++--
>  3 files changed, 17 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/sunrpc/xprt.h
> b/include/linux/sunrpc/xprt.h
> index f85d3a0daca2..7565902053f3 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -285,6 +285,7 @@ struct rpc_xprt {
>   * items */
>   struct list_head bc_pa_list; /* List of
> preallocated
>   * backchannel
> rpc_rqst's */
> + struct rpc_timeout bc_timeout; /* backchannel
> timeout params */
>  #endif /* CONFIG_SUNRPC_BACKCHANNEL */
>  
>   struct rb_root recv_queue; /* Receive queue */
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index d6805c1268a7..5891757c88b1 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -279,6 +279,9 @@ static struct rpc_xprt
> *rpc_clnt_set_transport(struct rpc_clnt *clnt,
>   clnt->cl_autobind = 1;
>  
>   clnt->cl_timeout = timeout;
> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
> + memcpy(&xprt->bc_timeout, timeout, sizeof(struct
> rpc_timeout));
> +#endif
>   rcu_assign_pointer(clnt->cl_xprt, xprt);
>   spin_unlock(&clnt->cl_lock);
>  
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 92301e32cda4..d9cbe0814fd8 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -655,9 +655,14 @@ static unsigned long
> xprt_abs_ktime_to_jiffies(ktime_t abstime)
>  
>  static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
>  {
> - const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
> + const struct rpc_timeout *to;
>   unsigned long majortimeo = req->rq_timeout;
>  
> + if (req->rq_task->tk_client)
> + to = req->rq_task->tk_client->cl_timeout;
> + else
> + to = &req->rq_xprt->bc_timeout;
>

If you're going to convert this function for generic use, then please
pass the timeout 'to' as a function parameter rather than making
assumptions here.

> +
>   if (to->to_exponential)
>   majortimeo <<= to->to_retries;
>   else
> @@ -686,7 +691,11 @@ static void xprt_init_majortimeo(struct rpc_task
> *task, struct rpc_rqst *req)
>   time_init = jiffies;
>   else
>   time_init = xprt_abs_ktime_to_jiffies(task-
> >tk_start);
> - req->rq_timeout = task->tk_client->cl_timeout->to_initval;
> +
> + if (task->tk_client)
> + req->rq_timeout = task->tk_client->cl_timeout-
> >to_initval;
> + else
> + req->rq_timeout = req->rq_xprt-
> >bc_timeout.to_initval;

Ditto.

>   req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
>   req->rq_minortimeo = time_init + req->rq_timeout;
>  }
> @@ -1998,6 +2007,8 @@ xprt_init_bc_request(struct rpc_rqst *req,
> struct rpc_task *task)
>   */
>   xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
>   xbufp->tail[0].iov_len;
> +
> + xprt_init_majortimeo(task, req);
>  }
>  #endif
>  

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2023-12-08 19:08:14

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH] SUNRPC: Fixup v4.1 backchannel request timeouts

On 8 Dec 2023, at 13:16, Trond Myklebust wrote:

> On Fri, 2023-12-08 at 11:33 -0500, Benjamin Coddington wrote:
>> After commit 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out
>> when on
>> the sending list"), any 4.1 backchannel tasks placed on the sending
>> queue
>> would immediately return with -ETIMEDOUT since their req timers are
>> zero.
>> We can fix this by keeping a copy of the rpc_clnt's timeout params on
>> the
>> transport and using them to properly setup the timeouts on the v4.1
>> backchannel tasks' req.
>>
>> Fixes: 59464b262ff5 ("SUNRPC: SOFTCONN tasks should time out when on
>> the sending list")
>> Signed-off-by: Benjamin Coddington <[email protected]>
>> ---
>>  include/linux/sunrpc/xprt.h |  1 +
>>  net/sunrpc/clnt.c           |  3 +++
>>  net/sunrpc/xprt.c           | 15 +++++++++++++--
>>  3 files changed, 17 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/sunrpc/xprt.h
>> b/include/linux/sunrpc/xprt.h
>> index f85d3a0daca2..7565902053f3 100644
>> --- a/include/linux/sunrpc/xprt.h
>> +++ b/include/linux/sunrpc/xprt.h
>> @@ -285,6 +285,7 @@ struct rpc_xprt {
>>   * items */
>>   struct list_head bc_pa_list; /* List of
>> preallocated
>>   * backchannel
>> rpc_rqst's */
>> + struct rpc_timeout bc_timeout; /* backchannel
>> timeout params */
>>  #endif /* CONFIG_SUNRPC_BACKCHANNEL */
>>  
>>   struct rb_root recv_queue; /* Receive queue */
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index d6805c1268a7..5891757c88b1 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -279,6 +279,9 @@ static struct rpc_xprt
>> *rpc_clnt_set_transport(struct rpc_clnt *clnt,
>>   clnt->cl_autobind = 1;
>>  
>>   clnt->cl_timeout = timeout;
>> +#if defined(CONFIG_SUNRPC_BACKCHANNEL)
>> + memcpy(&xprt->bc_timeout, timeout, sizeof(struct
>> rpc_timeout));
>> +#endif
>>   rcu_assign_pointer(clnt->cl_xprt, xprt);
>>   spin_unlock(&clnt->cl_lock);
>>  
>> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
>> index 92301e32cda4..d9cbe0814fd8 100644
>> --- a/net/sunrpc/xprt.c
>> +++ b/net/sunrpc/xprt.c
>> @@ -655,9 +655,14 @@ static unsigned long
>> xprt_abs_ktime_to_jiffies(ktime_t abstime)
>>  
>>  static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
>>  {
>> - const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
>> + const struct rpc_timeout *to;
>>   unsigned long majortimeo = req->rq_timeout;
>>  
>> + if (req->rq_task->tk_client)
>> + to = req->rq_task->tk_client->cl_timeout;
>> + else
>> + to = &req->rq_xprt->bc_timeout;
>>
>
> If you're going to convert this function for generic use, then please
> pass the timeout 'to' as a function parameter rather than making
> assumptions here.

No problem, I'll send it, but we'll end up needing to make the same assumption
when calling xprt_reset_majortimeo() from xprt_adjust_timeout().

Actually, it looks like backchannel tasks never currently call
rpc_check_timeout(), so we could just pass the rpc_clnt's rpc_timeout
there, but that looks like a potential future problem. I'll send a v2 that
way and kick off my testing again.

I always thought that NULL tk_client was definitively backchannel. Is there
a case you're worried about?

We can fix this another way, probably. Looks like this fix won't actually
end up implementing normal timeout processing without adding
rpc_check_timeout() calls to the backchannel's tk_actions.

Ben