Occasionally, NLM lock and unlock requests fail with EIO and ENOLCK,
respectively. This usually happens when the NFS server is restarted
while an NLM lock test is running.
Currently, there is a 9-second limit for retrying the bind operation.
If the server is under load, the port mapper might take more than 9
seconds to become ready after the NFS server is restarted.
This patch increases the rebind timeout from 9 to 30 seconds,
allowing a bit more time for the port mapper to become ready.
Signed-off-by: Dai Ngo <[email protected]>
---
include/linux/sunrpc/clnt.h | 3 +++
include/linux/sunrpc/sched.h | 4 ++--
net/sunrpc/clnt.c | 2 +-
net/sunrpc/sched.c | 3 ++-
4 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 770ef2cb5775..7f2dee56c121 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
#define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
+#define RPC_CLNT_REBIND_DELAY 3
+#define RPC_CLNT_REBIND_MAX_TIMEOUT 30
+
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
const struct rpc_program *, u32);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index b8ca3ecaf8d7..e9dc142f10bb 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -90,8 +90,8 @@ struct rpc_task {
#endif
unsigned char tk_priority : 2,/* Task priority */
tk_garb_retry : 2,
- tk_cred_retry : 2,
- tk_rebind_retry : 2;
+ tk_cred_retry : 2;
+ unsigned char tk_rebind_retry;
};
typedef void (*rpc_action)(struct rpc_task *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0b0b9f1eed46..6c89a1fa40bf 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
if (task->tk_rebind_retry == 0)
break;
task->tk_rebind_retry--;
- rpc_delay(task, 3*HZ);
+ rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
goto retry_timeout;
case -ENOBUFS:
rpc_delay(task, HZ >> 2);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index be587a308e05..5c18a35752aa 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
/* Initialize retry counters */
task->tk_garb_retry = 2;
task->tk_cred_retry = 2;
- task->tk_rebind_retry = 2;
+ task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
+ RPC_CLNT_REBIND_DELAY;
/* starting timestamp */
task->tk_start = ktime_get();
--
2.9.5
Hi Trond,
Could you please let me know your opinion on this patch?
Thanks,
-Dai
On 2/10/23 12:10 AM, Dai Ngo wrote:
> Occasionally NLM lock and unlock request fail with EIO and ENOLCK
> respectively. This usually happens when the NFS server is restarted
> while NLM lock test is running.
>
> Currently there is a 9 seconds limit for retrying the bind operation.
> If the server is under load the port mapper might take more than 9
> seconds to become ready after the NFS server restarted.
>
> This patch increases the timeout for rebind from 9 to 30 seconds
> allowing a bit more time for the port mapper to become ready.
>
> Signed-off-by: Dai Ngo <[email protected]>
> ---
> include/linux/sunrpc/clnt.h | 3 +++
> include/linux/sunrpc/sched.h | 4 ++--
> net/sunrpc/clnt.c | 2 +-
> net/sunrpc/sched.c | 3 ++-
> 4 files changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
> index 770ef2cb5775..7f2dee56c121 100644
> --- a/include/linux/sunrpc/clnt.h
> +++ b/include/linux/sunrpc/clnt.h
> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
> #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
> #define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
>
> +#define RPC_CLNT_REBIND_DELAY 3
> +#define RPC_CLNT_REBIND_MAX_TIMEOUT 30
> +
> struct rpc_clnt *rpc_create(struct rpc_create_args *args);
> struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
> const struct rpc_program *, u32);
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index b8ca3ecaf8d7..e9dc142f10bb 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -90,8 +90,8 @@ struct rpc_task {
> #endif
> unsigned char tk_priority : 2,/* Task priority */
> tk_garb_retry : 2,
> - tk_cred_retry : 2,
> - tk_rebind_retry : 2;
> + tk_cred_retry : 2;
> + unsigned char tk_rebind_retry;
> };
>
> typedef void (*rpc_action)(struct rpc_task *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index 0b0b9f1eed46..6c89a1fa40bf 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
> if (task->tk_rebind_retry == 0)
> break;
> task->tk_rebind_retry--;
> - rpc_delay(task, 3*HZ);
> + rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
> goto retry_timeout;
> case -ENOBUFS:
> rpc_delay(task, HZ >> 2);
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index be587a308e05..5c18a35752aa 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
> /* Initialize retry counters */
> task->tk_garb_retry = 2;
> task->tk_cred_retry = 2;
> - task->tk_rebind_retry = 2;
> + task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
> + RPC_CLNT_REBIND_DELAY;
>
> /* starting timestamp */
> task->tk_start = ktime_get();
Hi Anna,
Just a reminder that this patch is still waiting for a review.
Thanks,
-Dai
On 2/17/23 10:22 AM, [email protected] wrote:
> Hi Trond,
>
> Could you please let me know your opinion on this patch?
>
> Thanks,
> -Dai
>
> On 2/10/23 12:10 AM, Dai Ngo wrote:
>> Occasionally NLM lock and unlock request fail with EIO and ENOLCK
>> respectively. This usually happens when the NFS server is restarted
>> while NLM lock test is running.
>>
>> Currently there is a 9 seconds limit for retrying the bind operation.
>> If the server is under load the port mapper might take more than 9
>> seconds to become ready after the NFS server restarted.
>>
>> This patch increases the timeout for rebind from 9 to 30 seconds
>> allowing a bit more time for the port mapper to become ready.
>>
>> Signed-off-by: Dai Ngo <[email protected]>
>> ---
>> include/linux/sunrpc/clnt.h | 3 +++
>> include/linux/sunrpc/sched.h | 4 ++--
>> net/sunrpc/clnt.c | 2 +-
>> net/sunrpc/sched.c | 3 ++-
>> 4 files changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
>> index 770ef2cb5775..7f2dee56c121 100644
>> --- a/include/linux/sunrpc/clnt.h
>> +++ b/include/linux/sunrpc/clnt.h
>> @@ -162,6 +162,9 @@ struct rpc_add_xprt_test {
>> #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
>> #define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
>> +#define RPC_CLNT_REBIND_DELAY 3
>> +#define RPC_CLNT_REBIND_MAX_TIMEOUT 30
>> +
>> struct rpc_clnt *rpc_create(struct rpc_create_args *args);
>> struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
>> const struct rpc_program *, u32);
>> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
>> index b8ca3ecaf8d7..e9dc142f10bb 100644
>> --- a/include/linux/sunrpc/sched.h
>> +++ b/include/linux/sunrpc/sched.h
>> @@ -90,8 +90,8 @@ struct rpc_task {
>> #endif
>> unsigned char tk_priority : 2,/* Task priority */
>> tk_garb_retry : 2,
>> - tk_cred_retry : 2,
>> - tk_rebind_retry : 2;
>> + tk_cred_retry : 2;
>> + unsigned char tk_rebind_retry;
>> };
>> typedef void (*rpc_action)(struct rpc_task *);
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index 0b0b9f1eed46..6c89a1fa40bf 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -2053,7 +2053,7 @@ call_bind_status(struct rpc_task *task)
>> if (task->tk_rebind_retry == 0)
>> break;
>> task->tk_rebind_retry--;
>> - rpc_delay(task, 3*HZ);
>> + rpc_delay(task, RPC_CLNT_REBIND_DELAY * HZ);
>> goto retry_timeout;
>> case -ENOBUFS:
>> rpc_delay(task, HZ >> 2);
>> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
>> index be587a308e05..5c18a35752aa 100644
>> --- a/net/sunrpc/sched.c
>> +++ b/net/sunrpc/sched.c
>> @@ -817,7 +817,8 @@ rpc_init_task_statistics(struct rpc_task *task)
>> /* Initialize retry counters */
>> task->tk_garb_retry = 2;
>> task->tk_cred_retry = 2;
>> - task->tk_rebind_retry = 2;
>> + task->tk_rebind_retry = RPC_CLNT_REBIND_MAX_TIMEOUT /
>> + RPC_CLNT_REBIND_DELAY;
>> /* starting timestamp */
>> task->tk_start = ktime_get();