2024-02-28 03:48:34

by NeilBrown

[permalink] [raw]
Subject: [PATCH] NFS: Restore -EIO as error to return when "umount -f" aborts request.


When "umount -f" is used to abort all outstanding requests on an NFS
mount, some pending systemcalls can be expected to return an error.
Currently this error is ERESTARTSYS which should never be exposed to
applications (it should only be returned due to a signal).

Prior to Linux v5.2 EIO would be returned in these cases, which it is
more likely that applications will handle.

This patch restores that behaviour so EIO is returned.

Reported-and-tested-by: Zhitao Li <[email protected]>
Closes: https://lore.kernel.org/linux-nfs/CAPKjjnrYvzH8hEk9boaBt-fETX3VD2cjjN-Z6iNgwZpHqYUjWw@mail.gmail.com/
Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling")
Signed-off-by: NeilBrown <[email protected]>
---
include/linux/sunrpc/sched.h | 2 +-
net/sunrpc/clnt.c | 2 +-
net/sunrpc/sched.c | 6 +++---
3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 2d61987b3545..ed3a116efd5d 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -222,7 +222,7 @@ void rpc_put_task(struct rpc_task *);
void rpc_put_task_async(struct rpc_task *);
bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
void rpc_task_try_cancel(struct rpc_task *task, int error);
-void rpc_signal_task(struct rpc_task *);
+void rpc_signal_task(struct rpc_task *, int);
void rpc_exit_task(struct rpc_task *);
void rpc_exit(struct rpc_task *, int);
void rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cda0935a68c9..cdbdfae13030 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -895,7 +895,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
trace_rpc_clnt_killall(clnt);
spin_lock(&clnt->cl_lock);
list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
- rpc_signal_task(rovr);
+ rpc_signal_task(rovr, -EIO);
spin_unlock(&clnt->cl_lock);
}
EXPORT_SYMBOL_GPL(rpc_killall_tasks);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6debf4fd42d4..e4f36fe16808 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -852,14 +852,14 @@ void rpc_exit_task(struct rpc_task *task)
}
}

-void rpc_signal_task(struct rpc_task *task)
+void rpc_signal_task(struct rpc_task *task, int err)
{
struct rpc_wait_queue *queue;

if (!RPC_IS_ACTIVATED(task))
return;

- if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
+ if (!rpc_task_set_rpc_status(task, err))
return;
trace_rpc_task_signalled(task, task->tk_action);
set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
@@ -992,7 +992,7 @@ static void __rpc_execute(struct rpc_task *task)
* clean up after sleeping on some queue, we don't
* break the loop here, but go around once more.
*/
- rpc_signal_task(task);
+ rpc_signal_task(task, -ERESTARTSYS);
}
trace_rpc_task_sync_wake(task, task->tk_action);
}
--
2.43.0



2024-02-28 21:32:16

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH] NFS: Restore -EIO as error to return when "umount -f" aborts request.

On Wed, 2024-02-28 at 14:46 +1100, NeilBrown wrote:
>
> When "umount -f" is used to abort all outstanding requests on an NFS
> mount, some pending systemcalls can be expected to return an error.
> Currently this error is ERESTARTSYS which should never be exposed to
> applications (it should only be returned due to a signal).
>
> Prior to Linux v5.2 EIO would be returned in these cases, which it is
> more likely that applications will handle.
>
> This patch restores that behaviour so EIO is returned.
>
> Reported-and-tested-by: Zhitao Li <[email protected]>
> Closes:
> https://lore.kernel.org/linux-nfs/CAPKjjnrYvzH8hEk9boaBt-fETX3VD2cjjN-Z6iNgwZpHqYUjWw@mail.gmail.com/
> Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling")
> Signed-off-by: NeilBrown <[email protected]>
> ---
>  include/linux/sunrpc/sched.h | 2 +-
>  net/sunrpc/clnt.c            | 2 +-
>  net/sunrpc/sched.c           | 6 +++---
>  3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/sched.h
> b/include/linux/sunrpc/sched.h
> index 2d61987b3545..ed3a116efd5d 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -222,7 +222,7 @@ void rpc_put_task(struct rpc_task
> *);
>  void rpc_put_task_async(struct rpc_task *);
>  bool rpc_task_set_rpc_status(struct rpc_task *task, int
> rpc_status);
>  void rpc_task_try_cancel(struct rpc_task *task, int
> error);
> -void rpc_signal_task(struct rpc_task *);
> +void rpc_signal_task(struct rpc_task *, int);
>  void rpc_exit_task(struct rpc_task *);
>  void rpc_exit(struct rpc_task *, int);
>  void rpc_release_calldata(const struct rpc_call_ops *,
> void *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index cda0935a68c9..cdbdfae13030 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -895,7 +895,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
>   trace_rpc_clnt_killall(clnt);
>   spin_lock(&clnt->cl_lock);
>   list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
> - rpc_signal_task(rovr);
> + rpc_signal_task(rovr, -EIO);

rpc_killall_tasks() is called by rpc_shutdown_client(). It is not
obvious to me that EIO is an appropriate return value in all the cases
where that is being called.

If we're going to replace ERESTARTSYS, then why would we not want to go
for EINTR?

>   spin_unlock(&clnt->cl_lock);
>  }
>  EXPORT_SYMBOL_GPL(rpc_killall_tasks);
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index 6debf4fd42d4..e4f36fe16808 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -852,14 +852,14 @@ void rpc_exit_task(struct rpc_task *task)
>   }
>  }
>  
> -void rpc_signal_task(struct rpc_task *task)
> +void rpc_signal_task(struct rpc_task *task, int err)
>  {
>   struct rpc_wait_queue *queue;
>  
>   if (!RPC_IS_ACTIVATED(task))
>   return;
>  
> - if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
> + if (!rpc_task_set_rpc_status(task, err))
>   return;
>   trace_rpc_task_signalled(task, task->tk_action);
>   set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
> @@ -992,7 +992,7 @@ static void __rpc_execute(struct rpc_task *task)
>   * clean up after sleeping on some queue, we
> don't
>   * break the loop here, but go around once
> more.
>   */
> - rpc_signal_task(task);
> + rpc_signal_task(task, -ERESTARTSYS);
>   }
>   trace_rpc_task_sync_wake(task, task->tk_action);
>   }

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2024-03-04 04:50:39

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH] NFS: Restore -EIO as error to return when "umount -f" aborts request.

On Thu, 29 Feb 2024, Trond Myklebust wrote:
> On Wed, 2024-02-28 at 14:46 +1100, NeilBrown wrote:
> >
> > When "umount -f" is used to abort all outstanding requests on an NFS
> > mount, some pending systemcalls can be expected to return an error.
> > Currently this error is ERESTARTSYS which should never be exposed to
> > applications (it should only be returned due to a signal).
> >
> > Prior to Linux v5.2 EIO would be returned in these cases, which it is
> > more likely that applications will handle.
> >
> > This patch restores that behaviour so EIO is returned.
> >
> > Reported-and-tested-by: Zhitao Li <[email protected]>
> > Closes:
> > https://lore.kernel.org/linux-nfs/CAPKjjnrYvzH8hEk9boaBt-fETX3VD2cjjN-Z6iNgwZpHqYUjWw@mail.gmail.com/
> > Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling")
> > Signed-off-by: NeilBrown <[email protected]>
> > ---
> >  include/linux/sunrpc/sched.h | 2 +-
> >  net/sunrpc/clnt.c            | 2 +-
> >  net/sunrpc/sched.c           | 6 +++---
> >  3 files changed, 5 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/sched.h
> > b/include/linux/sunrpc/sched.h
> > index 2d61987b3545..ed3a116efd5d 100644
> > --- a/include/linux/sunrpc/sched.h
> > +++ b/include/linux/sunrpc/sched.h
> > @@ -222,7 +222,7 @@ void rpc_put_task(struct rpc_task
> > *);
> >  void rpc_put_task_async(struct rpc_task *);
> >  bool rpc_task_set_rpc_status(struct rpc_task *task, int
> > rpc_status);
> >  void rpc_task_try_cancel(struct rpc_task *task, int
> > error);
> > -void rpc_signal_task(struct rpc_task *);
> > +void rpc_signal_task(struct rpc_task *, int);
> >  void rpc_exit_task(struct rpc_task *);
> >  void rpc_exit(struct rpc_task *, int);
> >  void rpc_release_calldata(const struct rpc_call_ops *,
> > void *);
> > diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > index cda0935a68c9..cdbdfae13030 100644
> > --- a/net/sunrpc/clnt.c
> > +++ b/net/sunrpc/clnt.c
> > @@ -895,7 +895,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
> >   trace_rpc_clnt_killall(clnt);
> >   spin_lock(&clnt->cl_lock);
> >   list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
> > - rpc_signal_task(rovr);
> > + rpc_signal_task(rovr, -EIO);
>
> rpc_killall_tasks() is called by rpc_shutdown_client(). It is not
> obvious to me that EIO is an appropriate return value in all the cases
> where that is being called.
>
> If we're going to replace ERESTARTSYS, then why would we not want to go
> for EINTR?

I have no particular opinion on which error code is correct, though I am
confident that ERESTARTSYS is not. I do see the attraction of EINTR,
though "man 3 errno" suggests that is appropriate when a signal is
received, which is not the case here.

With this patch I was primarily reverting what appeared to be an
unintentional change in a previous patch. Such a reversion can then be
used in -stable kernels if that seems appropriate.

If this exercise suggests to you that a different error code might be
more appropriate, then I think any such change should be in a separate
patch which only goes to mainline.

Thanks,
NeilBrown


>
> >   spin_unlock(&clnt->cl_lock);
> >  }
> >  EXPORT_SYMBOL_GPL(rpc_killall_tasks);
> > diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> > index 6debf4fd42d4..e4f36fe16808 100644
> > --- a/net/sunrpc/sched.c
> > +++ b/net/sunrpc/sched.c
> > @@ -852,14 +852,14 @@ void rpc_exit_task(struct rpc_task *task)
> >   }
> >  }
> >  
> > -void rpc_signal_task(struct rpc_task *task)
> > +void rpc_signal_task(struct rpc_task *task, int err)
> >  {
> >   struct rpc_wait_queue *queue;
> >  
> >   if (!RPC_IS_ACTIVATED(task))
> >   return;
> >  
> > - if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
> > + if (!rpc_task_set_rpc_status(task, err))
> >   return;
> >   trace_rpc_task_signalled(task, task->tk_action);
> >   set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
> > @@ -992,7 +992,7 @@ static void __rpc_execute(struct rpc_task *task)
> >   * clean up after sleeping on some queue, we
> > don't
> >   * break the loop here, but go around once
> > more.
> >   */
> > - rpc_signal_task(task);
> > + rpc_signal_task(task, -ERESTARTSYS);
> >   }
> >   trace_rpc_task_sync_wake(task, task->tk_action);
> >   }
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> [email protected]
>
>
>


2024-03-04 13:42:25

by Jeffrey Layton

[permalink] [raw]
Subject: Re: [PATCH] NFS: Restore -EIO as error to return when "umount -f" aborts request.

On Wed, 2024-02-28 at 14:46 +1100, NeilBrown wrote:
> When "umount -f" is used to abort all outstanding requests on an NFS
> mount, some pending systemcalls can be expected to return an error.
> Currently this error is ERESTARTSYS which should never be exposed to
> applications (it should only be returned due to a signal).
>
> Prior to Linux v5.2 EIO would be returned in these cases, which it is
> more likely that applications will handle.
>
> This patch restores that behaviour so EIO is returned.
>
> Reported-and-tested-by: Zhitao Li <[email protected]>
> Closes: https://lore.kernel.org/linux-nfs/CAPKjjnrYvzH8hEk9boaBt-fETX3VD2cjjN-Z6iNgwZpHqYUjWw@mail.gmail.com/
> Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling")
> Signed-off-by: NeilBrown <[email protected]>
> ---
> include/linux/sunrpc/sched.h | 2 +-
> net/sunrpc/clnt.c | 2 +-
> net/sunrpc/sched.c | 6 +++---
> 3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index 2d61987b3545..ed3a116efd5d 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -222,7 +222,7 @@ void rpc_put_task(struct rpc_task *);
> void rpc_put_task_async(struct rpc_task *);
> bool rpc_task_set_rpc_status(struct rpc_task *task, int rpc_status);
> void rpc_task_try_cancel(struct rpc_task *task, int error);
> -void rpc_signal_task(struct rpc_task *);
> +void rpc_signal_task(struct rpc_task *, int);
> void rpc_exit_task(struct rpc_task *);
> void rpc_exit(struct rpc_task *, int);
> void rpc_release_calldata(const struct rpc_call_ops *, void *);
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index cda0935a68c9..cdbdfae13030 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -895,7 +895,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
> trace_rpc_clnt_killall(clnt);
> spin_lock(&clnt->cl_lock);
> list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
> - rpc_signal_task(rovr);
> + rpc_signal_task(rovr, -EIO);
> spin_unlock(&clnt->cl_lock);
> }
> EXPORT_SYMBOL_GPL(rpc_killall_tasks);
> diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
> index 6debf4fd42d4..e4f36fe16808 100644
> --- a/net/sunrpc/sched.c
> +++ b/net/sunrpc/sched.c
> @@ -852,14 +852,14 @@ void rpc_exit_task(struct rpc_task *task)
> }
> }
>
> -void rpc_signal_task(struct rpc_task *task)
> +void rpc_signal_task(struct rpc_task *task, int err)
> {
> struct rpc_wait_queue *queue;
>
> if (!RPC_IS_ACTIVATED(task))
> return;
>
> - if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
> + if (!rpc_task_set_rpc_status(task, err))
> return;
> trace_rpc_task_signalled(task, task->tk_action);
> set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
> @@ -992,7 +992,7 @@ static void __rpc_execute(struct rpc_task *task)
> * clean up after sleeping on some queue, we don't
> * break the loop here, but go around once more.
> */
> - rpc_signal_task(task);
> + rpc_signal_task(task, -ERESTARTSYS);
> }
> trace_rpc_task_sync_wake(task, task->tk_action);
> }

This seems like the right thing to do.

Reviewed-by: Jeff Layton <[email protected]>