We are using applications which hang if any NFS servers fail to
respond. We would like to be able to control NFS timeouts so that we
can control the maximum time that the applications hang. We currently
can't do that with TCP NFS mounts, since RPC calls made to an existing
NFS mount are first subject to the default untuneable Sun RPC timeout
of 2 minutes.
(I'll note that the existing NFS manpage seems to not describe current
behaviour correctly, since it says that this two-minute timeout applies
to initial mount operations (which it does not), and does not say that
the two-minute timeout applies to operations on existing mounts (which
it does).)
An existing thread discussing this patch can be found here:
Link: https://lore.kernel.org/linux-nfs/[email protected]/T/
This patch uses the RPC call timeout to set the xprt timeout. In that
discussion thread, Jeff Layton has pointed out that this may or may not
be the ideal approach. I have suggested these alternatives, and would
be happy to get feedback:
- Create system-wide tuneables for xs_[local|udp|tcp]_default_timeout.
In our case that's less-than-ideal, since we want to change the total
timeout for an NFS mount on a per-server or per-mount basis rather than
a system-wide basis, but it would do in a pinch.
- Add a second set of timeout options to NFS so that RPC call and xprt
timeouts can be specified separately. I'm guessing no-one is
enthusiastic about option bloat, even if this would be the theoretically
cleanest option. I'm guessing this would also involve changing the
Sun RPC API and everything that calls it in order for it to accept the
second set of timeout options.
- Use timeo and retrans for the RPC call timeout, and retry for the
xprt timeout. Or do the opposite. The NFS manpage describes the current
behaviour incorrectly, so this at least wouldn't make the documentation
any worse. I assume this would also involve changing the Sun RPC API.
Use rpc_create_args->timeout to initialize rpc_xprt->timeout
Signed-off-by: Andrew Klaassen <[email protected]>
---
include/linux/sunrpc/xprt.h | 3 +++
net/sunrpc/clnt.c | 1 +
net/sunrpc/xprt.c | 21 +++++++++++++++++++++
net/sunrpc/xprtsock.c | 19 ++++++++++++++++---
4 files changed, 41 insertions(+), 3 deletions(-)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index b9f59aabee53..ca7be090cf83 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -333,6 +333,7 @@ struct xprt_create {
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
struct rpc_xprt_switch *bc_xps;
unsigned int flags;
+ const struct rpc_timeout *timeout; /* timeout parms */
};
struct xprt_class {
@@ -373,6 +374,8 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_release(struct rpc_task *task);
struct rpc_xprt * xprt_get(struct rpc_xprt *xprt);
void xprt_put(struct rpc_xprt *xprt);
+struct rpc_timeout *xprt_alloc_timeout(const struct rpc_timeout *timeo,
+ const struct rpc_timeout *default_timeo);
struct rpc_xprt * xprt_alloc(struct net *net, size_t size,
unsigned int num_prealloc,
unsigned int max_req);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0b0b9f1eed46..1350c1f489f7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -532,6 +532,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.addrlen = args->addrsize,
.servername = args->servername,
.bc_xprt = args->bc_xprt,
+ .timeout = args->timeout,
};
char servername[48];
struct rpc_clnt *clnt;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ab453ede54f0..0bb800c90976 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1801,6 +1801,26 @@ static void xprt_free_id(struct rpc_xprt *xprt)
ida_free(&rpc_xprt_ids, xprt->id);
}
+struct rpc_timeout *xprt_alloc_timeout(const struct rpc_timeout *timeo,
+ const struct rpc_timeout *default_timeo)
+{
+ struct rpc_timeout *timeout;
+
+ timeout = kzalloc(sizeof(*timeout), GFP_KERNEL);
+ if (!timeout)
+ return ERR_PTR(-ENOMEM);
+ if (timeo)
+ memcpy(timeout, timeo, sizeof(struct rpc_timeout));
+ else
+ memcpy(timeout, default_timeo, sizeof(struct rpc_timeout));
+ return timeout;
+}
+
+static void xprt_free_timeout(struct rpc_xprt *xprt)
+{
+ kfree(xprt->timeout);
+}
+
struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
unsigned int num_prealloc,
unsigned int max_alloc)
@@ -1837,6 +1857,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
void xprt_free(struct rpc_xprt *xprt)
{
+ xprt_free_timeout(xprt);
put_net_track(xprt->xprt_net, &xprt->ns_tracker);
xprt_free_all_slots(xprt);
xprt_free_id(xprt);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index aaa5b2741b79..687e06226433 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2924,7 +2924,11 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
xprt->ops = &xs_udp_ops;
- xprt->timeout = &xs_udp_default_timeout;
+ xprt->timeout = xprt_alloc_timeout(args->timeout, &xs_udp_default_timeout);
+ if (IS_ERR(xprt->timeout)) {
+ ret = ERR_CAST(xprt->timeout);
+ goto out_err;
+ }
INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn);
INIT_WORK(&transport->error_worker, xs_error_handle);
@@ -3003,7 +3007,12 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->idle_timeout = XS_IDLE_DISC_TO;
xprt->ops = &xs_tcp_ops;
- xprt->timeout = &xs_tcp_default_timeout;
+
+ xprt->timeout = xprt_alloc_timeout(args->timeout, &xs_tcp_default_timeout);
+ if (IS_ERR(xprt->timeout)) {
+ ret = ERR_CAST(xprt->timeout);
+ goto out_err;
+ }
xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
xprt->connect_timeout = xprt->timeout->to_initval *
@@ -3071,7 +3080,11 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
xprt->prot = IPPROTO_TCP;
xprt->xprt_class = &xs_bc_tcp_transport;
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
- xprt->timeout = &xs_tcp_default_timeout;
+ xprt->timeout = xprt_alloc_timeout(args->timeout, &xs_tcp_default_timeout);
+ if (IS_ERR(xprt->timeout)) {
+ ret = ERR_CAST(xprt->timeout);
+ goto out_err;
+ }
/* backchannel */
xprt_set_bound(xprt);
--
2.39.2
> On Mar 13, 2023, at 11:17, Andrew Klaassen <[email protected]> wrote:
>
> We are using applications which hang if any NFS servers fail to
> respond. We would like to be able to control NFS timeouts so that we
> can control the maximum time that the applications hang. We currently
> can't do that with TCP NFS mounts, since RPC calls made to an existing
> NFS mount are first subject to the default untuneable Sun RPC timeout
> of 2 minutes.
>
> (I'll note that the existing NFS manpage seems to not describe current
> behaviour correctly, since it says that this two-minute timeout applies
> to initial mount operations (which it does not), and does not say that
> the two-minute timeout applies to operations on existing mounts (which
> it does).)
>
> An existing thread discussing this patch can be found here:
>
> Link: https://lore.kernel.org/linux-nfs/[email protected]/T/
>
> This patch uses the RPC call timeout to set the xprt timeout. In that
> discussion thread, Jeff Layton has pointed out that this may or may not
> be the ideal approach. I have suggested these alternatives, and would
> be happy to get feedback:
>
> - Create system-wide tuneables for xs_[local|udp|tcp]_default_timeout.
> In our case that's less-than-ideal, since we want to change the total
> timeout for an NFS mount on a per-server or per-mount basis rather than
> a system-wide basis, but it would do in a pinch.
>
> - Add a second set of timeout options to NFS so that RPC call and xprt
> timeouts can be specified separately. I'm guessing no-one is
> enthusiastic about option bloat, even if this would be the theoretically
> cleanest option. I'm guessing this would also involve changing the
> Sun RPC API and everything that calls it in order for it to accept the
> second set of timeout options.
>
> - Use timeo and retrans for the RPC call timeout, and retry for the
> xprt timeout. Or do the opposite. The NFS manpage describes the current
> behaviour incorrectly, so this at least wouldn't make the documentation
> any worse. I assume this would also involve changing the Sun RPC API.
>
> Use rpc_create_args->timeout to initialize rpc_xprt->timeout
>
Just because something can be done in the kernel, it doesn’t mean that it should be done in the kernel. If you’re unhappy with sunrpc timeouts, then it should be quite possible to do those calls in userspace, and pass the port number down as part of the mount syscall.
_________________________________
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]
> From: Trond Myklebust <[email protected]>
> Sent: Tuesday, March 14, 2023 2:40 PM
>
> > On Mar 13, 2023, at 11:17, Andrew Klaassen
> <[email protected]> wrote:
> >
> > We are using applications which hang if any NFS servers fail to
> > respond. We would like to be able to control NFS timeouts so that we
> > can control the maximum time that the applications hang. We currently
> > can't do that with TCP NFS mounts, since RPC calls made to an existing
> > NFS mount are first subject to the default untuneable Sun RPC timeout
> > of 2 minutes.
> >
> > (I'll note that the existing NFS manpage seems to not describe current
> > behaviour correctly, since it says that this two-minute timeout
> > applies to initial mount operations (which it does not), and does not
> > say that the two-minute timeout applies to operations on existing
> > mounts (which it does).)
> >
> > An existing thread discussing this patch can be found here:
> >
> > Link:
> > https://lore.kernel.org/linux-nfs/45e2e7f05a13abab777b3b0868744cdbfc62
> > [email protected]/T/
> >
> > This patch uses the RPC call timeout to set the xprt timeout. In that
> > discussion thread, Jeff Layton has pointed out that this may or may
> > not be the ideal approach. I have suggested these alternatives, and
> > would be happy to get feedback:
> >
> > - Create system-wide tuneables for xs_[local|udp|tcp]_default_timeout.
> > In our case that's less-than-ideal, since we want to change the total
> > timeout for an NFS mount on a per-server or per-mount basis rather
> > than a system-wide basis, but it would do in a pinch.
> >
> > - Add a second set of timeout options to NFS so that RPC call and xprt
> > timeouts can be specified separately. I'm guessing no-one is
> > enthusiastic about option bloat, even if this would be the
> > theoretically cleanest option. I'm guessing this would also involve
> > changing the Sun RPC API and everything that calls it in order for it
> > to accept the second set of timeout options.
> >
> > - Use timeo and retrans for the RPC call timeout, and retry for the
> > xprt timeout. Or do the opposite. The NFS manpage describes the
> > current behaviour incorrectly, so this at least wouldn't make the
> > documentation any worse. I assume this would also involve changing the
> Sun RPC API.
> >
> > Use rpc_create_args->timeout to initialize rpc_xprt->timeout
> >
>
> Just because something can be done in the kernel, it doesn’t mean that it
> should be done in the kernel. If you’re unhappy with sunrpc timeouts, then it
> should be quite possible to do those calls in userspace, and pass the port
> number down as part of the mount syscall.
Thanks for the direction, Trond. I'll spend some time getting familiar with the code and see if I can make that happen.
I'm currently clueless about how to get started, as there doesn't appear to be any way to override sunrpc timeout defaults for any sunrpc call, so I may have some followup questions once I get my head wrapped around the mount code.
Andrew
> From: Trond Myklebust <[email protected]>
> Sent: Tuesday, March 14, 2023 2:40 PM
>
> > On Mar 13, 2023, at 11:17, Andrew Klaassen
> <[email protected]> wrote:
> >
> > We are using applications which hang if any NFS servers fail to
> > respond. We would like to be able to control NFS timeouts so that we
> > can control the maximum time that the applications hang. We currently
> > can't do that with TCP NFS mounts, since RPC calls made to an existing
> > NFS mount are first subject to the default untuneable Sun RPC timeout
> > of 2 minutes.
> >
> > (I'll note that the existing NFS manpage seems to not describe current
> > behaviour correctly, since it says that this two-minute timeout
> > applies to initial mount operations (which it does not), and does not
> > say that the two-minute timeout applies to operations on existing
> > mounts (which it does).)
> >
> > An existing thread discussing this patch can be found here:
> >
> > Link:
> > https://lore.kernel.org/linux-
> nfs/45e2e7f05a13abab777b3b0868744cdbfc62
> > [email protected]/T/
> >
> > This patch uses the RPC call timeout to set the xprt timeout. In that
> > discussion thread, Jeff Layton has pointed out that this may or may
> > not be the ideal approach. I have suggested these alternatives, and
> > would be happy to get feedback:
> >
> > - Create system-wide tuneables for xs_[local|udp|tcp]_default_timeout.
> > In our case that's less-than-ideal, since we want to change the total
> > timeout for an NFS mount on a per-server or per-mount basis rather
> > than a system-wide basis, but it would do in a pinch.
> >
> > - Add a second set of timeout options to NFS so that RPC call and xprt
> > timeouts can be specified separately. I'm guessing no-one is
> > enthusiastic about option bloat, even if this would be the
> > theoretically cleanest option. I'm guessing this would also involve
> > changing the Sun RPC API and everything that calls it in order for it
> > to accept the second set of timeout options.
> >
> > - Use timeo and retrans for the RPC call timeout, and retry for the
> > xprt timeout. Or do the opposite. The NFS manpage describes the
> > current behaviour incorrectly, so this at least wouldn't make the
> > documentation any worse. I assume this would also involve changing the
> Sun RPC API.
> >
> > Use rpc_create_args->timeout to initialize rpc_xprt->timeout
>
> Just because something can be done in the kernel, it doesn’t mean that it
> should be done in the kernel. If you’re unhappy with sunrpc timeouts, then it
> should be quite possible to do those calls in userspace, and pass the port
> number down as part of the mount syscall.
An update, since I got an email from someone inquiring about this problem and my patch: I worked on this for a little while to try to figure out how to do what you suggested, but I didn't find any way to pass an existing port number to the mount syscall in a way that would preserve the timeout options on the userspace sunrpc call.
If you have a quick proof-of-concept I could try to take it from there. Unfortunately my company has moved me on to other projects and I'm not able to dedicate much time to it.
Andrew