2021-06-03 23:00:28

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH v1 0/3] modify xprt state using sysfs

From: Olga Kornievskaia <[email protected]>

When a transport gets stuck, it is desired to be able to move the tasks
that have been stuck/queued on that transport to another. This patch
series attempts to do so. First patch, takes a transport and marks
it offline so that no more tasks are queued on it. Second,
we identify which tasks are able to be re-tried on a different
transport (only 4.1+). Lastly, once the transport is deemed bad and
in need of a removal, it's marked to be removed. Any tasks that are
stuck there will now release the transport and try picking a different
one. This transport will be removed from the list of xprts. First
transport with which the RPC client was created is considered the main
transport and can't be taken offline or removed.

Olga Kornievskaia (3):
sunrpc: take a xprt offline using sysfs
NFSv4.1 identify and mark RPC tasks that can move between transports
sunrpc: remove an offlined xprt using sysfs

fs/nfs/nfs4proc.c | 38 ++++++++++++++++++++++----
fs/nfs/pagelist.c | 8 ++++--
fs/nfs/write.c | 6 ++++-
include/linux/sunrpc/sched.h | 2 ++
include/linux/sunrpc/xprt.h | 3 +++
net/sunrpc/clnt.c | 21 +++++++++++++++
net/sunrpc/sysfs.c | 52 +++++++++++++++++++++++++++++++++---
net/sunrpc/xprtmultipath.c | 3 ++-
8 files changed, 120 insertions(+), 13 deletions(-)

--
2.27.0


2021-06-03 23:00:49

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH v1 3/3] sunrpc: remove an offlined xprt using sysfs

From: Olga Kornievskaia <[email protected]>

Once a transport has been put offline, this transport can be also
removed from the list of transports. Any tasks that have been stuck
on this transport would find the next available active transport
and be re-tried. This transport would be removed from the xprt_switch
list and freed.

Signed-off-by: Olga Kornievskaia <[email protected]>
---
include/linux/sunrpc/xprt.h | 1 +
net/sunrpc/clnt.c | 20 ++++++++++++++++++++
net/sunrpc/sysfs.c | 18 ++++++++++++++----
3 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 72a858f032c7..c2ecf5cc3802 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -428,6 +428,7 @@ void xprt_release_write(struct rpc_xprt *, struct rpc_task *);
#define XPRT_BINDING (5)
#define XPRT_CLOSING (6)
#define XPRT_OFFLINE (7)
+#define XPRT_REMOVE (8)
#define XPRT_CONGESTED (9)
#define XPRT_CWND_WAIT (10)
#define XPRT_WRITE_SPACE (11)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 408618765aa5..36040ec53404 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2106,6 +2106,26 @@ call_connect_status(struct rpc_task *task)
case -ENOTCONN:
case -EAGAIN:
case -ETIMEDOUT:
+ if (!(task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) &&
+ (task->tk_flags & RPC_TASK_MOVEABLE) &&
+ test_bit(XPRT_REMOVE, &xprt->state)) {
+ struct rpc_xprt *saved = task->tk_xprt;
+ struct rpc_xprt_switch *xps;
+
+ rcu_read_lock();
+ xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+ rcu_read_unlock();
+ if (xps->xps_nactive > 1) {
+ xprt_release(task);
+ xprt_put(saved);
+ rpc_xprt_switch_remove_xprt(xps, saved);
+ task->tk_xprt = NULL;
+ task->tk_action = call_start;
+ }
+ xprt_switch_put(xps);
+ if (!task->tk_xprt)
+ return;
+ }
goto out_retry;
case -ENOBUFS:
rpc_delay(task, HZ >> 2);
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 02c918c5061b..44a69638c55f 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -118,7 +118,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
ssize_t ret;
int locked, connected, connecting, close_wait, bound, binding,
- closing, congested, cwnd_wait, write_space, offline;
+ closing, congested, cwnd_wait, write_space, offline, remove;

if (!xprt)
return 0;
@@ -137,8 +137,9 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
cwnd_wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
write_space = test_bit(XPRT_WRITE_SPACE, &xprt->state);
offline = test_bit(XPRT_OFFLINE, &xprt->state);
+ remove = test_bit(XPRT_REMOVE, &xprt->state);

- ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s\n",
+ ret = sprintf(buf, "state=%s %s %s %s %s %s %s %s %s %s %s %s\n",
locked ? "LOCKED" : "",
connected ? "CONNECTED" : "",
connecting ? "CONNECTING" : "",
@@ -149,7 +150,8 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj,
congested ? "CONGESTED" : "",
cwnd_wait ? "CWND_WAIT" : "",
write_space ? "WRITE_SPACE" : "",
- offline ? "OFFLINE" : "");
+ offline ? "OFFLINE" : "",
+ remove ? "REMOVE" : "");
}

xprt_put(xprt);
@@ -230,13 +232,15 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
const char *buf, size_t count)
{
struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
- int offline = 0;
+ int offline = 0, remove = 0;

if (!xprt)
return 0;

if (!strncmp(buf, "offline", 7))
offline = 1;
+ else if (!strncmp(buf, "remove", 6))
+ remove = 1;
else
return -EINVAL;

@@ -249,6 +253,12 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj,
count = -EINVAL;
else
set_bit(XPRT_OFFLINE, &xprt->state);
+ } else if (remove) {
+ if (!test_bit(XPRT_CONNECTED, &xprt->state) &&
+ test_bit(XPRT_OFFLINE, &xprt->state))
+ set_bit(XPRT_REMOVE, &xprt->state);
+ else
+ count = -EINVAL;
}

xprt_release_write(xprt, NULL);
--
2.27.0

2021-06-03 23:01:31

by Olga Kornievskaia

[permalink] [raw]
Subject: [PATCH v1 2/3] NFSv4.1 identify and mark RPC tasks that can move between transports

From: Olga Kornievskaia <[email protected]>

In preparation for when we can re-try a task on a different transport,
identify and mark such RPC tasks as moveable. Only 4.1+ operarations can
be re-tried on a different transport.

Signed-off-by: Olga Kornievskaia <[email protected]>
---
fs/nfs/nfs4proc.c | 38 +++++++++++++++++++++++++++++++-----
fs/nfs/pagelist.c | 8 ++++++--
fs/nfs/write.c | 6 +++++-
include/linux/sunrpc/sched.h | 2 ++
4 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e25c16257545..40e8d36cbfa8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1155,7 +1155,11 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res)
{
- return nfs4_do_call_sync(clnt, server, msg, args, res, 0);
+ unsigned short task_flags = 0;
+
+ if (server->nfs_client->cl_minorversion)
+ task_flags = RPC_TASK_MOVEABLE;
+ return nfs4_do_call_sync(clnt, server, msg, args, res, task_flags);
}


@@ -2569,6 +2573,9 @@ static int nfs4_run_open_task(struct nfs4_opendata *data,
};
int status;

+ if (server->nfs_client->cl_minorversion)
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
kref_get(&data->kref);
data->rpc_done = false;
data->rpc_status = 0;
@@ -3749,6 +3756,9 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
};
int status = -ENOMEM;

+ if (server->nfs_client->cl_minorversion)
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;
+
nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_CLEANUP,
&task_setup_data.rpc_client, &msg);

@@ -4188,6 +4198,9 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
};
unsigned short task_flags = 0;

+ if (server->nfs_client->cl_minorversion)
+ task_flags = RPC_TASK_MOVEABLE;
+
/* Is this is an attribute revalidation, subject to softreval? */
if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
task_flags |= RPC_TASK_TIMEOUT;
@@ -4307,6 +4320,9 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
};
unsigned short task_flags = 0;

+ if (server->nfs_client->cl_minorversion)
+ task_flags = RPC_TASK_MOVEABLE;
+
/* Is this is an attribute revalidation, subject to softreval? */
if (nfs_lookup_is_soft_revalidate(dentry))
task_flags |= RPC_TASK_TIMEOUT;
@@ -6538,7 +6554,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
.rpc_client = server->client,
.rpc_message = &msg,
.callback_ops = &nfs4_delegreturn_ops,
- .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE,
};
int status = 0;

@@ -6856,6 +6872,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
.workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
+ struct nfs_client *client =
+ NFS_SERVER(lsp->ls_state->inode)->nfs_client;
+
+ if (client->cl_minorversion)
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;

nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)->nfs_client,
NFS_SP4_MACH_CRED_CLEANUP, &task_setup_data.rpc_client, &msg);
@@ -7130,6 +7151,10 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
.flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
};
int ret;
+ struct nfs_client *client = NFS_SERVER(state->inode)->nfs_client;
+
+ if (client->cl_minorversion)
+ task_setup_data.flags |= RPC_TASK_MOVEABLE;

dprintk("%s: begin!\n", __func__);
data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
@@ -9186,7 +9211,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
.rpc_client = clp->cl_rpcclient,
.rpc_message = &msg,
.callback_ops = &nfs41_sequence_ops,
- .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT | RPC_TASK_MOVEABLE,
};
struct rpc_task *ret;

@@ -9509,7 +9534,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
.rpc_message = &msg,
.callback_ops = &nfs4_layoutget_call_ops,
.callback_data = lgp,
- .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF |
+ RPC_TASK_MOVEABLE,
};
struct pnfs_layout_segment *lseg = NULL;
struct nfs4_exception exception = {
@@ -9650,6 +9676,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
.rpc_message = &msg,
.callback_ops = &nfs4_layoutreturn_call_ops,
.callback_data = lrp,
+ .flags = RPC_TASK_MOVEABLE,
};
int status = 0;

@@ -9804,6 +9831,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
.rpc_message = &msg,
.callback_ops = &nfs4_layoutcommit_ops,
.callback_data = data,
+ .flags = RPC_TASK_MOVEABLE,
};
struct rpc_task *task;
int status = 0;
@@ -10131,7 +10159,7 @@ static int nfs41_free_stateid(struct nfs_server *server,
.rpc_client = server->client,
.rpc_message = &msg,
.callback_ops = &nfs41_free_stateid_ops,
- .flags = RPC_TASK_ASYNC,
+ .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
};
struct nfs_free_stateid_data *data;
struct rpc_task *task;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index cf9cc62ec48e..cc232d1f16f2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -954,6 +954,7 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
{
struct nfs_pgio_header *hdr;
int ret;
+ unsigned short task_flags = 0;

hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!hdr) {
@@ -962,14 +963,17 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
}
nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr);
- if (ret == 0)
+ if (ret == 0) {
+ if (NFS_SERVER(hdr->inode)->nfs_client->cl_minorversion)
+ task_flags = RPC_TASK_MOVEABLE;
ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
hdr,
hdr->cred,
NFS_PROTO(hdr->inode),
desc->pg_rpc_callops,
desc->pg_ioflags,
- RPC_TASK_CRED_NOREF);
+ RPC_TASK_CRED_NOREF | task_flags);
+ }
return ret;
}

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3bf82178166a..eae9bf114041 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1810,6 +1810,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
struct nfs_commit_info *cinfo)
{
struct nfs_commit_data *data;
+ unsigned short task_flags = 0;

/* another commit raced with us */
if (list_empty(head))
@@ -1820,8 +1821,11 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
/* Set up the argument struct */
nfs_init_commit(data, head, NULL, cinfo);
atomic_inc(&cinfo->mds->rpcs_out);
+ if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
+ task_flags = RPC_TASK_MOVEABLE;
return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
- data->mds_ops, how, RPC_TASK_CRED_NOREF);
+ data->mds_ops, how,
+ RPC_TASK_CRED_NOREF | task_flags);
}

/*
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index df696efdd675..a237b8dbf608 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -121,6 +121,7 @@ struct rpc_task_setup {
*/
#define RPC_TASK_ASYNC 0x0001 /* is an async task */
#define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */
+#define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */
#define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */
#define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */
#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */
@@ -139,6 +140,7 @@ struct rpc_task_setup {
#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
#define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN)
#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT)
+#define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE)

#define RPC_TASK_RUNNING 0
#define RPC_TASK_QUEUED 1
--
2.27.0

2021-06-03 23:58:38

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH v1 0/3] modify xprt state using sysfs

On Fri, 04 Jun 2021, Olga Kornievskaia wrote:
> From: Olga Kornievskaia <[email protected]>
>
> When a transport gets stuck, it is desired to be able to move the tasks
> that have been stuck/queued on that transport to another.

This is interesting.....
A long-standing problem with NFS is that it is tricky to reliably
unmount a filesystem if the network is not responding. It is possible,
but you need to identify all the processes blocked on the filesystem and
SIGKILL them.
My most recent exposure to this was when shutdown hung for someone
because NetworkManager shutdown the wifi before NFS filesystems were
unmounted. This is arguably a config error, but the same problem could
happen with a power-outage instead of networkmanage breaking the wifi.

It would be nice to be able to forcibly unmount filesystems. e.g. mark
the transport as dead in such a way that all requests report EIO (or
similar).
This is obviously a big hammer, probably bigger than justified for use
with "umount -f", but sometimes it is a necessary hammer.

Could your work lead to being able to do this? Could I write a shutdown
script that runs when there is no more network and no expectation of any
network ever again, and which marks all transports as dead - and then
wakes up all pending rpc tasks?

Thanks,
NeilBrown

2021-06-04 00:23:23

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 0/3] modify xprt state using sysfs

On Thu, Jun 3, 2021 at 7:57 PM NeilBrown <[email protected]> wrote:
>
> On Fri, 04 Jun 2021, Olga Kornievskaia wrote:
> > From: Olga Kornievskaia <[email protected]>
> >
> > When a transport gets stuck, it is desired to be able to move the tasks
> > that have been stuck/queued on that transport to another.
>
> This is interesting.....
> A long-standing problem with NFS is that it is tricky to reliably
> unmount a filesystem if the network is not responding. It is possible,
> but you need to identify all the processes blocked on the filesystem and
> SIGKILL them.
> My most recent exposure to this was when shutdown hung for someone
> because NetworkManager shutdown the wifi before NFS filesystems were
> unmounted. This is arguably a config error, but the same problem could
> happen with a power-outage instead of networkmanage breaking the wifi.
>
> It would be nice to be able to forcibly unmount filesystems. e.g. mark
> the transport as dead in such a way that all requests report EIO (or
> similar).
> This is obviously a big hammer, probably bigger than justified for use
> with "umount -f", but sometimes it is a necessary hammer.
>
> Could your work lead to being able to do this? Could I write a shutdown
> script that runs when there is no more network and no expectation of any
> network ever again, and which marks all transports as dead - and then
> wakes up all pending rpc tasks?

I thought that was something that Ben was looking into in parallel to
my efforts. In this patch series I'm only addressing the issue where
some transport is unresponsive and it's not the "main" transport. I
don't allow main transport to be put offline or removed. As you said
in that case, the tasks need to be errored out to the application. But
yes, I think in the next step we can allow for the main transport to
be removed and erroring the tasks and allowing for unmounting when the
server isn't responding.

>
> Thanks,
> NeilBrown

2021-06-13 16:20:18

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSv4.1 identify and mark RPC tasks that can move between transports

On Thu, 2021-06-03 at 18:59 -0400, Olga Kornievskaia wrote:
> From: Olga Kornievskaia <[email protected]>
>
> In preparation for when we can re-try a task on a different
> transport,
> identify and mark such RPC tasks as moveable. Only 4.1+ operarations
> can
> be re-tried on a different transport.
>
> Signed-off-by: Olga Kornievskaia <[email protected]>
> ---
>  fs/nfs/nfs4proc.c            | 38 +++++++++++++++++++++++++++++++---
> --
>  fs/nfs/pagelist.c            |  8 ++++++--
>  fs/nfs/write.c               |  6 +++++-
>  include/linux/sunrpc/sched.h |  2 ++
>  4 files changed, 46 insertions(+), 8 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index e25c16257545..40e8d36cbfa8 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -1155,7 +1155,11 @@ static int nfs4_call_sync_sequence(struct
> rpc_clnt *clnt,
>                                    struct nfs4_sequence_args *args,
>                                    struct nfs4_sequence_res *res)
>  {
> -       return nfs4_do_call_sync(clnt, server, msg, args, res, 0);
> +       unsigned short task_flags = 0;
> +
> +       if (server->nfs_client->cl_minorversion)
> +               task_flags = RPC_TASK_MOVEABLE;
> +       return nfs4_do_call_sync(clnt, server, msg, args, res,
> task_flags);
>  }
>  
>  
> @@ -2569,6 +2573,9 @@ static int nfs4_run_open_task(struct
> nfs4_opendata *data,
>         };
>         int status;
>  
> +       if (server->nfs_client->cl_minorversion)
> +               task_setup_data.flags |= RPC_TASK_MOVEABLE;
> +
>         kref_get(&data->kref);
>         data->rpc_done = false;
>         data->rpc_status = 0;
> @@ -3749,6 +3756,9 @@ int nfs4_do_close(struct nfs4_state *state,
> gfp_t gfp_mask, int wait)
>         };
>         int status = -ENOMEM;
>  
> +       if (server->nfs_client->cl_minorversion)
> +               task_setup_data.flags |= RPC_TASK_MOVEABLE;
> +
>         nfs4_state_protect(server->nfs_client,
> NFS_SP4_MACH_CRED_CLEANUP,
>                 &task_setup_data.rpc_client, &msg);
>  
> @@ -4188,6 +4198,9 @@ static int _nfs4_proc_getattr(struct nfs_server
> *server, struct nfs_fh *fhandle,
>         };
>         unsigned short task_flags = 0;
>  
> +       if (server->nfs_client->cl_minorversion)
> +               task_flags = RPC_TASK_MOVEABLE;

Hmmm... This property isn't specific to a non-zero minor version. It is
actually specific to the existence of a session.

> +
>         /* Is this is an attribute revalidation, subject to
> softreval? */
>         if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
>                 task_flags |= RPC_TASK_TIMEOUT;
> @@ -4307,6 +4320,9 @@ static int _nfs4_proc_lookup(struct rpc_clnt
> *clnt, struct inode *dir,
>         };
>         unsigned short task_flags = 0;
>  
> +       if (server->nfs_client->cl_minorversion)
> +               task_flags = RPC_TASK_MOVEABLE;
> +
>         /* Is this is an attribute revalidation, subject to
> softreval? */
>         if (nfs_lookup_is_soft_revalidate(dentry))
>                 task_flags |= RPC_TASK_TIMEOUT;
> @@ -6538,7 +6554,7 @@ static int _nfs4_proc_delegreturn(struct inode
> *inode, const struct cred *cred,
>                 .rpc_client = server->client,
>                 .rpc_message = &msg,
>                 .callback_ops = &nfs4_delegreturn_ops,
> -               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> +               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> RPC_TASK_MOVEABLE,
>         };
>         int status = 0;
>  
> @@ -6856,6 +6872,11 @@ static struct rpc_task *nfs4_do_unlck(struct
> file_lock *fl,
>                 .workqueue = nfsiod_workqueue,
>                 .flags = RPC_TASK_ASYNC,
>         };
> +       struct nfs_client *client =
> +               NFS_SERVER(lsp->ls_state->inode)->nfs_client;
> +
> +       if (client->cl_minorversion)
> +               task_setup_data.flags |= RPC_TASK_MOVEABLE;
>  
>         nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)-
> >nfs_client,
>                 NFS_SP4_MACH_CRED_CLEANUP,
> &task_setup_data.rpc_client, &msg);
> @@ -7130,6 +7151,10 @@ static int _nfs4_do_setlk(struct nfs4_state
> *state, int cmd, struct file_lock *f
>                 .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
>         };
>         int ret;
> +       struct nfs_client *client = NFS_SERVER(state->inode)-
> >nfs_client;
> +
> +       if (client->cl_minorversion)
> +               task_setup_data.flags |= RPC_TASK_MOVEABLE;
>  
>         dprintk("%s: begin!\n", __func__);
>         data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl-
> >fl_file),
> @@ -9186,7 +9211,7 @@ static struct rpc_task
> *_nfs41_proc_sequence(struct nfs_client *clp,
>                 .rpc_client = clp->cl_rpcclient,
>                 .rpc_message = &msg,
>                 .callback_ops = &nfs41_sequence_ops,
> -               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> +               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> RPC_TASK_MOVEABLE,
>         };
>         struct rpc_task *ret;
>  
> @@ -9509,7 +9534,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
> long *timeout)
>                 .rpc_message = &msg,
>                 .callback_ops = &nfs4_layoutget_call_ops,
>                 .callback_data = lgp,
> -               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
> +               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF |
> +                        RPC_TASK_MOVEABLE,
>         };
>         struct pnfs_layout_segment *lseg = NULL;
>         struct nfs4_exception exception = {
> @@ -9650,6 +9676,7 @@ int nfs4_proc_layoutreturn(struct
> nfs4_layoutreturn *lrp, bool sync)
>                 .rpc_message = &msg,
>                 .callback_ops = &nfs4_layoutreturn_call_ops,
>                 .callback_data = lrp,
> +               .flags = RPC_TASK_MOVEABLE,
>         };
>         int status = 0;
>  
> @@ -9804,6 +9831,7 @@ nfs4_proc_layoutcommit(struct
> nfs4_layoutcommit_data *data, bool sync)
>                 .rpc_message = &msg,
>                 .callback_ops = &nfs4_layoutcommit_ops,
>                 .callback_data = data,
> +               .flags = RPC_TASK_MOVEABLE,
>         };
>         struct rpc_task *task;
>         int status = 0;
> @@ -10131,7 +10159,7 @@ static int nfs41_free_stateid(struct
> nfs_server *server,
>                 .rpc_client = server->client,
>                 .rpc_message = &msg,
>                 .callback_ops = &nfs41_free_stateid_ops,
> -               .flags = RPC_TASK_ASYNC,
> +               .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
>         };
>         struct nfs_free_stateid_data *data;
>         struct rpc_task *task;
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index cf9cc62ec48e..cc232d1f16f2 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -954,6 +954,7 @@ static int nfs_generic_pg_pgios(struct
> nfs_pageio_descriptor *desc)
>  {
>         struct nfs_pgio_header *hdr;
>         int ret;
> +       unsigned short task_flags = 0;
>  
>         hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
>         if (!hdr) {
> @@ -962,14 +963,17 @@ static int nfs_generic_pg_pgios(struct
> nfs_pageio_descriptor *desc)
>         }
>         nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
>         ret = nfs_generic_pgio(desc, hdr);
> -       if (ret == 0)
> +       if (ret == 0) {
> +               if (NFS_SERVER(hdr->inode)->nfs_client-
> >cl_minorversion)
> +                       task_flags = RPC_TASK_MOVEABLE;
>                 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
>                                         hdr,
>                                         hdr->cred,
>                                         NFS_PROTO(hdr->inode),
>                                         desc->pg_rpc_callops,
>                                         desc->pg_ioflags,
> -                                       RPC_TASK_CRED_NOREF);
> +                                       RPC_TASK_CRED_NOREF |
> task_flags);
> +       }
>         return ret;
>  }
>  
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 3bf82178166a..eae9bf114041 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -1810,6 +1810,7 @@ nfs_commit_list(struct inode *inode, struct
> list_head *head, int how,
>                 struct nfs_commit_info *cinfo)
>  {
>         struct nfs_commit_data  *data;
> +       unsigned short task_flags = 0;
>  
>         /* another commit raced with us */
>         if (list_empty(head))
> @@ -1820,8 +1821,11 @@ nfs_commit_list(struct inode *inode, struct
> list_head *head, int how,
>         /* Set up the argument struct */
>         nfs_init_commit(data, head, NULL, cinfo);
>         atomic_inc(&cinfo->mds->rpcs_out);
> +       if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
> +               task_flags = RPC_TASK_MOVEABLE;
>         return nfs_initiate_commit(NFS_CLIENT(inode), data,
> NFS_PROTO(inode),
> -                                  data->mds_ops, how,
> RPC_TASK_CRED_NOREF);
> +                                  data->mds_ops, how,
> +                                  RPC_TASK_CRED_NOREF | task_flags);
>  }
>  
>  /*
> diff --git a/include/linux/sunrpc/sched.h
> b/include/linux/sunrpc/sched.h
> index df696efdd675..a237b8dbf608 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -121,6 +121,7 @@ struct rpc_task_setup {
>   */
>  #define RPC_TASK_ASYNC         0x0001          /* is an async task
> */
>  #define RPC_TASK_SWAPPER       0x0002          /* is swapping in/out
> */
> +#define RPC_TASK_MOVEABLE      0x0004          /* nfs4.1+ rpc tasks
> */
>  #define RPC_TASK_NULLCREDS     0x0010          /* Use AUTH_NULL
> credential */
>  #define RPC_CALL_MAJORSEEN     0x0020          /* major timeout seen
> */
>  #define RPC_TASK_ROOTCREDS     0x0040          /* force root creds
> */
> @@ -139,6 +140,7 @@ struct rpc_task_setup {
>  #define RPC_IS_SOFT(t)         ((t)->tk_flags &
> (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
>  #define RPC_IS_SOFTCONN(t)     ((t)->tk_flags & RPC_TASK_SOFTCONN)
>  #define RPC_WAS_SENT(t)                ((t)->tk_flags &
> RPC_TASK_SENT)
> +#define RPC_IS_MOVEABLE(t)     ((t)->tk_flags & RPC_TASK_MOVEABLE)
>  
>  #define RPC_TASK_RUNNING       0
>  #define RPC_TASK_QUEUED                1
> --
> 2.27.0
>

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2021-06-15 15:07:52

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSv4.1 identify and mark RPC tasks that can move between transports

On Sun, Jun 13, 2021 at 12:18 PM Trond Myklebust
<[email protected]> wrote:
>
> On Thu, 2021-06-03 at 18:59 -0400, Olga Kornievskaia wrote:
> > From: Olga Kornievskaia <[email protected]>
> >
> > In preparation for when we can re-try a task on a different
> > transport,
> > identify and mark such RPC tasks as moveable. Only 4.1+ operarations
> > can
> > be re-tried on a different transport.
> >
> > Signed-off-by: Olga Kornievskaia <[email protected]>
> > ---
> > fs/nfs/nfs4proc.c | 38 +++++++++++++++++++++++++++++++---
> > --
> > fs/nfs/pagelist.c | 8 ++++++--
> > fs/nfs/write.c | 6 +++++-
> > include/linux/sunrpc/sched.h | 2 ++
> > 4 files changed, 46 insertions(+), 8 deletions(-)
> >
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index e25c16257545..40e8d36cbfa8 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -1155,7 +1155,11 @@ static int nfs4_call_sync_sequence(struct
> > rpc_clnt *clnt,
> > struct nfs4_sequence_args *args,
> > struct nfs4_sequence_res *res)
> > {
> > - return nfs4_do_call_sync(clnt, server, msg, args, res, 0);
> > + unsigned short task_flags = 0;
> > +
> > + if (server->nfs_client->cl_minorversion)
> > + task_flags = RPC_TASK_MOVEABLE;
> > + return nfs4_do_call_sync(clnt, server, msg, args, res,
> > task_flags);
> > }
> >
> >
> > @@ -2569,6 +2573,9 @@ static int nfs4_run_open_task(struct
> > nfs4_opendata *data,
> > };
> > int status;
> >
> > + if (server->nfs_client->cl_minorversion)
> > + task_setup_data.flags |= RPC_TASK_MOVEABLE;
> > +
> > kref_get(&data->kref);
> > data->rpc_done = false;
> > data->rpc_status = 0;
> > @@ -3749,6 +3756,9 @@ int nfs4_do_close(struct nfs4_state *state,
> > gfp_t gfp_mask, int wait)
> > };
> > int status = -ENOMEM;
> >
> > + if (server->nfs_client->cl_minorversion)
> > + task_setup_data.flags |= RPC_TASK_MOVEABLE;
> > +
> > nfs4_state_protect(server->nfs_client,
> > NFS_SP4_MACH_CRED_CLEANUP,
> > &task_setup_data.rpc_client, &msg);
> >
> > @@ -4188,6 +4198,9 @@ static int _nfs4_proc_getattr(struct nfs_server
> > *server, struct nfs_fh *fhandle,
> > };
> > unsigned short task_flags = 0;
> >
> > + if (server->nfs_client->cl_minorversion)
> > + task_flags = RPC_TASK_MOVEABLE;
>
> Hmmm... This property isn't specific to a non-zero minor version. It is
> actually specific to the existence of a session.

Isn't this the same? Would we ever have a 4.1+ client without a session?

> > +
> > /* Is this is an attribute revalidation, subject to
> > softreval? */
> > if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
> > task_flags |= RPC_TASK_TIMEOUT;
> > @@ -4307,6 +4320,9 @@ static int _nfs4_proc_lookup(struct rpc_clnt
> > *clnt, struct inode *dir,
> > };
> > unsigned short task_flags = 0;
> >
> > + if (server->nfs_client->cl_minorversion)
> > + task_flags = RPC_TASK_MOVEABLE;
> > +
> > /* Is this is an attribute revalidation, subject to
> > softreval? */
> > if (nfs_lookup_is_soft_revalidate(dentry))
> > task_flags |= RPC_TASK_TIMEOUT;
> > @@ -6538,7 +6554,7 @@ static int _nfs4_proc_delegreturn(struct inode
> > *inode, const struct cred *cred,
> > .rpc_client = server->client,
> > .rpc_message = &msg,
> > .callback_ops = &nfs4_delegreturn_ops,
> > - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > RPC_TASK_MOVEABLE,
> > };
> > int status = 0;
> >
> > @@ -6856,6 +6872,11 @@ static struct rpc_task *nfs4_do_unlck(struct
> > file_lock *fl,
> > .workqueue = nfsiod_workqueue,
> > .flags = RPC_TASK_ASYNC,
> > };
> > + struct nfs_client *client =
> > + NFS_SERVER(lsp->ls_state->inode)->nfs_client;
> > +
> > + if (client->cl_minorversion)
> > + task_setup_data.flags |= RPC_TASK_MOVEABLE;
> >
> > nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)-
> > >nfs_client,
> > NFS_SP4_MACH_CRED_CLEANUP,
> > &task_setup_data.rpc_client, &msg);
> > @@ -7130,6 +7151,10 @@ static int _nfs4_do_setlk(struct nfs4_state
> > *state, int cmd, struct file_lock *f
> > .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
> > };
> > int ret;
> > + struct nfs_client *client = NFS_SERVER(state->inode)-
> > >nfs_client;
> > +
> > + if (client->cl_minorversion)
> > + task_setup_data.flags |= RPC_TASK_MOVEABLE;
> >
> > dprintk("%s: begin!\n", __func__);
> > data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl-
> > >fl_file),
> > @@ -9186,7 +9211,7 @@ static struct rpc_task
> > *_nfs41_proc_sequence(struct nfs_client *clp,
> > .rpc_client = clp->cl_rpcclient,
> > .rpc_message = &msg,
> > .callback_ops = &nfs41_sequence_ops,
> > - .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > + .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > RPC_TASK_MOVEABLE,
> > };
> > struct rpc_task *ret;
> >
> > @@ -9509,7 +9534,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
> > long *timeout)
> > .rpc_message = &msg,
> > .callback_ops = &nfs4_layoutget_call_ops,
> > .callback_data = lgp,
> > - .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
> > + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF |
> > + RPC_TASK_MOVEABLE,
> > };
> > struct pnfs_layout_segment *lseg = NULL;
> > struct nfs4_exception exception = {
> > @@ -9650,6 +9676,7 @@ int nfs4_proc_layoutreturn(struct
> > nfs4_layoutreturn *lrp, bool sync)
> > .rpc_message = &msg,
> > .callback_ops = &nfs4_layoutreturn_call_ops,
> > .callback_data = lrp,
> > + .flags = RPC_TASK_MOVEABLE,
> > };
> > int status = 0;
> >
> > @@ -9804,6 +9831,7 @@ nfs4_proc_layoutcommit(struct
> > nfs4_layoutcommit_data *data, bool sync)
> > .rpc_message = &msg,
> > .callback_ops = &nfs4_layoutcommit_ops,
> > .callback_data = data,
> > + .flags = RPC_TASK_MOVEABLE,
> > };
> > struct rpc_task *task;
> > int status = 0;
> > @@ -10131,7 +10159,7 @@ static int nfs41_free_stateid(struct
> > nfs_server *server,
> > .rpc_client = server->client,
> > .rpc_message = &msg,
> > .callback_ops = &nfs41_free_stateid_ops,
> > - .flags = RPC_TASK_ASYNC,
> > + .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
> > };
> > struct nfs_free_stateid_data *data;
> > struct rpc_task *task;
> > diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> > index cf9cc62ec48e..cc232d1f16f2 100644
> > --- a/fs/nfs/pagelist.c
> > +++ b/fs/nfs/pagelist.c
> > @@ -954,6 +954,7 @@ static int nfs_generic_pg_pgios(struct
> > nfs_pageio_descriptor *desc)
> > {
> > struct nfs_pgio_header *hdr;
> > int ret;
> > + unsigned short task_flags = 0;
> >
> > hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
> > if (!hdr) {
> > @@ -962,14 +963,17 @@ static int nfs_generic_pg_pgios(struct
> > nfs_pageio_descriptor *desc)
> > }
> > nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
> > ret = nfs_generic_pgio(desc, hdr);
> > - if (ret == 0)
> > + if (ret == 0) {
> > + if (NFS_SERVER(hdr->inode)->nfs_client-
> > >cl_minorversion)
> > + task_flags = RPC_TASK_MOVEABLE;
> > ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
> > hdr,
> > hdr->cred,
> > NFS_PROTO(hdr->inode),
> > desc->pg_rpc_callops,
> > desc->pg_ioflags,
> > - RPC_TASK_CRED_NOREF);
> > + RPC_TASK_CRED_NOREF |
> > task_flags);
> > + }
> > return ret;
> > }
> >
> > diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> > index 3bf82178166a..eae9bf114041 100644
> > --- a/fs/nfs/write.c
> > +++ b/fs/nfs/write.c
> > @@ -1810,6 +1810,7 @@ nfs_commit_list(struct inode *inode, struct
> > list_head *head, int how,
> > struct nfs_commit_info *cinfo)
> > {
> > struct nfs_commit_data *data;
> > + unsigned short task_flags = 0;
> >
> > /* another commit raced with us */
> > if (list_empty(head))
> > @@ -1820,8 +1821,11 @@ nfs_commit_list(struct inode *inode, struct
> > list_head *head, int how,
> > /* Set up the argument struct */
> > nfs_init_commit(data, head, NULL, cinfo);
> > atomic_inc(&cinfo->mds->rpcs_out);
> > + if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
> > + task_flags = RPC_TASK_MOVEABLE;
> > return nfs_initiate_commit(NFS_CLIENT(inode), data,
> > NFS_PROTO(inode),
> > - data->mds_ops, how,
> > RPC_TASK_CRED_NOREF);
> > + data->mds_ops, how,
> > + RPC_TASK_CRED_NOREF | task_flags);
> > }
> >
> > /*
> > diff --git a/include/linux/sunrpc/sched.h
> > b/include/linux/sunrpc/sched.h
> > index df696efdd675..a237b8dbf608 100644
> > --- a/include/linux/sunrpc/sched.h
> > +++ b/include/linux/sunrpc/sched.h
> > @@ -121,6 +121,7 @@ struct rpc_task_setup {
> > */
> > #define RPC_TASK_ASYNC 0x0001 /* is an async task
> > */
> > #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out
> > */
> > +#define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks
> > */
> > #define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL
> > credential */
> > #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen
> > */
> > #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds
> > */
> > @@ -139,6 +140,7 @@ struct rpc_task_setup {
> > #define RPC_IS_SOFT(t) ((t)->tk_flags &
> > (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
> > #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN)
> > #define RPC_WAS_SENT(t) ((t)->tk_flags &
> > RPC_TASK_SENT)
> > +#define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE)
> >
> > #define RPC_TASK_RUNNING 0
> > #define RPC_TASK_QUEUED 1
> > --
> > 2.27.0
> >
>
> --
> Trond Myklebust
> Linux NFS client maintainer, Hammerspace
> [email protected]
>
>

2021-06-15 16:01:41

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSv4.1 identify and mark RPC tasks that can move between transports

On Tue, 2021-06-15 at 11:07 -0400, Olga Kornievskaia wrote:
> On Sun, Jun 13, 2021 at 12:18 PM Trond Myklebust
> <[email protected]> wrote:
> >
> > On Thu, 2021-06-03 at 18:59 -0400, Olga Kornievskaia wrote:
> > > From: Olga Kornievskaia <[email protected]>
> > >
> > > In preparation for when we can re-try a task on a different
> > > transport,
> > > identify and mark such RPC tasks as moveable. Only 4.1+
> > > operarations
> > > can
> > > be re-tried on a different transport.
> > >
> > > Signed-off-by: Olga Kornievskaia <[email protected]>
> > > ---
> > >  fs/nfs/nfs4proc.c            | 38
> > > +++++++++++++++++++++++++++++++---
> > > --
> > >  fs/nfs/pagelist.c            |  8 ++++++--
> > >  fs/nfs/write.c               |  6 +++++-
> > >  include/linux/sunrpc/sched.h |  2 ++
> > >  4 files changed, 46 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > > index e25c16257545..40e8d36cbfa8 100644
> > > --- a/fs/nfs/nfs4proc.c
> > > +++ b/fs/nfs/nfs4proc.c
> > > @@ -1155,7 +1155,11 @@ static int nfs4_call_sync_sequence(struct
> > > rpc_clnt *clnt,
> > >                                    struct nfs4_sequence_args
> > > *args,
> > >                                    struct nfs4_sequence_res *res)
> > >  {
> > > -       return nfs4_do_call_sync(clnt, server, msg, args, res,
> > > 0);
> > > +       unsigned short task_flags = 0;
> > > +
> > > +       if (server->nfs_client->cl_minorversion)
> > > +               task_flags = RPC_TASK_MOVEABLE;
> > > +       return nfs4_do_call_sync(clnt, server, msg, args, res,
> > > task_flags);
> > >  }
> > >
> > >
> > > @@ -2569,6 +2573,9 @@ static int nfs4_run_open_task(struct
> > > nfs4_opendata *data,
> > >         };
> > >         int status;
> > >
> > > +       if (server->nfs_client->cl_minorversion)
> > > +               task_setup_data.flags |= RPC_TASK_MOVEABLE;
> > > +
> > >         kref_get(&data->kref);
> > >         data->rpc_done = false;
> > >         data->rpc_status = 0;
> > > @@ -3749,6 +3756,9 @@ int nfs4_do_close(struct nfs4_state *state,
> > > gfp_t gfp_mask, int wait)
> > >         };
> > >         int status = -ENOMEM;
> > >
> > > +       if (server->nfs_client->cl_minorversion)
> > > +               task_setup_data.flags |= RPC_TASK_MOVEABLE;
> > > +
> > >         nfs4_state_protect(server->nfs_client,
> > > NFS_SP4_MACH_CRED_CLEANUP,
> > >                 &task_setup_data.rpc_client, &msg);
> > >
> > > @@ -4188,6 +4198,9 @@ static int _nfs4_proc_getattr(struct
> > > nfs_server
> > > *server, struct nfs_fh *fhandle,
> > >         };
> > >         unsigned short task_flags = 0;
> > >
> > > +       if (server->nfs_client->cl_minorversion)
> > > +               task_flags = RPC_TASK_MOVEABLE;
> >
> > Hmmm... This property isn't specific to a non-zero minor version.
> > It is
> > actually specific to the existence of a session.
>
> Isn't this the same? Would we ever have a 4.1+ client without a
> session?

Possibly not, but the use of the session as a test documents more
clearly why we can do this. Use of cl_minorversion requires people to
scurry off to find the right RFC and then read the entire text if they
want to find out why.

>
> > > +
> > >         /* Is this is an attribute revalidation, subject to
> > > softreval? */
> > >         if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
> > >                 task_flags |= RPC_TASK_TIMEOUT;
> > > @@ -4307,6 +4320,9 @@ static int _nfs4_proc_lookup(struct
> > > rpc_clnt
> > > *clnt, struct inode *dir,
> > >         };
> > >         unsigned short task_flags = 0;
> > >
> > > +       if (server->nfs_client->cl_minorversion)
> > > +               task_flags = RPC_TASK_MOVEABLE;
> > > +
> > >         /* Is this is an attribute revalidation, subject to
> > > softreval? */
> > >         if (nfs_lookup_is_soft_revalidate(dentry))
> > >                 task_flags |= RPC_TASK_TIMEOUT;
> > > @@ -6538,7 +6554,7 @@ static int _nfs4_proc_delegreturn(struct
> > > inode
> > > *inode, const struct cred *cred,
> > >                 .rpc_client = server->client,
> > >                 .rpc_message = &msg,
> > >                 .callback_ops = &nfs4_delegreturn_ops,
> > > -               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > +               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > > RPC_TASK_MOVEABLE,
> > >         };
> > >         int status = 0;
> > >
> > > @@ -6856,6 +6872,11 @@ static struct rpc_task
> > > *nfs4_do_unlck(struct
> > > file_lock *fl,
> > >                 .workqueue = nfsiod_workqueue,
> > >                 .flags = RPC_TASK_ASYNC,
> > >         };
> > > +       struct nfs_client *client =
> > > +               NFS_SERVER(lsp->ls_state->inode)->nfs_client;
> > > +
> > > +       if (client->cl_minorversion)
> > > +               task_setup_data.flags |= RPC_TASK_MOVEABLE;
> > >
> > >         nfs4_state_protect(NFS_SERVER(lsp->ls_state->inode)-
> > > > nfs_client,
> > >                 NFS_SP4_MACH_CRED_CLEANUP,
> > > &task_setup_data.rpc_client, &msg);
> > > @@ -7130,6 +7151,10 @@ static int _nfs4_do_setlk(struct
> > > nfs4_state
> > > *state, int cmd, struct file_lock *f
> > >                 .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
> > >         };
> > >         int ret;
> > > +       struct nfs_client *client = NFS_SERVER(state->inode)-
> > > > nfs_client;
> > > +
> > > +       if (client->cl_minorversion)
> > > +               task_setup_data.flags |= RPC_TASK_MOVEABLE;
> > >
> > >         dprintk("%s: begin!\n", __func__);
> > >         data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl-
> > > > fl_file),
> > > @@ -9186,7 +9211,7 @@ static struct rpc_task
> > > *_nfs41_proc_sequence(struct nfs_client *clp,
> > >                 .rpc_client = clp->cl_rpcclient,
> > >                 .rpc_message = &msg,
> > >                 .callback_ops = &nfs41_sequence_ops,
> > > -               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT,
> > > +               .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT |
> > > RPC_TASK_MOVEABLE,
> > >         };
> > >         struct rpc_task *ret;
> > >
> > > @@ -9509,7 +9534,8 @@ nfs4_proc_layoutget(struct nfs4_layoutget
> > > *lgp,
> > > long *timeout)
> > >                 .rpc_message = &msg,
> > >                 .callback_ops = &nfs4_layoutget_call_ops,
> > >                 .callback_data = lgp,
> > > -               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF,
> > > +               .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF |
> > > +                        RPC_TASK_MOVEABLE,
> > >         };
> > >         struct pnfs_layout_segment *lseg = NULL;
> > >         struct nfs4_exception exception = {
> > > @@ -9650,6 +9676,7 @@ int nfs4_proc_layoutreturn(struct
> > > nfs4_layoutreturn *lrp, bool sync)
> > >                 .rpc_message = &msg,
> > >                 .callback_ops = &nfs4_layoutreturn_call_ops,
> > >                 .callback_data = lrp,
> > > +               .flags = RPC_TASK_MOVEABLE,
> > >         };
> > >         int status = 0;
> > >
> > > @@ -9804,6 +9831,7 @@ nfs4_proc_layoutcommit(struct
> > > nfs4_layoutcommit_data *data, bool sync)
> > >                 .rpc_message = &msg,
> > >                 .callback_ops = &nfs4_layoutcommit_ops,
> > >                 .callback_data = data,
> > > +               .flags = RPC_TASK_MOVEABLE,
> > >         };
> > >         struct rpc_task *task;
> > >         int status = 0;
> > > @@ -10131,7 +10159,7 @@ static int nfs41_free_stateid(struct
> > > nfs_server *server,
> > >                 .rpc_client = server->client,
> > >                 .rpc_message = &msg,
> > >                 .callback_ops = &nfs41_free_stateid_ops,
> > > -               .flags = RPC_TASK_ASYNC,
> > > +               .flags = RPC_TASK_ASYNC | RPC_TASK_MOVEABLE,
> > >         };
> > >         struct nfs_free_stateid_data *data;
> > >         struct rpc_task *task;
> > > diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> > > index cf9cc62ec48e..cc232d1f16f2 100644
> > > --- a/fs/nfs/pagelist.c
> > > +++ b/fs/nfs/pagelist.c
> > > @@ -954,6 +954,7 @@ static int nfs_generic_pg_pgios(struct
> > > nfs_pageio_descriptor *desc)
> > >  {
> > >         struct nfs_pgio_header *hdr;
> > >         int ret;
> > > +       unsigned short task_flags = 0;
> > >
> > >         hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
> > >         if (!hdr) {
> > > @@ -962,14 +963,17 @@ static int nfs_generic_pg_pgios(struct
> > > nfs_pageio_descriptor *desc)
> > >         }
> > >         nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
> > >         ret = nfs_generic_pgio(desc, hdr);
> > > -       if (ret == 0)
> > > +       if (ret == 0) {
> > > +               if (NFS_SERVER(hdr->inode)->nfs_client-
> > > > cl_minorversion)
> > > +                       task_flags = RPC_TASK_MOVEABLE;
> > >                 ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
> > >                                         hdr,
> > >                                         hdr->cred,
> > >                                         NFS_PROTO(hdr->inode),
> > >                                         desc->pg_rpc_callops,
> > >                                         desc->pg_ioflags,
> > > -                                       RPC_TASK_CRED_NOREF);
> > > +                                       RPC_TASK_CRED_NOREF |
> > > task_flags);
> > > +       }
> > >         return ret;
> > >  }
> > >
> > > diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> > > index 3bf82178166a..eae9bf114041 100644
> > > --- a/fs/nfs/write.c
> > > +++ b/fs/nfs/write.c
> > > @@ -1810,6 +1810,7 @@ nfs_commit_list(struct inode *inode, struct
> > > list_head *head, int how,
> > >                 struct nfs_commit_info *cinfo)
> > >  {
> > >         struct nfs_commit_data  *data;
> > > +       unsigned short task_flags = 0;
> > >
> > >         /* another commit raced with us */
> > >         if (list_empty(head))
> > > @@ -1820,8 +1821,11 @@ nfs_commit_list(struct inode *inode,
> > > struct
> > > list_head *head, int how,
> > >         /* Set up the argument struct */
> > >         nfs_init_commit(data, head, NULL, cinfo);
> > >         atomic_inc(&cinfo->mds->rpcs_out);
> > > +       if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
> > > +               task_flags = RPC_TASK_MOVEABLE;
> > >         return nfs_initiate_commit(NFS_CLIENT(inode), data,
> > > NFS_PROTO(inode),
> > > -                                  data->mds_ops, how,
> > > RPC_TASK_CRED_NOREF);
> > > +                                  data->mds_ops, how,
> > > +                                  RPC_TASK_CRED_NOREF |
> > > task_flags);
> > >  }
> > >
> > >  /*
> > > diff --git a/include/linux/sunrpc/sched.h
> > > b/include/linux/sunrpc/sched.h
> > > index df696efdd675..a237b8dbf608 100644
> > > --- a/include/linux/sunrpc/sched.h
> > > +++ b/include/linux/sunrpc/sched.h
> > > @@ -121,6 +121,7 @@ struct rpc_task_setup {
> > >   */
> > >  #define RPC_TASK_ASYNC         0x0001          /* is an async
> > > task
> > > */
> > >  #define RPC_TASK_SWAPPER       0x0002          /* is swapping
> > > in/out
> > > */
> > > +#define RPC_TASK_MOVEABLE      0x0004          /* nfs4.1+ rpc
> > > tasks
> > > */
> > >  #define RPC_TASK_NULLCREDS     0x0010          /* Use AUTH_NULL
> > > credential */
> > >  #define RPC_CALL_MAJORSEEN     0x0020          /* major timeout
> > > seen
> > > */
> > >  #define RPC_TASK_ROOTCREDS     0x0040          /* force root
> > > creds
> > > */
> > > @@ -139,6 +140,7 @@ struct rpc_task_setup {
> > >  #define RPC_IS_SOFT(t)         ((t)->tk_flags &
> > > (RPC_TASK_SOFT|RPC_TASK_TIMEOUT))
> > >  #define RPC_IS_SOFTCONN(t)     ((t)->tk_flags &
> > > RPC_TASK_SOFTCONN)
> > >  #define RPC_WAS_SENT(t)                ((t)->tk_flags &
> > > RPC_TASK_SENT)
> > > +#define RPC_IS_MOVEABLE(t)     ((t)->tk_flags &
> > > RPC_TASK_MOVEABLE)
> > >
> > >  #define RPC_TASK_RUNNING       0
> > >  #define RPC_TASK_QUEUED                1
> > > --
> > > 2.27.0
> > >
> >
> > --
> > Trond Myklebust
> > Linux NFS client maintainer, Hammerspace
> > [email protected]
> >
> >

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]