From: Trond Myklebust <[email protected]>
Now that https://datatracker.ietf.org/doc/draft-ietf-nfsv4-layrec/ is
mostly done with the review process, I'd like to move the final patches
for the client implementation upstream.
The following patch series therefore adds support to the flexfiles pNFS
driver so that if a metadata server reboot occurs while a client has
layouts outstanding, and is performing I/O, then the client will report
layoutstats and layout errors through a LAYOUTRETURN during the grace
period, after the metadata server comes back up.
This has implications for mirrored workloads, since it allows the client
to report exactly which mirror data instances may have been corrupted
due to the presence of errors during WRITEs or COMMITs.
Trond Myklebust (11):
NFSv4/pnfs: Remove redundant list check
NFSv4.1: constify the stateid argument in nfs41_test_stateid()
NFSv4: Clean up encode_nfs4_stateid()
pNFS: Add a flag argument to pnfs_destroy_layouts_byclid()
NFSv4/pnfs: Add support for the PNFS_LAYOUT_FILE_BULK_RETURN flag
NFSv4/pNFS: Add a helper to defer failed layoutreturn calls
NFSv4/pNFS: Handle server reboots in pnfs_poc_release()
NFSv4/pNFS: Retry the layout return later in case of a timeout or
reboot
NFSv4/pnfs: Give nfs4_proc_layoutreturn() a flags argument
NFSv4/pNFS: Remove redundant call to unhash the layout
NFSv4/pNFS: Do layout state recovery upon reboot
fs/nfs/callback_proc.c | 5 +-
fs/nfs/flexfilelayout/flexfilelayout.c | 2 +-
fs/nfs/nfs4_fs.h | 3 +-
fs/nfs/nfs4proc.c | 53 ++++--
fs/nfs/nfs4state.c | 4 +-
fs/nfs/nfs4xdr.c | 7 +-
fs/nfs/pnfs.c | 223 +++++++++++++++++++------
fs/nfs/pnfs.h | 30 +++-
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 2 +-
10 files changed, 249 insertions(+), 81 deletions(-)
--
2.45.2
From: Trond Myklebust <[email protected]>
pnfs_layout_free_bulk_destroy_list() already checks whether the list is
empty, so the callers do not need to repeat that check before calling it.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b5834728f31b..bbbb692b2a47 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -923,8 +923,6 @@ pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
- if (list_empty(&layout_list))
- return 0;
return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}
@@ -947,8 +945,6 @@ pnfs_destroy_layouts_byclid(struct nfs_client *clp,
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
- if (list_empty(&layout_list))
- return 0;
return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
}
--
2.45.2
From: Trond Myklebust <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4_fs.h | 3 ++-
fs/nfs/nfs4proc.c | 24 ++++++++++++------------
fs/nfs/nfs4xdr.c | 2 +-
include/linux/nfs_xdr.h | 2 +-
4 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7024230f0d1d..c2045a2a9d0f 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -67,7 +67,8 @@ struct nfs4_minor_version_ops {
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
int (*test_and_free_expired)(struct nfs_server *,
- nfs4_stateid *, const struct cred *);
+ const nfs4_stateid *,
+ const struct cred *);
struct nfs_seqid *
(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
void (*session_trunk)(struct rpc_clnt *clnt,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a4f85af880c2..ae835d14ac75 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -103,10 +103,10 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
const struct cred *cred,
struct nfs4_slot *slot,
bool is_privileged);
-static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
- const struct cred *);
+static int nfs41_test_stateid(struct nfs_server *, const nfs4_stateid *,
+ const struct cred *);
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
- const struct cred *, bool);
+ const struct cred *, bool);
#endif
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
@@ -2867,16 +2867,16 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
}
static int nfs40_test_and_free_expired_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
return -NFS4ERR_BAD_STATEID;
}
#if defined(CONFIG_NFS_V4_1)
static int nfs41_test_and_free_expired_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
int status;
@@ -10357,12 +10357,12 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
}
static int _nfs41_test_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
int status;
struct nfs41_test_stateid_args args = {
- .stateid = stateid,
+ .stateid = *stateid,
};
struct nfs41_test_stateid_res res;
struct rpc_message msg = {
@@ -10418,8 +10418,8 @@ static void nfs4_handle_delay_or_session_error(struct nfs_server *server,
* failed or the state ID is not currently valid.
*/
static int nfs41_test_stateid(struct nfs_server *server,
- nfs4_stateid *stateid,
- const struct cred *cred)
+ const nfs4_stateid *stateid,
+ const struct cred *cred)
{
struct nfs4_exception exception = {
.interruptible = true,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 98aab2c324c9..4bf7d5c09282 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2137,7 +2137,7 @@ static void encode_test_stateid(struct xdr_stream *xdr,
{
encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr);
encode_uint32(xdr, 1);
- encode_nfs4_stateid(xdr, args->stateid);
+ encode_nfs4_stateid(xdr, &args->stateid);
}
static void encode_free_stateid(struct xdr_stream *xdr,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 01efacae4634..45623af3e7b8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1438,7 +1438,7 @@ struct nfs41_secinfo_no_name_args {
struct nfs41_test_stateid_args {
struct nfs4_sequence_args seq_args;
- nfs4_stateid *stateid;
+ nfs4_stateid stateid;
};
struct nfs41_test_stateid_res {
--
2.45.2
On Thu, 2024-06-13 at 01:00 -0400, [email protected] wrote:
> From: Trond Myklebust <[email protected]>
>
> Now that https://datatracker.ietf.org/doc/draft-ietf-nfsv4-layrec/ is
> mostly done with the review process, I'd like to move the final patches
> for the client implementation upstream.
>
> The following patch series therefore adds support to the flexfiles pNFS
> driver so that if a metadata server reboot occurs while a client has
> layouts outstanding, and is performing I/O, then the client will report
> layoutstats and layout errors through a LAYOUTRETURN during the grace
> period, after the metadata server comes back up.
> This has implications for mirrored workloads, since it allows the client
> to report exactly which mirror data instances may have been corrupted
> due to the presence of errors during WRITEs or COMMITs.
>
> Trond Myklebust (11):
> NFSv4/pnfs: Remove redundant list check
> NFSv4.1: constify the stateid argument in nfs41_test_stateid()
> NFSv4: Clean up encode_nfs4_stateid()
> pNFS: Add a flag argument to pnfs_destroy_layouts_byclid()
> NFSv4/pnfs: Add support for the PNFS_LAYOUT_FILE_BULK_RETURN flag
> NFSv4/pNFS: Add a helper to defer failed layoutreturn calls
> NFSv4/pNFS: Handle server reboots in pnfs_poc_release()
> NFSv4/pNFS: Retry the layout return later in case of a timeout or
> reboot
> NFSv4/pnfs: Give nfs4_proc_layoutreturn() a flags argument
> NFSv4/pNFS: Remove redundant call to unhash the layout
> NFSv4/pNFS: Do layout state recovery upon reboot
>
> fs/nfs/callback_proc.c | 5 +-
> fs/nfs/flexfilelayout/flexfilelayout.c | 2 +-
> fs/nfs/nfs4_fs.h | 3 +-
> fs/nfs/nfs4proc.c | 53 ++++--
> fs/nfs/nfs4state.c | 4 +-
> fs/nfs/nfs4xdr.c | 7 +-
> fs/nfs/pnfs.c | 223 +++++++++++++++++++------
> fs/nfs/pnfs.h | 30 +++-
> include/linux/nfs_fs_sb.h | 1 +
> include/linux/nfs_xdr.h | 2 +-
> 10 files changed, 249 insertions(+), 81 deletions(-)
>
These patches have been in use for a while inside Meta, running against
Hammerspace's servers, and have been behaving well.
Reviewed-by: Jeff Layton <[email protected]>