2024-06-13 05:07:16

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 04/11] pNFS: Add a flag argument to pnfs_destroy_layouts_byclid()

From: Trond Myklebust <[email protected]>

Change the bool argument to a flag so that we can add different modes
for doing bulk destroy of a layout. In particular, we will want the
ability to schedule return of all the layouts associated with a given
NFS server when it reboots.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 5 +++--
fs/nfs/pnfs.c | 21 +++++++++------------
fs/nfs/pnfs.h | 14 +++++++++-----
3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 199c52788640..7832fb0369a1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -323,9 +323,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
int stat;

if (args->cbl_recall_type == RETURN_FSID)
- stat = pnfs_destroy_layouts_byfsid(clp, &args->cbl_fsid, true);
+ stat = pnfs_layout_destroy_byfsid(clp, &args->cbl_fsid,
+ PNFS_LAYOUT_BULK_RETURN);
else
- stat = pnfs_destroy_layouts_byclid(clp, true);
+ stat = pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_BULK_RETURN);
if (stat != 0)
return NFS4ERR_DELAY;
return NFS4ERR_NOMATCHING_LAYOUT;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bbbb692b2a47..0e188bc303ee 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -868,7 +868,7 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp,

static int
pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
- bool is_bulk_recall)
+ enum pnfs_layout_destroy_mode mode)
{
struct pnfs_layout_hdr *lo;
struct inode *inode;
@@ -887,7 +887,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
spin_lock(&inode->i_lock);
list_del_init(&lo->plh_bulk_destroy);
if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
- if (is_bulk_recall)
+ if (mode == PNFS_LAYOUT_BULK_RETURN)
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
ret = -EAGAIN;
}
@@ -901,10 +901,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
return ret;
}

-int
-pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
- struct nfs_fsid *fsid,
- bool is_recall)
+int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
+ enum pnfs_layout_destroy_mode mode)
{
struct nfs_server *server;
LIST_HEAD(layout_list);
@@ -923,12 +921,11 @@ pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
rcu_read_unlock();
spin_unlock(&clp->cl_lock);

- return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+ return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
}

-int
-pnfs_destroy_layouts_byclid(struct nfs_client *clp,
- bool is_recall)
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+ enum pnfs_layout_destroy_mode mode)
{
struct nfs_server *server;
LIST_HEAD(layout_list);
@@ -945,7 +942,7 @@ pnfs_destroy_layouts_byclid(struct nfs_client *clp,
rcu_read_unlock();
spin_unlock(&clp->cl_lock);

- return pnfs_layout_free_bulk_destroy_list(&layout_list, is_recall);
+ return pnfs_layout_free_bulk_destroy_list(&layout_list, mode);
}

/*
@@ -958,7 +955,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
nfs4_deviceid_mark_client_invalid(clp);
nfs4_deviceid_purge_client(clp);

- pnfs_destroy_layouts_byclid(clp, false);
+ pnfs_layout_destroy_byclid(clp, PNFS_LAYOUT_INVALIDATE);
}

static void
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index fa5beeaaf5da..a6f9427782c2 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -118,6 +118,11 @@ enum layoutdriver_policy_flags {
PNFS_LAYOUTGET_ON_OPEN = 1 << 3,
};

+enum pnfs_layout_destroy_mode {
+ PNFS_LAYOUT_INVALIDATE = 0,
+ PNFS_LAYOUT_BULK_RETURN,
+};
+
struct nfs4_deviceid_node;

/* Per-layout driver specific registration structure */
@@ -273,11 +278,10 @@ void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_layout_final(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
-int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
- struct nfs_fsid *fsid,
- bool is_recall);
-int pnfs_destroy_layouts_byclid(struct nfs_client *clp,
- bool is_recall);
+int pnfs_layout_destroy_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid,
+ enum pnfs_layout_destroy_mode mode);
+int pnfs_layout_destroy_byclid(struct nfs_client *clp,
+ enum pnfs_layout_destroy_mode mode);
bool nfs4_layout_refresh_old_stateid(nfs4_stateid *dst,
struct pnfs_layout_range *dst_range,
struct inode *inode);
--
2.45.2



2024-06-13 05:07:18

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 05/11] NFSv4/pnfs: Add support for the PNFS_LAYOUT_FILE_BULK_RETURN flag

From: Trond Myklebust <[email protected]>

Add a flag, PNFS_LAYOUT_FILE_BULK_RETURN, that attempts to return all
the layouts covered by a pnfs_layout_destroy_byfsid() or
pnfs_layout_destroy_byclid() call, instead of just invalidating them.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 35 +++++++++++++++++++----------------
fs/nfs/pnfs.h | 1 +
2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0e188bc303ee..3bfc74841831 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -476,6 +476,18 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
return !list_empty(&lo->plh_segs);
}

+static int pnfs_mark_layout_stateid_return(struct pnfs_layout_hdr *lo,
+ struct list_head *lseg_list,
+ enum pnfs_iomode iomode, u32 seq)
+{
+ struct pnfs_layout_range range = {
+ .iomode = iomode,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ return pnfs_mark_matching_lsegs_return(lo, lseg_list, &range, seq);
+}
+
static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
@@ -886,7 +898,10 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,

spin_lock(&inode->i_lock);
list_del_init(&lo->plh_bulk_destroy);
- if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+ if (mode == PNFS_LAYOUT_FILE_BULK_RETURN) {
+ pnfs_mark_layout_stateid_return(lo, &lseg_list,
+ IOMODE_ANY, 0);
+ } else if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
if (mode == PNFS_LAYOUT_BULK_RETURN)
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
ret = -EAGAIN;
@@ -1265,27 +1280,15 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo,
return status;
}

-static bool
-pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo,
- enum pnfs_iomode iomode,
- u32 seq)
-{
- struct pnfs_layout_range recall_range = {
- .length = NFS4_MAX_UINT64,
- .iomode = iomode,
- };
- return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
- &recall_range, seq) != -EBUSY;
-}
-
/* Return true if layoutreturn is needed */
static bool
pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
{
if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
return false;
- return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode,
- lo->plh_return_seq);
+ return pnfs_mark_layout_stateid_return(lo, &lo->plh_return_segs,
+ lo->plh_return_iomode,
+ lo->plh_return_seq) != -EBUSY;
}

static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a6f9427782c2..8fa0f152ed19 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -121,6 +121,7 @@ enum layoutdriver_policy_flags {
enum pnfs_layout_destroy_mode {
PNFS_LAYOUT_INVALIDATE = 0,
PNFS_LAYOUT_BULK_RETURN,
+ PNFS_LAYOUT_FILE_BULK_RETURN,
};

struct nfs4_deviceid_node;
--
2.45.2


2024-06-13 05:17:17

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 06/11] NFSv4/pNFS: Add a helper to defer failed layoutreturn calls

From: Trond Myklebust <[email protected]>

If the layoutreturn-on-close fails due to an RPC layer problem, such as
a timeout, then we want to retry at a later time. Add a helper function
to allow this.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 25 ++++++++++++++++++++++---
1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3bfc74841831..a79ae47b3842 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1171,6 +1171,26 @@ static void pnfs_clear_layoutcommit(struct inode *inode,
}
}

+static void
+pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range)
+{
+ const struct pnfs_layout_segment *lseg;
+ u32 seq = be32_to_cpu(arg_stateid->seqid);
+
+ if (pnfs_layout_is_valid(lo) &&
+ nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) {
+ list_for_each_entry(lseg, &lo->plh_return_segs, pls_list) {
+ if (pnfs_seqid_is_newer(lseg->pls_seq, seq) ||
+ !pnfs_should_free_range(&lseg->pls_range, range))
+ continue;
+ pnfs_set_plh_return_info(lo, range->iomode, seq);
+ break;
+ }
+ }
+}
+
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
@@ -1577,9 +1597,8 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
switch (ret) {
case -NFS4ERR_NOMATCHING_LAYOUT:
spin_lock(&inode->i_lock);
- if (pnfs_layout_is_valid(lo) &&
- nfs4_stateid_match_other(&args->stateid, &lo->plh_stateid))
- pnfs_set_plh_return_info(lo, args->range.iomode, 0);
+ pnfs_layoutreturn_retry_later_locked(lo, &args->stateid,
+ &args->range);
pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&inode->i_lock);
break;
--
2.45.2


2024-06-13 05:17:21

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 07/11] NFSv4/pNFS: Handle server reboots in pnfs_roc_release()

From: Trond Myklebust <[email protected]>

If the server reboots, then handle it by deferring the layout return.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a79ae47b3842..c8b1be1810e2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1586,8 +1586,7 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
}

void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
- struct nfs4_layoutreturn_res *res,
- int ret)
+ struct nfs4_layoutreturn_res *res, int ret)
{
struct pnfs_layout_hdr *lo = args->layout;
struct inode *inode = args->inode;
@@ -1595,6 +1594,9 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args,
struct nfs4_xdr_opaque_data *ld_private = args->ld_private;

switch (ret) {
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_NOMATCHING_LAYOUT:
spin_lock(&inode->i_lock);
pnfs_layoutreturn_retry_later_locked(lo, &args->stateid,
--
2.45.2


2024-06-13 05:17:30

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 08/11] NFSv4/pNFS: Retry the layout return later in case of a timeout or reboot

From: Trond Myklebust <[email protected]>

If the layout return failed due to a timeout or reboot, then leave the
layout segments on the list so that the layout return gets replayed
later. The exception is when we're freeing the inode, in which case the
layout segments are freed as before.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 21 ++++++++++++++++++++-
fs/nfs/pnfs.c | 12 ++++++++++++
fs/nfs/pnfs.h | 3 +++
3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ae835d14ac75..952d1e930185 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9944,6 +9944,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
if (!nfs41_sequence_process(task, &lrp->res.seq_res))
return;

+ if (task->tk_rpc_status == -ETIMEDOUT) {
+ lrp->rpc_status = -EAGAIN;
+ lrp->res.lrs_present = 0;
+ return;
+ }
/*
* Was there an RPC level error? Assume the call succeeded,
* and that we need to release the layout
@@ -9966,6 +9971,15 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
fallthrough;
case 0:
break;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ nfs4_schedule_session_recovery(server->nfs_client->cl_session,
+ task->tk_status);
+ lrp->res.lrs_present = 0;
+ lrp->rpc_status = -EAGAIN;
+ task->tk_status = 0;
+ break;
case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
break;
@@ -9983,8 +9997,13 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
struct pnfs_layout_hdr *lo = lrp->args.layout;

- pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range,
+ if (lrp->rpc_status == 0 || !lrp->inode)
+ pnfs_layoutreturn_free_lsegs(
+ lo, &lrp->args.stateid, &lrp->args.range,
lrp->res.lrs_present ? &lrp->res.stateid : NULL);
+ else
+ pnfs_layoutreturn_retry_later(lo, &lrp->args.stateid,
+ &lrp->args.range);
nfs4_sequence_free_slot(&lrp->res.seq_res);
if (lrp->ld_private.ops && lrp->ld_private.ops->free)
lrp->ld_private.ops->free(&lrp->ld_private);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c8b1be1810e2..04a52fa3d28c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1191,6 +1191,18 @@ pnfs_layoutreturn_retry_later_locked(struct pnfs_layout_hdr *lo,
}
}

+void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range)
+{
+ struct inode *inode = lo->plh_inode;
+
+ spin_lock(&inode->i_lock);
+ pnfs_layoutreturn_retry_later_locked(lo, arg_stateid, range);
+ pnfs_clear_layoutreturn_waitbit(lo);
+ spin_unlock(&inode->i_lock);
+}
+
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 8fa0f152ed19..cd23a38eac75 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -328,6 +328,9 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
enum pnfs_iomode iomode,
bool strict_iomode,
gfp_t gfp_flags);
+void pnfs_layoutreturn_retry_later(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *arg_stateid,
+ const struct pnfs_layout_range *range);
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
--
2.45.2