Trond Myklebust (4):
pNFS: Ensure LAYOUTGET and LAYOUTRETURN are properly serialised
pNFS: Fix pnfs_set_layout_stateid() to clear NFS_LAYOUT_INVALID_STID
pNFS: Clear out all layout segments if the server unsets
lrp->res.lrs_present
pNFS: Don't forget the layout stateid if there are outstanding
LAYOUTGETs
fs/nfs/nfs4proc.c | 9 ++++++---
fs/nfs/pnfs.c | 42 ++++++++++++++++++++++++------------------
2 files changed, 30 insertions(+), 21 deletions(-)
--
2.7.4
According to RFC5661, the client is responsible for serialising
LAYOUTGET and LAYOUTRETURN to avoid ambiguity. Consider the case
where we send both in parallel.
Client Server
====== ======
LAYOUTGET(seqid=X)
LAYOUTRETURN(seqid=X)
LAYOUTGET return seqid=X+1
LAYOUTRETURN return seqid=X+2
Process LAYOUTRETURN
Forget layout stateid
Process LAYOUTGET
Set seqid=X+1
The client processes the layoutget/layoutreturn in the wrong order,
and since the result of the layoutreturn was to clear the only
existing layout segment, the client forgets the layout stateid.
When the LAYOUTGET comes in, it is treated as having a completely
new stateid, and so the client sets the wrong sequence id...
Fix is to check if there are outstanding LAYOUTGET requests
before we send the LAYOUTRETURN (note that LAYOUGET will already
wait if it sees an outstanding LAYOUTRETURN).
Signed-off-by: Trond Myklebust <[email protected]>
Cc: [email protected] # v4.5+
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6daf034645c8..519ad320f5cd 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -899,6 +899,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
nfs4_stateid *stateid,
enum pnfs_iomode *iomode)
{
+ /* Serialise LAYOUTGET/LAYOUTRETURN */
+ if (atomic_read(&lo->plh_outstanding) != 0)
+ return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return false;
pnfs_get_layout_hdr(lo);
--
2.7.4
If the layout was marked as invalid, we want to ensure to initialise
the layout header fields correctly.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 36 +++++++++++++++++++-----------------
1 file changed, 19 insertions(+), 17 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 519ad320f5cd..cd8b5fca33f6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -768,17 +768,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
pnfs_destroy_layouts_byclid(clp, false);
}
+static void
+pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
+{
+ lo->plh_return_iomode = 0;
+ lo->plh_return_seq = 0;
+ clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+}
+
/* update lo->plh_stateid with new if is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
bool update_barrier)
{
u32 oldseq, newseq, new_barrier = 0;
- bool invalid = !pnfs_layout_is_valid(lo);
oldseq = be32_to_cpu(lo->plh_stateid.seqid);
newseq = be32_to_cpu(new->seqid);
- if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) {
+
+ if (!pnfs_layout_is_valid(lo)) {
+ nfs4_stateid_copy(&lo->plh_stateid, new);
+ lo->plh_barrier = newseq;
+ pnfs_clear_layoutreturn_info(lo);
+ clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ return;
+ }
+ if (pnfs_seqid_is_newer(newseq, oldseq)) {
nfs4_stateid_copy(&lo->plh_stateid, new);
/*
* Because of wraparound, we want to keep the barrier
@@ -790,7 +805,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
new_barrier = be32_to_cpu(new->seqid);
else if (new_barrier == 0)
return;
- if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
+ if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
lo->plh_barrier = new_barrier;
}
@@ -886,14 +901,6 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
}
-static void
-pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
-{
- lo->plh_return_iomode = 0;
- lo->plh_return_seq = 0;
- clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
-}
-
static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
nfs4_stateid *stateid,
@@ -1801,16 +1808,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
*/
pnfs_mark_layout_stateid_invalid(lo, &free_me);
- nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
- lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
+ pnfs_set_layout_stateid(lo, &res->stateid, true);
}
pnfs_get_lseg(lseg);
pnfs_layout_insert_lseg(lo, lseg, &free_me);
- if (!pnfs_layout_is_valid(lo)) {
- pnfs_clear_layoutreturn_info(lo);
- clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
- }
if (res->return_on_close)
--
2.7.4
If the server fails to set lrp->res.lrs_present in the LAYOUTRETURN reply,
then that means it believes the client holds no more layout state for that
file, and that the layout stateid is now invalid.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f5aecaabcb7c..c380d2ee4137 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8190,10 +8190,13 @@ static void nfs4_layoutreturn_release(void *calldata)
dprintk("--> %s\n", __func__);
spin_lock(&lo->plh_inode->i_lock);
- pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,
- be32_to_cpu(lrp->args.stateid.seqid));
- if (lrp->res.lrs_present && pnfs_layout_is_valid(lo))
+ if (lrp->res.lrs_present) {
+ pnfs_mark_matching_lsegs_invalid(lo, &freeme,
+ &lrp->args.range,
+ be32_to_cpu(lrp->args.stateid.seqid));
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+ } else
+ pnfs_mark_layout_stateid_invalid(lo, &freeme);
pnfs_clear_layoutreturn_waitbit(lo);
spin_unlock(&lo->plh_inode->i_lock);
nfs4_sequence_free_slot(&lrp->res.seq_res);
--
2.7.4
If there are outstanding LAYOUTGET rpc calls, then we want to ensure that
we keep the layout stateid around so we that don't inadvertently pick up
an old/misordered sequence id.
The race is as follows:
Client Server
====== ======
LAYOUTGET(seqid)
LAYOUTGET(seqid)
return LAYOUTGET(seqid+1)
return LAYOUTGET(seqid+2)
process LAYOUTGET(seqid+2)
forget layout
process LAYOUTGET(seqid+1)
If it forgets the layout stateid before processing seqid+1, then
the client will not check the layout->plh_barrier, and so will set
the stateid with seqid+1.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cd8b5fca33f6..2c93a85eda51 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -365,7 +365,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
atomic_dec(&lo->plh_refcount);
if (list_empty(&lo->plh_segs)) {
- set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ if (atomic_read(&lo->plh_outstanding) == 0)
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
}
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
--
2.7.4
On Mon, 2016-09-05 at 09:45 -0400, Trond Myklebust wrote:
> Trond Myklebust (4):
> pNFS: Ensure LAYOUTGET and LAYOUTRETURN are properly serialised
> pNFS: Fix pnfs_set_layout_stateid() to clear
> NFS_LAYOUT_INVALID_STID
> pNFS: Clear out all layout segments if the server unsets
> lrp->res.lrs_present
> pNFS: Don't forget the layout stateid if there are outstanding
> LAYOUTGETs
>
> fs/nfs/nfs4proc.c | 9 ++++++---
> fs/nfs/pnfs.c | 42 ++++++++++++++++++++++++------------------
> 2 files changed, 30 insertions(+), 21 deletions(-)
>
All look good to me:
Reviewed-by: Jeff Layton <[email protected]>