2014-02-20 02:28:29

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 1/7] NFSv4.1: Fix wraparound issues in pnfs_seqid_is_newer()

Subtraction of signed integers does not have well defined wraparound
semantics in the C99 standard. In order to be wraparound-safe, we
have to use unsigned subtraction, and then cast the result.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4755858e37a0..6e67ada6c22c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -662,7 +662,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
*/
static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
{
- return (s32)s1 - (s32)s2 > 0;
+ return (s32)(s1 - s2) > 0;
}

/* update lo->plh_stateid with new if is more recent */
--
1.8.5.3



2014-02-20 02:28:31

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 3/7] NFSv4.1: Ensure that the layout recall callback matches layout stateids

It is not sufficient to compare filehandles when we receive a layout
recall from the server; we also need to check that the layout stateids
match.

Reported-by: shaobingqing <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index ae2e87b95453..570c8a1d2f3d 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -112,7 +112,8 @@ out:
* TODO: keep track of all layouts (and delegations) in a hash table
* hashed by filehandle.
*/
-static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh)
+static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
+ struct nfs_fh *fh, nfs4_stateid *stateid)
{
struct nfs_server *server;
struct inode *ino;
@@ -120,6 +121,8 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,

list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
+ if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid))
+ continue;
if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh))
continue;
ino = igrab(lo->plh_inode);
@@ -141,13 +144,14 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
return NULL;
}

-static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh)
+static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
+ struct nfs_fh *fh, nfs4_stateid *stateid)
{
struct pnfs_layout_hdr *lo;

spin_lock(&clp->cl_lock);
rcu_read_lock();
- lo = get_layout_by_fh_locked(clp, fh);
+ lo = get_layout_by_fh_locked(clp, fh, stateid);
rcu_read_unlock();
spin_unlock(&clp->cl_lock);

@@ -162,9 +166,9 @@ static u32 initiate_file_draining(struct nfs_client *clp,
u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
LIST_HEAD(free_me_list);

- lo = get_layout_by_fh(clp, &args->cbl_fh);
+ lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
if (!lo)
- return NFS4ERR_NOMATCHING_LAYOUT;
+ goto out;

ino = lo->plh_inode;
spin_lock(&ino->i_lock);
@@ -179,6 +183,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
pnfs_free_lseg_list(&free_me_list);
pnfs_put_layout_hdr(lo);
iput(ino);
+out:
return rv;
}

--
1.8.5.3


2014-02-20 02:28:41

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 7/7] NFSv4: Clear the open state flags if the new stateid does not match

RFC3530 and RFC5661 both prescribe that the 'opaque' field of the
open stateid returned by new OPEN/OPEN_DOWNGRADE/CLOSE calls for
the same file and open owner should match.
If this is not the case, assume that the open state has been lost,
and that we need to recover it.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4_fs.h | 1 +
fs/nfs/nfs4proc.c | 30 ++++++++++++++++++++++++++----
fs/nfs/nfs4state.c | 2 +-
3 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index df81fcc138a7..e1d1badbe53c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -427,6 +427,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
extern void nfs_inode_find_state_and_recover(struct inode *inode,
const nfs4_stateid *stateid);
+extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *);
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1f593a0bd938..2427ef4c4d63 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1137,13 +1137,30 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
nfs4_state_set_mode_locked(state, state->state | fmode);
}

+static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state)
+{
+ struct nfs_client *clp = state->owner->so_server->nfs_client;
+ bool need_recover = false;
+
+ if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly)
+ need_recover = true;
+ if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly)
+ need_recover = true;
+ if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr)
+ need_recover = true;
+ if (need_recover)
+ nfs4_state_mark_reclaim_nograce(clp, state);
+}
+
static bool nfs_need_update_open_stateid(struct nfs4_state *state,
nfs4_stateid *stateid)
{
if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
return true;
- if (!nfs4_stateid_match_other(stateid, &state->open_stateid))
+ if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) {
+ nfs_test_and_clear_all_open_stateid(state);
return true;
+ }
if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
return true;
return false;
@@ -1179,6 +1196,8 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *state
write_seqlock(&state->seqlock);
nfs_clear_open_stateid_locked(state, stateid, fmode);
write_sequnlock(&state->seqlock);
+ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
+ nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
}

static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
@@ -1255,6 +1274,8 @@ no_delegation:
__update_open_stateid(state, open_stateid, NULL, fmode);
ret = 1;
}
+ if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
+ nfs4_schedule_state_manager(state->owner->so_server->nfs_client);

return ret;
}
@@ -1488,12 +1509,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
struct nfs4_state *newstate;
int ret;

+ /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
/* memory barrier prior to reading state->n_* */
clear_bit(NFS_DELEGATED_STATE, &state->flags);
clear_bit(NFS_OPEN_STATE, &state->flags);
smp_rmb();
if (state->n_rdwr != 0) {
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
if (ret != 0)
return ret;
@@ -1501,7 +1525,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
return -ESTALE;
}
if (state->n_wronly != 0) {
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
if (ret != 0)
return ret;
@@ -1509,7 +1532,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
return -ESTALE;
}
if (state->n_rdonly != 0) {
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
if (ret != 0)
return ret;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e5be72518bd7..b524df9f6a74 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1321,7 +1321,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st
return 1;
}

-static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
+int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
--
1.8.5.3


2014-02-20 02:28:40

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 6/7] NFSv4: Use correct locking when updating nfs4_state in nfs4_close_done

The stateid and state->flags should be updated atomically under
protection of the state->seqlock.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 65 ++++++++++++++++++++++++++++++-------------------------
1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 96e0bd42f38c..1f593a0bd938 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1149,6 +1149,38 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state,
return false;
}

+static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
+ nfs4_stateid *stateid, fmode_t fmode)
+{
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
+ switch (fmode & (FMODE_READ|FMODE_WRITE)) {
+ case FMODE_WRITE:
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ break;
+ case FMODE_READ:
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ break;
+ case 0:
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_OPEN_STATE, &state->flags);
+ }
+ if (stateid == NULL)
+ return;
+ if (!nfs_need_update_open_stateid(state, stateid))
+ return;
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ nfs4_stateid_copy(&state->stateid, stateid);
+ nfs4_stateid_copy(&state->open_stateid, stateid);
+}
+
+static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+{
+ write_seqlock(&state->seqlock);
+ nfs_clear_open_stateid_locked(state, stateid, fmode);
+ write_sequnlock(&state->seqlock);
+}
+
static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
{
switch (fmode) {
@@ -1168,13 +1200,6 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
nfs4_stateid_copy(&state->open_stateid, stateid);
}

-static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
-{
- write_seqlock(&state->seqlock);
- nfs_set_open_stateid_locked(state, stateid, fmode);
- write_sequnlock(&state->seqlock);
-}
-
static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode)
{
/*
@@ -2489,26 +2514,6 @@ static void nfs4_free_closedata(void *data)
kfree(calldata);
}

-static void nfs4_close_clear_stateid_flags(struct nfs4_state *state,
- fmode_t fmode)
-{
- spin_lock(&state->owner->so_lock);
- clear_bit(NFS_O_RDWR_STATE, &state->flags);
- switch (fmode & (FMODE_READ|FMODE_WRITE)) {
- case FMODE_WRITE:
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
- break;
- case FMODE_READ:
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
- break;
- case 0:
- clear_bit(NFS_O_RDONLY_STATE, &state->flags);
- clear_bit(NFS_O_WRONLY_STATE, &state->flags);
- clear_bit(NFS_OPEN_STATE, &state->flags);
- }
- spin_unlock(&state->owner->so_lock);
-}
-
static void nfs4_close_done(struct rpc_task *task, void *data)
{
struct nfs4_closedata *calldata = data;
@@ -2527,9 +2532,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->roc)
pnfs_roc_set_barrier(state->inode,
calldata->roc_barrier);
- nfs_set_open_stateid(state, &calldata->res.stateid, 0);
+ nfs_clear_open_stateid(state, &calldata->res.stateid, 0);
renew_lease(server, calldata->timestamp);
- break;
+ goto out_release;
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_OLD_STATEID:
@@ -2543,7 +2548,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
goto out_release;
}
}
- nfs4_close_clear_stateid_flags(state, calldata->arg.fmode);
+ nfs_clear_open_stateid(state, NULL, calldata->arg.fmode);
out_release:
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
--
1.8.5.3


2014-02-20 02:28:30

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 2/7] NFSv4: Don't update the open stateid unless it is newer than the old one

This patch is in preparation for the NFSv4.1 parallel open capability.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4_fs.h | 10 ++++++++++
fs/nfs/nfs4proc.c | 21 +++++++++++++++++----
2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a5b27c2d9689..df81fcc138a7 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -500,6 +500,16 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei
return memcmp(dst, src, sizeof(*dst)) == 0;
}

+static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src)
+{
+ return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0;
+}
+
+static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2)
+{
+ return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0;
+}
+
static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
{
return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2da6a698b8f7..96e0bd42f38c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1137,12 +1137,20 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
nfs4_state_set_mode_locked(state, state->state | fmode);
}

+static bool nfs_need_update_open_stateid(struct nfs4_state *state,
+ nfs4_stateid *stateid)
+{
+ if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0)
+ return true;
+ if (!nfs4_stateid_match_other(stateid, &state->open_stateid))
+ return true;
+ if (nfs4_stateid_is_newer(stateid, &state->open_stateid))
+ return true;
+ return false;
+}
+
static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
{
- if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
- nfs4_stateid_copy(&state->stateid, stateid);
- nfs4_stateid_copy(&state->open_stateid, stateid);
- set_bit(NFS_OPEN_STATE, &state->flags);
switch (fmode) {
case FMODE_READ:
set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -1153,6 +1161,11 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *
case FMODE_READ|FMODE_WRITE:
set_bit(NFS_O_RDWR_STATE, &state->flags);
}
+ if (!nfs_need_update_open_stateid(state, stateid))
+ return;
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ nfs4_stateid_copy(&state->stateid, stateid);
+ nfs4_stateid_copy(&state->open_stateid, stateid);
}

static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
--
1.8.5.3


2014-02-20 02:28:33

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 4/7] NFSv4.1: Minor optimisation in get_layout_by_fh_locked()

If the filehandles match, but the igrab() fails, or the layout is
freed before we can get it, then just return NULL.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 570c8a1d2f3d..41db5258e7a7 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -127,13 +127,13 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
continue;
ino = igrab(lo->plh_inode);
if (!ino)
- continue;
+ break;
spin_lock(&ino->i_lock);
/* Is this layout in the process of being freed? */
if (NFS_I(ino)->layout != lo) {
spin_unlock(&ino->i_lock);
iput(ino);
- continue;
+ break;
}
pnfs_get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
--
1.8.5.3


2014-02-20 02:28:34

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 5/7] NFSv4.1: Ensure that we free existing layout segments if we get a new layout

If the server returns a completely new layout stateid in response to our
LAYOUTGET, then make sure to free any existing layout segments.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 15 +++++++++++++++
1 file changed, 15 insertions(+)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6e67ada6c22c..cb53d450ae32 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -665,6 +665,17 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
return (s32)(s1 - s2) > 0;
}

+static void
+pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new,
+ struct list_head *free_me_list)
+{
+ if (nfs4_stateid_match_other(&lo->plh_stateid, new))
+ return;
+ /* Layout is new! Kill existing layout segments */
+ pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL);
+}
+
/* update lo->plh_stateid with new if is more recent */
void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
@@ -1315,6 +1326,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
struct inode *ino = lo->plh_inode;
+ LIST_HEAD(free_me);
int status = 0;

/* Inject layout blob into I/O device driver */
@@ -1341,6 +1353,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out_forget_reply;
}

+ /* Check that the new stateid matches the old stateid */
+ pnfs_verify_layout_stateid(lo, &res->stateid, &free_me);
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid, false);

@@ -1355,6 +1369,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}

spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&free_me);
return lseg;
out:
return ERR_PTR(status);
--
1.8.5.3