2015-01-24 05:39:33

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 0/5] Parallelise OPEN/OPEN_DOWNGRADE/CLOSE in NFSv4.x (x>0)

Hi all,

The following patchset improves the tracking of stateids in the CLOSE code
to ensure that we can rely on it in situations where we remove the NFSv4.0
serialisation of state that shares the same open owners.
It then proceeds to add a series of changes to the NFSv4.0 seqid code to
ensure that it can be removed in the case of NFSv4.0 opens.
Only lightly tested so far, but it seems to be working...

In principle there is nothing stopping us from doing the same with byte
range locks; we only need to add better atomicity of stateid updates,
so that we can track the seqid number in the stateid and ensure that it
never regresses. The exercise has been left for the reader...

Cheers
Trond

Trond Myklebust (5):
NFSv4: Fix an atomicity problem in CLOSE
NFSv4: More CLOSE/OPEN races
NFSv4: Convert nfs_alloc_seqid() to return an ERR_PTR() if allocation
fails
NFSv4: Check for NULL argument in nfs_*_seqid() functions
NFSv4.1: Allow parallel OPEN/OPEN_DOWNGRADE/CLOSE

fs/nfs/nfs4_fs.h | 3 +++
fs/nfs/nfs4proc.c | 50 ++++++++++++++++++++++++++++++++++++++++---------
fs/nfs/nfs4state.c | 31 ++++++++++++++++++------------
fs/nfs/nfs4xdr.c | 9 ++++++---
include/linux/nfs_xdr.h | 2 +-
5 files changed, 70 insertions(+), 25 deletions(-)

--
2.1.0



2015-01-24 05:39:34

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 1/5] NFSv4: Fix an atomicity problem in CLOSE

If we are to remove the serialisation of OPEN/CLOSE, then we need to
ensure that the stateid sent as part of a CLOSE operation does not
change after we test the state in nfs4_close_prepare.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 7 ++++++-
fs/nfs/nfs4xdr.c | 4 ++--
include/linux/nfs_xdr.h | 2 +-
3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c347705b0161..4863dec10865 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2587,6 +2587,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_EXPIRED:
+ if (!nfs4_stateid_match(&calldata->arg.stateid,
+ &state->stateid)) {
+ rpc_restart_call_prepare(task);
+ goto out_release;
+ }
if (calldata->arg.fmode == 0)
break;
default:
@@ -2619,6 +2624,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
+ nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid);
/* Calculate the change in open mode */
calldata->arg.fmode = 0;
if (state->n_rdwr == 0) {
@@ -2757,7 +2763,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata->inode = state->inode;
calldata->state = state;
calldata->arg.fh = NFS_FH(state->inode);
- calldata->arg.stateid = &state->open_stateid;
/* Serialization for the sequence id */
calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
if (calldata->arg.seqid == NULL)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index cb4376b78ed9..7e7be5ab70bb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1125,7 +1125,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
{
encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr);
encode_nfs4_seqid(xdr, arg->seqid);
- encode_nfs4_stateid(xdr, arg->stateid);
+ encode_nfs4_stateid(xdr, &arg->stateid);
}

static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
@@ -1530,7 +1530,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
{
encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr);
- encode_nfs4_stateid(xdr, arg->stateid);
+ encode_nfs4_stateid(xdr, &arg->stateid);
encode_nfs4_seqid(xdr, arg->seqid);
encode_share_access(xdr, arg->fmode);
}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 467c84efb596..7e38d641236e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -389,7 +389,7 @@ struct nfs_open_confirmres {
struct nfs_closeargs {
struct nfs4_sequence_args seq_args;
struct nfs_fh * fh;
- nfs4_stateid * stateid;
+ nfs4_stateid stateid;
struct nfs_seqid * seqid;
fmode_t fmode;
const u32 * bitmask;
--
2.1.0


2015-01-24 05:39:35

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 2/5] NFSv4: More CLOSE/OPEN races

If an OPEN RPC call races with a CLOSE or OPEN_DOWNGRADE so that it
updates the nfs_state structure before the CLOSE/OPEN_DOWNGRADE has
a chance to do so, then we know that the state->flags need to be
recalculated from scratch.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4863dec10865..a6c04a5812d0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1167,6 +1167,16 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state,
return false;
}

+static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
+{
+ if (state->n_wronly)
+ set_bit(NFS_O_WRONLY_STATE, &state->flags);
+ if (state->n_rdonly)
+ set_bit(NFS_O_RDONLY_STATE, &state->flags);
+ if (state->n_rdwr)
+ set_bit(NFS_O_RDWR_STATE, &state->flags);
+}
+
static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
nfs4_stateid *stateid, fmode_t fmode)
{
@@ -1185,8 +1195,12 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
}
if (stateid == NULL)
return;
- if (!nfs_need_update_open_stateid(state, stateid))
+ /* Handle races with OPEN */
+ if (!nfs4_stateid_match_other(stateid, &state->open_stateid) ||
+ !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
+ nfs_resync_open_stateid_locked(state);
return;
+ }
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
nfs4_stateid_copy(&state->stateid, stateid);
nfs4_stateid_copy(&state->open_stateid, stateid);
--
2.1.0


2015-01-24 05:39:36

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 3/5] NFSv4: Convert nfs_alloc_seqid() to return an ERR_PTR() if allocation fails

When we relax the sequencing on the NFSv4.1 OPEN/CLOSE code, we will want
to use the value NULL to indicate that no sequencing is needed.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 10 +++++-----
fs/nfs/nfs4state.c | 10 +++++-----
2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a6c04a5812d0..c9c38077075c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -988,7 +988,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
goto err_free_p;

p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
- if (p->o_arg.seqid == NULL)
+ if (IS_ERR(p->o_arg.seqid))
goto err_free_label;
nfs_sb_active(dentry->d_sb);
p->dentry = dget(dentry);
@@ -2779,7 +2779,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata->arg.fh = NFS_FH(state->inode);
/* Serialization for the sequence id */
calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
- if (calldata->arg.seqid == NULL)
+ if (IS_ERR(calldata->arg.seqid))
goto out_free_calldata;
calldata->arg.fmode = 0;
calldata->arg.bitmask = server->cache_consistency_bitmask;
@@ -5517,7 +5517,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
goto out;
seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
status = -ENOMEM;
- if (seqid == NULL)
+ if (IS_ERR(seqid))
goto out;
task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid);
status = PTR_ERR(task);
@@ -5558,10 +5558,10 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask);
- if (p->arg.open_seqid == NULL)
+ if (IS_ERR(p->arg.open_seqid))
goto out_free;
p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
- if (p->arg.lock_seqid == NULL)
+ if (IS_ERR(p->arg.lock_seqid))
goto out_free_seqid;
p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5194933ed419..b922e43d69b8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1003,11 +1003,11 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m
struct nfs_seqid *new;

new = kmalloc(sizeof(*new), gfp_mask);
- if (new != NULL) {
- new->sequence = counter;
- INIT_LIST_HEAD(&new->list);
- new->task = NULL;
- }
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ new->sequence = counter;
+ INIT_LIST_HEAD(&new->list);
+ new->task = NULL;
return new;
}

--
2.1.0


2015-01-24 05:39:37

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 4/5] NFSv4: Check for NULL argument in nfs_*_seqid() functions

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4state.c | 21 ++++++++++++++-------
fs/nfs/nfs4xdr.c | 5 ++++-
2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b922e43d69b8..590f096fd011 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1015,7 +1015,7 @@ void nfs_release_seqid(struct nfs_seqid *seqid)
{
struct nfs_seqid_counter *sequence;

- if (list_empty(&seqid->list))
+ if (seqid == NULL || list_empty(&seqid->list))
return;
sequence = seqid->sequence;
spin_lock(&sequence->lock);
@@ -1071,13 +1071,15 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)

void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
{
- struct nfs4_state_owner *sp = container_of(seqid->sequence,
- struct nfs4_state_owner, so_seqid);
- struct nfs_server *server = sp->so_server;
+ struct nfs4_state_owner *sp;
+
+ if (seqid == NULL)
+ return;

+ sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid);
if (status == -NFS4ERR_BAD_SEQID)
nfs4_drop_state_owner(sp);
- if (!nfs4_has_session(server->nfs_client))
+ if (!nfs4_has_session(sp->so_server->nfs_client))
nfs_increment_seqid(status, seqid);
}

@@ -1088,14 +1090,18 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
*/
void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
{
- nfs_increment_seqid(status, seqid);
+ if (seqid != NULL)
+ nfs_increment_seqid(status, seqid);
}

int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
{
- struct nfs_seqid_counter *sequence = seqid->sequence;
+ struct nfs_seqid_counter *sequence;
int status = 0;

+ if (seqid == NULL)
+ goto out;
+ sequence = seqid->sequence;
spin_lock(&sequence->lock);
seqid->task = task;
if (list_empty(&seqid->list))
@@ -1106,6 +1112,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
status = -EAGAIN;
unlock:
spin_unlock(&sequence->lock);
+out:
return status;
}

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 7e7be5ab70bb..d05fada4929c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -946,7 +946,10 @@ static void encode_uint64(struct xdr_stream *xdr, u64 n)
static void encode_nfs4_seqid(struct xdr_stream *xdr,
const struct nfs_seqid *seqid)
{
- encode_uint32(xdr, seqid->sequence->counter);
+ if (seqid != NULL)
+ encode_uint32(xdr, seqid->sequence->counter);
+ else
+ encode_uint32(xdr, 0);
}

static void encode_compound_hdr(struct xdr_stream *xdr,
--
2.1.0


2015-01-24 05:39:38

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 5/5] NFSv4.1: Allow parallel OPEN/OPEN_DOWNGRADE/CLOSE

Remove the serialisation of OPEN/OPEN_DOWNGRADE and CLOSE calls for the
case of NFSv4.1 and newer.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4_fs.h | 3 +++
fs/nfs/nfs4proc.c | 17 +++++++++++++++--
2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a08178764cf9..e57290a66b76 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -44,6 +44,7 @@ enum nfs4_client_state {
#define NFS4_RENEW_TIMEOUT 0x01
#define NFS4_RENEW_DELEGATION_CB 0x02

+struct nfs_seqid_counter;
struct nfs4_minor_version_ops {
u32 minor_version;
unsigned init_caps;
@@ -56,6 +57,8 @@ struct nfs4_minor_version_ops {
struct nfs_fsinfo *);
void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *);
+ struct nfs_seqid *
+ (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
const struct rpc_call_ops *call_sync_ops;
const struct nfs4_state_recovery_ops *reboot_recovery_ops;
const struct nfs4_state_recovery_ops *nograce_recovery_ops;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c9c38077075c..0a279ad5421f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -977,6 +977,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
struct dentry *parent = dget_parent(dentry);
struct inode *dir = parent->d_inode;
struct nfs_server *server = NFS_SERVER(dir);
+ struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
struct nfs4_opendata *p;

p = kzalloc(sizeof(*p), gfp_mask);
@@ -987,7 +988,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
if (IS_ERR(p->f_label))
goto err_free_p;

- p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
+ alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid;
+ p->o_arg.seqid = alloc_seqid(&sp->so_seqid, gfp_mask);
if (IS_ERR(p->o_arg.seqid))
goto err_free_label;
nfs_sb_active(dentry->d_sb);
@@ -2751,6 +2753,7 @@ static bool nfs4_roc(struct inode *inode)
int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
{
struct nfs_server *server = NFS_SERVER(state->inode);
+ struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
struct nfs4_closedata *calldata;
struct nfs4_state_owner *sp = state->owner;
struct rpc_task *task;
@@ -2778,7 +2781,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata->state = state;
calldata->arg.fh = NFS_FH(state->inode);
/* Serialization for the sequence id */
- calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
+ alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid;
+ calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask);
if (IS_ERR(calldata->arg.seqid))
goto out_free_calldata;
calldata->arg.fmode = 0;
@@ -8414,6 +8418,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
.match_stateid = nfs4_match_stateid,
.find_root_sec = nfs4_find_root_sec,
.free_lock_state = nfs4_release_lockowner,
+ .alloc_seqid = nfs_alloc_seqid,
.call_sync_ops = &nfs40_call_sync_ops,
.reboot_recovery_ops = &nfs40_reboot_recovery_ops,
.nograce_recovery_ops = &nfs40_nograce_recovery_ops,
@@ -8422,6 +8427,12 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
};

#if defined(CONFIG_NFS_V4_1)
+static struct nfs_seqid *
+nfs_alloc_no_seqid(struct nfs_seqid_counter *arg1, gfp_t arg2)
+{
+ return NULL;
+}
+
static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
.minor_version = 1,
.init_caps = NFS_CAP_READDIRPLUS
@@ -8435,6 +8446,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
.match_stateid = nfs41_match_stateid,
.find_root_sec = nfs41_find_root_sec,
.free_lock_state = nfs41_free_lock_state,
+ .alloc_seqid = nfs_alloc_no_seqid,
.call_sync_ops = &nfs41_call_sync_ops,
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
@@ -8461,6 +8473,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
.find_root_sec = nfs41_find_root_sec,
.free_lock_state = nfs41_free_lock_state,
.call_sync_ops = &nfs41_call_sync_ops,
+ .alloc_seqid = nfs_alloc_no_seqid,
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
.state_renewal_ops = &nfs41_state_renewal_ops,
--
2.1.0


2015-01-24 11:46:31

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 1/5] NFSv4: Fix an atomicity problem in CLOSE

On Sat, 24 Jan 2015 00:39:24 -0500
Trond Myklebust <[email protected]> wrote:

> If we are to remove the serialisation of OPEN/CLOSE, then we need to
> ensure that the stateid sent as part of a CLOSE operation does not
> change after we test the state in nfs4_close_prepare.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/nfs4proc.c | 7 ++++++-
> fs/nfs/nfs4xdr.c | 4 ++--
> include/linux/nfs_xdr.h | 2 +-
> 3 files changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index c347705b0161..4863dec10865 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -2587,6 +2587,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
> case -NFS4ERR_OLD_STATEID:
> case -NFS4ERR_BAD_STATEID:
> case -NFS4ERR_EXPIRED:
> + if (!nfs4_stateid_match(&calldata->arg.stateid,
> + &state->stateid)) {
> + rpc_restart_call_prepare(task);
> + goto out_release;
> + }

Do we need a similar check in the open codepath -- possibly in
nfs4_open_done? AFAICT, currently if the OPEN ends up "losing" the race
here, then we'll fall into full-on stateid recovery, which is almost
certainly not what we want.

> if (calldata->arg.fmode == 0)
> break;
> default:
> @@ -2619,6 +2624,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
> is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
> is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
> is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
> + nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid);
> /* Calculate the change in open mode */
> calldata->arg.fmode = 0;
> if (state->n_rdwr == 0) {
> @@ -2757,7 +2763,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
> calldata->inode = state->inode;
> calldata->state = state;
> calldata->arg.fh = NFS_FH(state->inode);
> - calldata->arg.stateid = &state->open_stateid;
> /* Serialization for the sequence id */
> calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
> if (calldata->arg.seqid == NULL)
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index cb4376b78ed9..7e7be5ab70bb 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -1125,7 +1125,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
> {
> encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr);
> encode_nfs4_seqid(xdr, arg->seqid);
> - encode_nfs4_stateid(xdr, arg->stateid);
> + encode_nfs4_stateid(xdr, &arg->stateid);
> }
>
> static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
> @@ -1530,7 +1530,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
> static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr)
> {
> encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr);
> - encode_nfs4_stateid(xdr, arg->stateid);
> + encode_nfs4_stateid(xdr, &arg->stateid);
> encode_nfs4_seqid(xdr, arg->seqid);
> encode_share_access(xdr, arg->fmode);
> }
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 467c84efb596..7e38d641236e 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -389,7 +389,7 @@ struct nfs_open_confirmres {
> struct nfs_closeargs {
> struct nfs4_sequence_args seq_args;
> struct nfs_fh * fh;
> - nfs4_stateid * stateid;
> + nfs4_stateid stateid;
> struct nfs_seqid * seqid;
> fmode_t fmode;
> const u32 * bitmask;


--
Jeff Layton <[email protected]>

2015-01-24 11:53:41

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 1/5] NFSv4: Fix an atomicity problem in CLOSE

On Sat, 24 Jan 2015 06:46:28 -0500
Jeff Layton <[email protected]> wrote:

> On Sat, 24 Jan 2015 00:39:24 -0500
> Trond Myklebust <[email protected]> wrote:
>
> > If we are to remove the serialisation of OPEN/CLOSE, then we need to
> > ensure that the stateid sent as part of a CLOSE operation does not
> > change after we test the state in nfs4_close_prepare.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> > fs/nfs/nfs4proc.c | 7 ++++++-
> > fs/nfs/nfs4xdr.c | 4 ++--
> > include/linux/nfs_xdr.h | 2 +-
> > 3 files changed, 9 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index c347705b0161..4863dec10865 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -2587,6 +2587,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
> > case -NFS4ERR_OLD_STATEID:
> > case -NFS4ERR_BAD_STATEID:
> > case -NFS4ERR_EXPIRED:
> > + if (!nfs4_stateid_match(&calldata->arg.stateid,
> > + &state->stateid)) {
> > + rpc_restart_call_prepare(task);
> > + goto out_release;
> > + }
>
> Do we need a similar check in the open codepath -- possibly in
> nfs4_open_done? AFAICT, currently if the OPEN ends up "losing" the race
> here, then we'll fall into full-on stateid recovery, which is almost
> certainly not what we want.
>

(facepalm)

Oh nm...stupid q. The OPEN can't lose the race since you won't
necessarily have a stateid at the start of the call, and you're
upgrading anyway.

I think you're correct that CLOSE is the only place you have to worry
about it.

--
Jeff Layton <[email protected]>

2015-01-24 11:58:03

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 0/5] Parallelise OPEN/OPEN_DOWNGRADE/CLOSE in NFSv4.x (x>0)

On Sat, 24 Jan 2015 00:39:23 -0500
Trond Myklebust <[email protected]> wrote:

> Hi all,
>
> The following patchset improves the tracking of stateids in the CLOSE code
> to ensure that we can rely on it in situations where we remove the NFSv4.0
> serialisation of state that shares the same open owners.
> It then proceeds to add a series of changes to the NFSv4.0 seqid code to
> ensure that it can be removed in the case of NFSv4.0 opens.
> Only lightly tested so far, but it seems to be working...
>
> In principle there is nothing stopping us from doing the same with byte
> range locks; we only need to add better atomicity of stateid updates,
> so that we can track the seqid number in the stateid and ensure that it
> never regresses. The exercise has been left for the reader...
>
> Cheers
> Trond
>
> Trond Myklebust (5):
> NFSv4: Fix an atomicity problem in CLOSE
> NFSv4: More CLOSE/OPEN races
> NFSv4: Convert nfs_alloc_seqid() to return an ERR_PTR() if allocation
> fails
> NFSv4: Check for NULL argument in nfs_*_seqid() functions
> NFSv4.1: Allow parallel OPEN/OPEN_DOWNGRADE/CLOSE
>
> fs/nfs/nfs4_fs.h | 3 +++
> fs/nfs/nfs4proc.c | 50 ++++++++++++++++++++++++++++++++++++++++---------
> fs/nfs/nfs4state.c | 31 ++++++++++++++++++------------
> fs/nfs/nfs4xdr.c | 9 ++++++---
> include/linux/nfs_xdr.h | 2 +-
> 5 files changed, 70 insertions(+), 25 deletions(-)
>

Nice work! I gave it a once-over and it looks pretty sound. I'll plan
to give it a good thrashing soon.

--
Jeff Layton <[email protected]>

2015-01-25 13:19:45

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 0/5] Parallelise OPEN/OPEN_DOWNGRADE/CLOSE in NFSv4.x (x>0)

On Sat, 24 Jan 2015 00:39:23 -0500
Trond Myklebust <[email protected]> wrote:

> Hi all,
>
> The following patchset improves the tracking of stateids in the CLOSE code
> to ensure that we can rely on it in situations where we remove the NFSv4.0
> serialisation of state that shares the same open owners.
> It then proceeds to add a series of changes to the NFSv4.0 seqid code to
> ensure that it can be removed in the case of NFSv4.0 opens.
> Only lightly tested so far, but it seems to be working...
>
> In principle there is nothing stopping us from doing the same with byte
> range locks; we only need to add better atomicity of stateid updates,
> so that we can track the seqid number in the stateid and ensure that it
> never regresses. The exercise has been left for the reader...
>
> Cheers
> Trond
>
> Trond Myklebust (5):
> NFSv4: Fix an atomicity problem in CLOSE
> NFSv4: More CLOSE/OPEN races
> NFSv4: Convert nfs_alloc_seqid() to return an ERR_PTR() if allocation
> fails
> NFSv4: Check for NULL argument in nfs_*_seqid() functions
> NFSv4.1: Allow parallel OPEN/OPEN_DOWNGRADE/CLOSE
>
> fs/nfs/nfs4_fs.h | 3 +++
> fs/nfs/nfs4proc.c | 50 ++++++++++++++++++++++++++++++++++++++++---------
> fs/nfs/nfs4state.c | 31 ++++++++++++++++++------------
> fs/nfs/nfs4xdr.c | 9 ++++++---
> include/linux/nfs_xdr.h | 2 +-
> 5 files changed, 70 insertions(+), 25 deletions(-)
>

FWIW, I checked out your devel branch and built a kernel with it. I
then ran a test that forks off a bunch of processes and has them create
and close a number of files as rapidly as possible:

Stock fedora 3.19.0-0.rc5.git2.1.fc22.x86_64:
$ time ./opentest01 /mnt/rawhide/opentest/

real 1m46.203s
user 0m0.745s
sys 0m15.713s

Trond's devel branch:
$ time ./opentest01 /mnt/rawhide/opentest/

real 0m58.994s
user 0m0.721s
sys 0m19.578s

(60+46)/59 == 1.79661016949152542372

So ~80% improvement in that test -- nice!
--
Jeff Layton <[email protected]>

2015-01-25 15:14:37

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 0/5] Parallelise OPEN/OPEN_DOWNGRADE/CLOSE in NFSv4.x (x>0)

On Sun, 25 Jan 2015 08:19:41 -0500
Jeff Layton <[email protected]> wrote:

> On Sat, 24 Jan 2015 00:39:23 -0500
> Trond Myklebust <[email protected]> wrote:
>
> > Hi all,
> >
> > The following patchset improves the tracking of stateids in the CLOSE code
> > to ensure that we can rely on it in situations where we remove the NFSv4.0
> > serialisation of state that shares the same open owners.
> > It then proceeds to add a series of changes to the NFSv4.0 seqid code to
> > ensure that it can be removed in the case of NFSv4.0 opens.
> > Only lightly tested so far, but it seems to be working...
> >
> > In principle there is nothing stopping us from doing the same with byte
> > range locks; we only need to add better atomicity of stateid updates,
> > so that we can track the seqid number in the stateid and ensure that it
> > never regresses. The exercise has been left for the reader...
> >
> > Cheers
> > Trond
> >
> > Trond Myklebust (5):
> > NFSv4: Fix an atomicity problem in CLOSE
> > NFSv4: More CLOSE/OPEN races
> > NFSv4: Convert nfs_alloc_seqid() to return an ERR_PTR() if allocation
> > fails
> > NFSv4: Check for NULL argument in nfs_*_seqid() functions
> > NFSv4.1: Allow parallel OPEN/OPEN_DOWNGRADE/CLOSE
> >
> > fs/nfs/nfs4_fs.h | 3 +++
> > fs/nfs/nfs4proc.c | 50 ++++++++++++++++++++++++++++++++++++++++---------
> > fs/nfs/nfs4state.c | 31 ++++++++++++++++++------------
> > fs/nfs/nfs4xdr.c | 9 ++++++---
> > include/linux/nfs_xdr.h | 2 +-
> > 5 files changed, 70 insertions(+), 25 deletions(-)
> >
>
> FWIW, I checked out your devel branch and built a kernel with it. I
> then ran a test that forks off a bunch of processes and has them create
> and close a number of files as rapidly as possible:
>
> Stock fedora 3.19.0-0.rc5.git2.1.fc22.x86_64:
> $ time ./opentest01 /mnt/rawhide/opentest/
>
> real 1m46.203s
> user 0m0.745s
> sys 0m15.713s
>
> Trond's devel branch:
> $ time ./opentest01 /mnt/rawhide/opentest/
>
> real 0m58.994s
> user 0m0.721s
> sys 0m19.578s
>
> (60+46)/59 == 1.79661016949152542372
>
> So ~80% improvement in that test -- nice!

...and for reference, the same test on v3 mount:

$ time ./opentest01 /mnt/rawhide/opentest/

real 0m58.692s
user 0m0.471s
sys 0m35.323s

...so with this patchset, v4.1 is now about as fast as v3 in this test.

It's also interesting that the system CPU time on v3 is so much higher
here. I've no explanation for that, but it might be good to research it
sometime.

--
Jeff Layton <[email protected]>