This is a follow up to yesterday's patchset to parallelise OPEN/CLOSE,
and basically extends the parallelism to cover LOCK/LOCKU. There is one
caveat, and that is that we still serialise the _first_ lock on a file.
Please apply on top of the patch series "Parallelise
OPEN/OPEN_DOWNGRADE/CLOSE in NFSv4.x (x>0)"
Trond Myklebust (5):
NFSv4: Fix atomicity problems with lock stateid updates
NFSv4: Always do open_to_lock_owner if the lock stateid is
uninitialised
NFSv4: Fix lock on-wire reordering issues
NFSv4: Update of VFS byte range lock must be atomic with the stateid
update
NFSv4.1: Allow parallel LOCK/LOCKU calls
fs/nfs/nfs4proc.c | 112 +++++++++++++++++++++++++++++++-----------------
fs/nfs/nfs4xdr.c | 6 +--
include/linux/nfs_xdr.h | 7 +--
3 files changed, 79 insertions(+), 46 deletions(-)
--
2.1.0
When we update the lock stateid, we really do need to ensure that this is
done under the state->state_lock, and that we are indeed only updating
confirmed locks with a newer version of the same stateid.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 42 +++++++++++++++++++++++++++++-------------
1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0a279ad5421f..db9d98eda07b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1297,6 +1297,23 @@ no_delegation:
return ret;
}
+static bool nfs4_update_lock_stateid(struct nfs4_lock_state *lsp,
+ const nfs4_stateid *stateid)
+{
+ struct nfs4_state *state = lsp->ls_state;
+ bool ret = false;
+
+ spin_lock(&state->state_lock);
+ if (!nfs4_stateid_match_other(stateid, &lsp->ls_stateid))
+ goto out_noupdate;
+ if (!nfs4_stateid_is_newer(stateid, &lsp->ls_stateid))
+ goto out_noupdate;
+ nfs4_stateid_copy(&lsp->ls_stateid, stateid);
+ ret = true;
+out_noupdate:
+ spin_unlock(&state->state_lock);
+ return ret;
+}
static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode)
{
@@ -5403,9 +5420,9 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
return;
switch (task->tk_status) {
case 0:
- nfs4_stateid_copy(&calldata->lsp->ls_stateid,
- &calldata->res.stateid);
renew_lease(calldata->server, calldata->timestamp);
+ nfs4_update_lock_stateid(calldata->lsp,
+ &calldata->res.stateid);
break;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID:
@@ -5626,6 +5643,7 @@ out_wait:
static void nfs4_lock_done(struct rpc_task *task, void *calldata)
{
struct nfs4_lockdata *data = calldata;
+ struct nfs4_lock_state *lsp = data->lsp;
dprintk("%s: begin!\n", __func__);
@@ -5633,18 +5651,16 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
return;
data->rpc_status = task->tk_status;
- if (data->arg.new_lock_owner != 0) {
- if (data->rpc_status == 0)
- nfs_confirm_seqid(&data->lsp->ls_seqid, 0);
- else
- goto out;
- }
- if (data->rpc_status == 0) {
- nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid);
- set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags);
- renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
+ if (task->tk_status == 0) {
+ renew_lease(NFS_SERVER(data->ctx->dentry->d_inode),
+ data->timestamp);
+ if (data->arg.new_lock_owner != 0) {
+ nfs_confirm_seqid(&lsp->ls_seqid, 0);
+ nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
+ set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
+ } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
+ rpc_restart_call_prepare(task);
}
-out:
dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
}
--
2.1.0
The original text in RFC3530 was terribly confusing since it conflated
lockowners and lock stateids. RFC3530bis clarifies that you must use
open_to_lock_owner when there is no lock state for that file+lockowner
combination.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index db9d98eda07b..f12ded041a42 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5611,7 +5611,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
goto out_wait;
/* Do we need to do an open_to_lock_owner? */
- if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
+ if (!test_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags)) {
if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
goto out_release_lock_seqid;
}
--
2.1.0
This patch ensures that the server cannot reorder our LOCK/LOCKU
requests if they are sent in parallel on the wire.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 29 ++++++++++++++++++++++++-----
fs/nfs/nfs4xdr.c | 6 +++---
include/linux/nfs_xdr.h | 6 +++---
3 files changed, 30 insertions(+), 11 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f12ded041a42..41e7c2fc046e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5393,7 +5393,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
p->arg.fl = &p->fl;
p->arg.seqid = seqid;
p->res.seqid = seqid;
- p->arg.stateid = &lsp->ls_stateid;
p->lsp = lsp;
atomic_inc(&lsp->ls_count);
/* Ensure we don't close file until we're done freeing locks! */
@@ -5428,6 +5427,9 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
+ if (!nfs4_stateid_match(&calldata->arg.stateid,
+ &calldata->lsp->ls_stateid))
+ rpc_restart_call_prepare(task);
break;
default:
if (nfs4_async_handle_error(task, calldata->server,
@@ -5443,6 +5445,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
goto out_wait;
+ nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid);
if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
/* Note: exit _without_ running nfs4_locku_done */
goto out_no_action;
@@ -5584,7 +5587,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
if (IS_ERR(p->arg.lock_seqid))
goto out_free_seqid;
- p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
p->arg.lock_owner.id = lsp->ls_seqid.owner_id;
p->arg.lock_owner.s_dev = server->s_dev;
@@ -5615,11 +5617,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
goto out_release_lock_seqid;
}
- data->arg.open_stateid = &state->open_stateid;
+ nfs4_stateid_copy(&data->arg.open_stateid,
+ &state->open_stateid);
data->arg.new_lock_owner = 1;
data->res.open_seqid = data->arg.open_seqid;
- } else
+ } else {
data->arg.new_lock_owner = 0;
+ nfs4_stateid_copy(&data->arg.lock_stateid,
+ &data->lsp->ls_stateid);
+ }
if (!nfs4_valid_open_stateid(state)) {
data->rpc_status = -EBADF;
task->tk_action = NULL;
@@ -5651,7 +5657,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
return;
data->rpc_status = task->tk_status;
- if (task->tk_status == 0) {
+ switch (task->tk_status) {
+ case 0:
renew_lease(NFS_SERVER(data->ctx->dentry->d_inode),
data->timestamp);
if (data->arg.new_lock_owner != 0) {
@@ -5660,6 +5667,18 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
} else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
rpc_restart_call_prepare(task);
+ break;
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ if (data->arg.new_lock_owner != 0) {
+ if (!nfs4_stateid_match(&data->arg.open_stateid,
+ &lsp->ls_state->open_stateid))
+ rpc_restart_call_prepare(task);
+ } else if (!nfs4_stateid_match(&data->arg.lock_stateid,
+ &lsp->ls_stateid))
+ rpc_restart_call_prepare(task);
}
dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d05fada4929c..e3018e7a316c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1304,12 +1304,12 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
*p = cpu_to_be32(args->new_lock_owner);
if (args->new_lock_owner){
encode_nfs4_seqid(xdr, args->open_seqid);
- encode_nfs4_stateid(xdr, args->open_stateid);
+ encode_nfs4_stateid(xdr, &args->open_stateid);
encode_nfs4_seqid(xdr, args->lock_seqid);
encode_lockowner(xdr, &args->lock_owner);
}
else {
- encode_nfs4_stateid(xdr, args->lock_stateid);
+ encode_nfs4_stateid(xdr, &args->lock_stateid);
encode_nfs4_seqid(xdr, args->lock_seqid);
}
}
@@ -1333,7 +1333,7 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr);
encode_uint32(xdr, nfs4_lock_type(args->fl, 0));
encode_nfs4_seqid(xdr, args->seqid);
- encode_nfs4_stateid(xdr, args->stateid);
+ encode_nfs4_stateid(xdr, &args->stateid);
p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, args->fl->fl_start);
xdr_encode_hyper(p, nfs4_lock_length(args->fl));
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7e38d641236e..b6a6953c0f09 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -416,9 +416,9 @@ struct nfs_lock_args {
struct nfs_fh * fh;
struct file_lock * fl;
struct nfs_seqid * lock_seqid;
- nfs4_stateid * lock_stateid;
+ nfs4_stateid lock_stateid;
struct nfs_seqid * open_seqid;
- nfs4_stateid * open_stateid;
+ nfs4_stateid open_stateid;
struct nfs_lowner lock_owner;
unsigned char block : 1;
unsigned char reclaim : 1;
@@ -437,7 +437,7 @@ struct nfs_locku_args {
struct nfs_fh * fh;
struct file_lock * fl;
struct nfs_seqid * seqid;
- nfs4_stateid * stateid;
+ nfs4_stateid stateid;
};
struct nfs_locku_res {
--
2.1.0
Ensure that we test the lock stateid remained unchanged while we were
updating the VFS tracking of the byte range lock. Have the process
replay the lock to the server if we detect that was not the case.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 37 +++++++++++++++----------------------
include/linux/nfs_xdr.h | 1 +
2 files changed, 16 insertions(+), 22 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 41e7c2fc046e..9f6baf98942c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5420,9 +5420,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
switch (task->tk_status) {
case 0:
renew_lease(calldata->server, calldata->timestamp);
- nfs4_update_lock_stateid(calldata->lsp,
- &calldata->res.stateid);
- break;
+ do_vfs_lock(calldata->fl.fl_file, &calldata->fl);
+ if (nfs4_update_lock_stateid(calldata->lsp,
+ &calldata->res.stateid))
+ break;
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID:
case -NFS4ERR_STALE_STATEID:
@@ -5661,6 +5662,13 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
case 0:
renew_lease(NFS_SERVER(data->ctx->dentry->d_inode),
data->timestamp);
+ if (data->arg.new_lock) {
+ data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
+ if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) {
+ rpc_restart_call_prepare(task);
+ break;
+ }
+ }
if (data->arg.new_lock_owner != 0) {
nfs_confirm_seqid(&lsp->ls_seqid, 0);
nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid);
@@ -5760,7 +5768,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
if (recovery_type == NFS_LOCK_RECLAIM)
data->arg.reclaim = NFS_LOCK_RECLAIM;
nfs4_set_sequence_privileged(&data->arg.seq_args);
- }
+ } else
+ data->arg.new_lock = 1;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
@@ -5884,10 +5893,8 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- struct nfs4_state_owner *sp = state->owner;
struct nfs_inode *nfsi = NFS_I(state->inode);
unsigned char fl_flags = request->fl_flags;
- unsigned int seq;
int status = -ENOLCK;
if ((fl_flags & FL_POSIX) &&
@@ -5907,25 +5914,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
/* ...but avoid races with delegation recall... */
request->fl_flags = fl_flags & ~FL_SLEEP;
status = do_vfs_lock(request->fl_file, request);
- goto out_unlock;
- }
- seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
- up_read(&nfsi->rwsem);
- status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
- if (status != 0)
+ up_read(&nfsi->rwsem);
goto out;
- down_read(&nfsi->rwsem);
- if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
- status = -NFS4ERR_DELAY;
- goto out_unlock;
}
- /* Note: we always want to sleep here! */
- request->fl_flags = fl_flags | FL_SLEEP;
- if (do_vfs_lock(request->fl_file, request) < 0)
- printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock "
- "manager!\n", __func__);
-out_unlock:
up_read(&nfsi->rwsem);
+ status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
out:
request->fl_flags = fl_flags;
return status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b6a6953c0f09..e5c3b620a609 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -422,6 +422,7 @@ struct nfs_lock_args {
struct nfs_lowner lock_owner;
unsigned char block : 1;
unsigned char reclaim : 1;
+ unsigned char new_lock : 1;
unsigned char new_lock_owner : 1;
};
--
2.1.0
Note, however, that we still serialise on the open stateid if the lock
stateid is unconfirmed. Hopefully that will not prove too much of a
burden for first time locks; it should leave the ability to parallelise
OPENs unchanged, since they no longer call the serialisation primitives.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9f6baf98942c..66befb0dd241 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5517,6 +5517,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
struct nfs_seqid *seqid;
struct nfs4_lock_state *lsp;
struct rpc_task *task;
+ struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
int status = 0;
unsigned char fl_flags = request->fl_flags;
@@ -5540,7 +5541,8 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
lsp = request->fl_u.nfs4_fl.owner;
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0)
goto out;
- seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
+ alloc_seqid = NFS_SERVER(inode)->nfs_client->cl_mvops->alloc_seqid;
+ seqid = alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
status = -ENOMEM;
if (IS_ERR(seqid))
goto out;
@@ -5575,6 +5577,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
struct nfs4_lockdata *p;
struct inode *inode = lsp->ls_state->inode;
struct nfs_server *server = NFS_SERVER(inode);
+ struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
p = kzalloc(sizeof(*p), gfp_mask);
if (p == NULL)
@@ -5585,7 +5588,8 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask);
if (IS_ERR(p->arg.open_seqid))
goto out_free;
- p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
+ alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid;
+ p->arg.lock_seqid = alloc_seqid(&lsp->ls_seqid, gfp_mask);
if (IS_ERR(p->arg.lock_seqid))
goto out_free_seqid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
--
2.1.0