2016-08-02 19:09:46

by Jeff Layton

Subject: [RFC PATCH 0/4] nfsd: add CB_NOTIFY_LOCK support

A small set of patches that adds CB_NOTIFY_LOCK support to knfsd.
The basic idea is to use FL_SLEEP to queue blocked locks when a lock is
contended, and then queue a callback that issues a CB_NOTIFY_LOCK from the
lm_notify op.

Per RFC 5661, we take no steps to reserve the lock for the client. This is
a simple notification telling the client that it may want to poll for the
lock again.

The series also cleans out old, abandoned blocks once the client appears
to have lost interest in obtaining the lock.

Only lightly tested so far, but it seems to do the right thing.
The client-side piece is the next step.

Jeff Layton (4):
nfsd: plumb in a CB_NOTIFY_LOCK operation
nfsd: have nfsd4_lock use blocking locks for v4.1+ locks
nfsd: add a LRU list for blocked locks
nfsd: set the MAY_NOTIFY_LOCK flag in OPEN replies

fs/nfsd/netns.h | 1 +
fs/nfsd/nfs4callback.c | 57 +++++++++++++
fs/nfsd/nfs4state.c | 209 ++++++++++++++++++++++++++++++++++++++++++----
fs/nfsd/state.h | 21 ++++-
fs/nfsd/xdr4cb.h | 9 ++
include/uapi/linux/nfs4.h | 5 +-
6 files changed, 281 insertions(+), 21 deletions(-)

--
2.7.4



2016-08-02 18:23:12

by Jeff Layton

Subject: [RFC PATCH 2/4] nfsd: have nfsd4_lock use blocking locks for v4.1+ locks

Create a new per-lockowner+per-inode structure that contains a
file_lock. Have nfsd4_lock add this structure to the lockowner's list
prior to setting the lock. Then call the vfs and request a blocking lock
(by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED
back, then we dequeue the block structure and free it. When the next
lock request comes in, we'll look for an existing block for the same
filehandle and dequeue and reuse it if there is one.

When the lock becomes free (i.e., when the lm_notify callback fires), we
dequeue it from the lockowner's list and kick off a CB_NOTIFY_LOCK
callback to inform the client that it should retry the lock request.
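
In outline, the lifecycle this implements looks like the following (a
summary sketch of the code in the diff below, not additional code):

	/*
	 * nfsd4_lock()
	 *   -> find_or_allocate_block()   reuse a queued block for this
	 *                                 lockowner+filehandle, or allocate one
	 *   -> vfs_lock_file(F_SETLK)     FL_SLEEP set for v4.1+ requests
	 *        FILE_LOCK_DEFERRED:      leave the block queued on lo_blocked
	 *                                 and return NFS4ERR_DENIED for now
	 *        anything else:           dequeue and free the block
	 *
	 * lm_notify (contended lock became free)
	 *   -> dequeue the block from lo_blocked
	 *   -> nfsd4_run_cb()             send CB_NOTIFY_LOCK to the client
	 */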

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++-----
fs/nfsd/state.h | 12 +++--
2 files changed, 147 insertions(+), 17 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 70d0b9b33031..38367201c14f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab;
static void free_session(struct nfsd4_session *);

static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;

static bool is_session_dead(struct nfsd4_session *ses)
{
@@ -210,6 +211,84 @@ static void nfsd4_put_session(struct nfsd4_session *ses)
spin_unlock(&nn->client_lock);
}

+static struct nfsd4_blocked_lock *
+find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+ struct nfsd_net *nn)
+{
+ struct nfsd4_blocked_lock *cur, *found = NULL;
+
+ spin_lock(&nn->client_lock);
+ list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
+ if (fh_match(fh, &cur->nbl_fh)) {
+ list_del_init(&cur->nbl_list);
+ found = cur;
+ break;
+ }
+ }
+ spin_unlock(&nn->client_lock);
+ if (found)
+ posix_unblock_lock(&found->nbl_lock);
+ return found;
+}
+
+static struct nfsd4_blocked_lock *
+find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
+ struct nfsd_net *nn)
+{
+ struct nfsd4_blocked_lock *nbl;
+
+ nbl = find_blocked_lock(lo, fh, nn);
+ if (!nbl) {
+ nbl = kmalloc(sizeof(*nbl), GFP_KERNEL);
+ if (nbl) {
+ fh_copy_shallow(&nbl->nbl_fh, fh);
+ locks_init_lock(&nbl->nbl_lock);
+ nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
+ &nfsd4_cb_notify_lock_ops,
+ NFSPROC4_CLNT_CB_NOTIFY_LOCK);
+ }
+ }
+ return nbl;
+}
+
+static void
+free_blocked_lock(struct nfsd4_blocked_lock *nbl)
+{
+ locks_release_private(&nbl->nbl_lock);
+ kfree(nbl);
+}
+
+static int
+nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+ /*
+ * Since this is just an optimization, we don't try very hard if it
+ * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
+ * just quit trying on anything else.
+ */
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 1 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
+{
+ struct nfsd4_blocked_lock *nbl = container_of(cb,
+ struct nfsd4_blocked_lock, nbl_cb);
+
+ free_blocked_lock(nbl);
+}
+
+static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
+ .done = nfsd4_cb_notify_lock_done,
+ .release = nfsd4_cb_notify_lock_release,
+};
+
static inline struct nfs4_stateowner *
nfs4_get_stateowner(struct nfs4_stateowner *sop)
{
@@ -5296,7 +5375,29 @@ nfsd4_fl_put_owner(fl_owner_t owner)
nfs4_put_stateowner(&lo->lo_owner);
}

+static void
+nfsd4_lm_notify(struct file_lock *fl)
+{
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner;
+ struct net *net = lo->lo_owner.so_client->net;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd4_blocked_lock *nbl = container_of(fl,
+ struct nfsd4_blocked_lock, nbl_lock);
+ bool queue = false;
+
+ spin_lock(&nn->client_lock);
+ if (!list_empty(&nbl->nbl_list)) {
+ list_del_init(&nbl->nbl_list);
+ queue = true;
+ }
+ spin_unlock(&nn->client_lock);
+
+ if (queue)
+ nfsd4_run_cb(&nbl->nbl_cb);
+}
+
static const struct lock_manager_operations nfsd_posix_mng_ops = {
+ .lm_notify = nfsd4_lm_notify,
.lm_get_owner = nfsd4_fl_get_owner,
.lm_put_owner = nfsd4_fl_put_owner,
};
@@ -5394,6 +5495,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
if (!lo)
return NULL;
+ INIT_LIST_HEAD(&lo->lo_blocked);
INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
lo->lo_owner.so_is_open_owner = 0;
lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
@@ -5558,12 +5660,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *open_stp = NULL;
struct nfs4_file *fp;
struct file *filp = NULL;
+ struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
__be32 status = 0;
int lkflg;
int err;
bool new = false;
+ unsigned char fl_type;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);

@@ -5630,13 +5734,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!locks_in_grace(net) && lock->lk_reclaim)
goto out;

- file_lock = locks_alloc_lock();
- if (!file_lock) {
- dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
- status = nfserr_jukebox;
- goto out;
- }
-
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READ_LT:
@@ -5646,7 +5743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (filp)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
spin_unlock(&fp->fi_lock);
- file_lock->fl_type = F_RDLCK;
+ fl_type = F_RDLCK;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
@@ -5655,17 +5752,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (filp)
get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
spin_unlock(&fp->fi_lock);
- file_lock->fl_type = F_WRLCK;
+ fl_type = F_WRLCK;
break;
default:
status = nfserr_inval;
goto out;
}
+
if (!filp) {
status = nfserr_openmode;
goto out;
}

+ nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
+ if (!nbl) {
+ dprintk("NFSD: %s: unable to allocate block!\n", __func__);
+ status = nfserr_jukebox;
+ goto out;
+ }
+
+ file_lock = &nbl->nbl_lock;
+ file_lock->fl_type = fl_type;
file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
file_lock->fl_pid = current->tgid;
file_lock->fl_file = filp;
@@ -5682,18 +5789,28 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}

+ if (nfsd4_has_session(cstate)) {
+ file_lock->fl_flags |= FL_SLEEP;
+ spin_lock(&nn->client_lock);
+ list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
+ spin_unlock(&nn->client_lock);
+ }
+
err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
- switch (-err) {
+ switch (err) {
case 0: /* success! */
nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
status = 0;
break;
- case (EAGAIN): /* conflock holds conflicting lock */
+ case FILE_LOCK_DEFERRED:
+ nbl = NULL;
+ /* Fallthrough */
+ case EAGAIN: /* conflock holds conflicting lock */
status = nfserr_denied;
dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
nfs4_set_lock_denied(conflock, &lock->lk_denied);
break;
- case (EDEADLK):
+ case EDEADLK:
status = nfserr_deadlock;
break;
default:
@@ -5702,6 +5819,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
}
out:
+ if (nbl) {
+ /* dequeue it if we queued it before */
+ if (nfsd4_has_session(cstate)) {
+ spin_lock(&nn->client_lock);
+ list_del_init(&nbl->nbl_list);
+ spin_unlock(&nn->client_lock);
+ }
+ free_blocked_lock(nbl);
+ }
if (filp)
fput(filp);
if (lock_stp) {
@@ -5725,8 +5851,6 @@ out:
if (open_stp)
nfs4_put_stid(&open_stp->st_stid);
nfsd4_bump_seqid(cstate, status);
- if (file_lock)
- locks_free_lock(file_lock);
if (conflock)
locks_free_lock(conflock);
return status;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ecf3f4671654..056b0e4c485b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -440,11 +440,11 @@ struct nfs4_openowner {
/*
* Represents a generic "lockowner". Similar to an openowner. References to it
* are held by the lock stateids that are created on its behalf. This object is
- * a superset of the nfs4_stateowner struct (or would be if it needed any extra
- * fields).
+ * a superset of the nfs4_stateowner struct.
*/
struct nfs4_lockowner {
- struct nfs4_stateowner lo_owner; /* must be first element */
+ struct nfs4_stateowner lo_owner; /* must be first element */
+ struct list_head lo_blocked; /* blocked file_locks */
};

static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
@@ -580,7 +580,13 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
return (s32)(a->si_generation - b->si_generation) > 0;
}

+/*
+ * When a client tries to get a lock on a file, we set one of these objects
+ * on the blocking lock. When the lock becomes free, we can then issue a
+ * CB_NOTIFY_LOCK to the client.
+ */
struct nfsd4_blocked_lock {
+ struct list_head nbl_list;
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
--
2.7.4


2016-08-02 18:38:57

by Jeff Layton

Subject: [RFC PATCH 4/4] nfsd: set the MAY_NOTIFY_LOCK flag in OPEN replies

If we are using v4.1+, then we can send notification when contended
locks become free. Inform the client of that fact.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4state.c | 6 ++++--
include/uapi/linux/nfs4.h | 5 +++--
2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6a81e6b86438..3cfb4c326060 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4493,9 +4493,11 @@ out:
* To finish the open response, we just need to set the rflags.
*/
open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
- if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
- !nfsd4_has_session(&resp->cstate))
+ if (nfsd4_has_session(&resp->cstate))
+ open->op_rflags |= NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK;
+ else if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED))
open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
+
if (dp)
nfs4_put_stid(&dp->dl_stid);
if (stp)
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index 2b871e0858d9..4ae62796bfde 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -39,8 +39,9 @@
#define NFS4_FH_VOL_MIGRATION 0x0004
#define NFS4_FH_VOL_RENAME 0x0008

-#define NFS4_OPEN_RESULT_CONFIRM 0x0002
-#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
+#define NFS4_OPEN_RESULT_CONFIRM 0x0002
+#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
+#define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020

#define NFS4_SHARE_ACCESS_MASK 0x000F
#define NFS4_SHARE_ACCESS_READ 0x0001
--
2.7.4


2016-08-02 18:38:57

by Jeff Layton

Subject: [RFC PATCH 1/4] nfsd: plumb in a CB_NOTIFY_LOCK operation

Add the encoding/decoding for CB_NOTIFY_LOCK operations.
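
For reference, RFC 5661 defines the arguments for this operation as just a
filehandle plus a lock_owner4 (clientid + opaque owner), which is what the
encoder below marshals:

	/* XDR from RFC 5661 (CB_NOTIFY_LOCK), shown here for reference: */
	struct CB_NOTIFY_LOCK4args {
		nfs_fh4		cnla_fh;
		lock_owner4	cnla_lock_owner;
	};

	struct CB_NOTIFY_LOCK4res {
		nfsstat4	cnlr_status;
	};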

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/nfs4callback.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/state.h | 7 +++++++
fs/nfsd/xdr4cb.h | 9 ++++++++
3 files changed, 73 insertions(+)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 04c68d900324..8fb6e0abaca3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -623,6 +623,62 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
}
#endif /* CONFIG_NFSD_PNFS */

+static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 8 + 4 + so->so_owner.len);
+ p = xdr_encode_opaque_fixed(p, &so->so_client->cl_clientid, 8);
+ xdr_encode_opaque(p, so->so_owner.data, so->so_owner.len);
+}
+
+static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ const struct nfsd4_callback *cb)
+{
+ const struct nfsd4_blocked_lock *nbl =
+ container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
+ struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = 0,
+ .minorversion = cb->cb_minorversion,
+ };
+
+ __be32 *p;
+
+ BUG_ON(hdr.minorversion == 0);
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+
+ p = xdr_reserve_space(xdr, 4);
+ *p = cpu_to_be32(OP_CB_NOTIFY_LOCK);
+ encode_nfs_fh4(xdr, &nbl->nbl_fh);
+ encode_stateowner(xdr, &lo->lo_owner);
+ hdr.nops++;
+
+ encode_cb_nops(&hdr);
+}
+
+static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+
+ if (cb) {
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+ }
+ return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
+}
+
/*
* RPC procedure tables
*/
@@ -643,6 +699,7 @@ static struct rpc_procinfo nfs4_cb_procedures[] = {
#ifdef CONFIG_NFSD_PNFS
PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout),
#endif
+ PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
};

static struct rpc_version nfs_cb_version4 = {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 64053eadeb81..ecf3f4671654 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -571,6 +571,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_RECALL,
NFSPROC4_CLNT_CB_LAYOUT,
NFSPROC4_CLNT_CB_SEQUENCE,
+ NFSPROC4_CLNT_CB_NOTIFY_LOCK,
};

/* Returns true iff a is later than b: */
@@ -579,6 +580,12 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
return (s32)(a->si_generation - b->si_generation) > 0;
}

+struct nfsd4_blocked_lock {
+ struct file_lock nbl_lock;
+ struct knfsd_fh nbl_fh;
+ struct nfsd4_callback nbl_cb;
+};
+
struct nfsd4_compound_state;
struct nfsd_net;

diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index c47f6fdb111a..49b719dfef95 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -28,3 +28,12 @@
#define NFS4_dec_cb_layout_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+
+#define NFS4_enc_cb_notify_lock_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 2 + 1 + \
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \
+ enc_nfs4_fh_sz)
+#define NFS4_dec_cb_notify_lock_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + \
+ op_dec_sz)
--
2.7.4


2016-08-02 18:46:00

by Jeff Layton

Subject: Re: [RFC PATCH 2/4] nfsd: have nfsd4_lock use blocking locks for v4.1+ locks

On Tue, 2016-08-02 at 14:15 -0400, Jeff Layton wrote:
> [...]
>
>  	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
> -	switch (-err) {
> +	switch (err) {

Ouch, already see a bug here. Need to propagate the negative to the
other return codes.
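
Something like this would do it (a sketch of the fix; FILE_LOCK_DEFERRED
is a positive value, so that case can stay as-is while the errno cases
match on negative values):

	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
	switch (err) {
	case 0: /* success! */
		nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid,
					  &lock_stp->st_stid);
		status = 0;
		break;
	case FILE_LOCK_DEFERRED:	/* block stays queued for lm_notify */
		nbl = NULL;
		/* Fallthrough */
	case -EAGAIN:			/* conflock holds conflicting lock */
		status = nfserr_denied;
		nfs4_set_lock_denied(conflock, &lock->lk_denied);
		break;
	case -EDEADLK:
		status = nfserr_deadlock;
		break;
	default:
		status = nfserrno(err);
		break;
	}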


--
Jeff Layton <[email protected]>

2016-08-02 19:09:46

by Jeff Layton

Subject: [RFC PATCH 3/4] nfsd: add a LRU list for blocked locks

It's possible for a client to ask for a lock that stays blocked for a
long time, and then stop polling for it. A malicious client could even
set a lock on a file, and then spam the server with failing lock
requests from different lockowners, letting them pile up in a DoS
attack.

Add the blocked lock structures to a per-net-namespace LRU when queueing
them, and timestamp them. If the lock request is not revisited within a
lease period, we'll drop it under the assumption that the client is no
longer interested.

This also gives us a mechanism to clean up these objects at server
shutdown time.

Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/netns.h | 1 +
fs/nfsd/nfs4state.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/state.h | 2 ++
3 files changed, 54 insertions(+)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 5fbf3bbd00d0..b10d557f9c9e 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -84,6 +84,7 @@ struct nfsd_net {
struct list_head client_lru;
struct list_head close_lru;
struct list_head del_recall_lru;
+ struct list_head blocked_locks_lru;

struct delayed_work laundromat_work;

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 38367201c14f..6a81e6b86438 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -221,6 +221,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
if (fh_match(fh, &cur->nbl_fh)) {
list_del_init(&cur->nbl_list);
+ list_del_init(&cur->nbl_lru);
found = cur;
break;
}
@@ -4583,6 +4584,7 @@ nfs4_laundromat(struct nfsd_net *nn)
struct nfs4_openowner *oo;
struct nfs4_delegation *dp;
struct nfs4_ol_stateid *stp;
+ struct nfsd4_blocked_lock *nbl;
struct list_head *pos, *next, reaplist;
time_t cutoff = get_seconds() - nn->nfsd4_lease;
time_t t, new_timeo = nn->nfsd4_lease;
@@ -4651,6 +4653,30 @@ nfs4_laundromat(struct nfsd_net *nn)
}
spin_unlock(&nn->client_lock);

+ INIT_LIST_HEAD(&reaplist);
+ spin_lock(&nn->client_lock);
+ while (!list_empty(&nn->blocked_locks_lru)) {
+ nbl = list_first_entry(&nn->blocked_locks_lru,
+ struct nfsd4_blocked_lock, nbl_lru);
+ if (time_after((unsigned long)nbl->nbl_time,
+ (unsigned long)cutoff)) {
+ t = nbl->nbl_time - cutoff;
+ new_timeo = min(new_timeo, t);
+ break;
+ }
+ list_move(&nbl->nbl_lru, &reaplist);
+ list_del_init(&nbl->nbl_list);
+ }
+ spin_unlock(&nn->client_lock);
+
+ while (!list_empty(&reaplist)) {
+ nbl = list_first_entry(&reaplist,
+ struct nfsd4_blocked_lock, nbl_lru);
+ list_del_init(&nbl->nbl_lru);
+ posix_unblock_lock(&nbl->nbl_lock);
+ free_blocked_lock(nbl);
+ }
+
new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
return new_timeo;
}
@@ -5385,9 +5411,11 @@ nfsd4_lm_notify(struct file_lock *fl)
struct nfsd4_blocked_lock, nbl_lock);
bool queue = false;

+ /* An empty list means that something else is going to be using it */
spin_lock(&nn->client_lock);
if (!list_empty(&nbl->nbl_list)) {
list_del_init(&nbl->nbl_list);
+ list_del_init(&nbl->nbl_lru);
queue = true;
}
spin_unlock(&nn->client_lock);
@@ -5791,8 +5819,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,

if (nfsd4_has_session(cstate)) {
file_lock->fl_flags |= FL_SLEEP;
+ nbl->nbl_time = get_seconds();
spin_lock(&nn->client_lock);
list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
+ list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
spin_unlock(&nn->client_lock);
}

@@ -5824,6 +5854,7 @@ out:
if (nfsd4_has_session(cstate)) {
spin_lock(&nn->client_lock);
list_del_init(&nbl->nbl_list);
+ list_del_init(&nbl->nbl_lru);
spin_unlock(&nn->client_lock);
}
free_blocked_lock(nbl);
@@ -6849,6 +6880,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->client_lru);
INIT_LIST_HEAD(&nn->close_lru);
INIT_LIST_HEAD(&nn->del_recall_lru);
+ INIT_LIST_HEAD(&nn->blocked_locks_lru);
spin_lock_init(&nn->client_lock);

INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
@@ -6946,6 +6978,7 @@ nfs4_state_shutdown_net(struct net *net)
struct nfs4_delegation *dp = NULL;
struct list_head *pos, *next, reaplist;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd4_blocked_lock *nbl;

cancel_delayed_work_sync(&nn->laundromat_work);
locks_end_grace(&nn->nfsd4_manager);
@@ -6966,6 +6999,24 @@ nfs4_state_shutdown_net(struct net *net)
nfs4_put_stid(&dp->dl_stid);
}

+ BUG_ON(!list_empty(&reaplist));
+ spin_lock(&nn->client_lock);
+ while (!list_empty(&nn->blocked_locks_lru)) {
+ nbl = list_first_entry(&nn->blocked_locks_lru,
+ struct nfsd4_blocked_lock, nbl_lru);
+ list_move(&nbl->nbl_lru, &reaplist);
+ list_del_init(&nbl->nbl_list);
+ }
+ spin_unlock(&nn->client_lock);
+
+ while (!list_empty(&reaplist)) {
+ nbl = list_first_entry(&reaplist,
+ struct nfsd4_blocked_lock, nbl_lru);
+ list_del_init(&nbl->nbl_lru);
+ posix_unblock_lock(&nbl->nbl_lock);
+ free_blocked_lock(nbl);
+ }
+
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 056b0e4c485b..e495e74309b8 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -587,6 +587,8 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
*/
struct nfsd4_blocked_lock {
struct list_head nbl_list;
+ struct list_head nbl_lru;
+ unsigned long nbl_time;
struct file_lock nbl_lock;
struct knfsd_fh nbl_fh;
struct nfsd4_callback nbl_cb;
--
2.7.4


2016-08-02 20:43:15

by J. Bruce Fields

Subject: Re: [RFC PATCH 2/4] nfsd: have nfsd4_lock use blocking locks for v4.1+ locks

On Tue, Aug 02, 2016 at 02:15:29PM -0400, Jeff Layton wrote:
> Create a new per-lockowner+per-inode structure that contains a
> file_lock. Have nfsd4_lock add this structure to the lockowner's list
> prior to setting the lock. Then call the vfs and request a blocking lock
> (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED
> back, then we dequeue the block structure and free it. When the next
> lock request comes in, we'll look for an existing block for the same
> filehandle and dequeue and reuse it if there is one.

I'm probably just missing it because I only skimmed the patch quickly,
but I don't see you distinguishing the blocking and non-blocking case.
I think we only want to do this in the {READ,WRITE}W_LT case, as
{READ,WRITE}_LT is a hint from the client that it's not going to wait
for the lock. Sending a notify without a preceding blocking request is
probably a (relatively harmless) protocol bug.

--b.


2016-08-02 21:27:17

by Jeff Layton

Subject: Re: [RFC PATCH 0/4] nfsd: add CB_NOTIFY_LOCK support

On Tue, 2016-08-02 at 16:38 -0400, J. Bruce Fields wrote:
> On Tue, Aug 02, 2016 at 02:15:27PM -0400, Jeff Layton wrote:
> >
> > A small set of patches that should add CB_NOTIFY_LOCK support for knfsd.
> > The basic idea is to use FL_SLEEP to set blocks when a lock is contended,
> > and then queue a callback to issue a CB_NOTIFY_LOCK in the lm_notify op.
> >
> > Per the RFC, we take no steps to reserve the lock for the client. This is
> > a simple notification to tell the client that it may want to poll for it
> > again.
> >
> > It also takes steps to clean out old, abandoned blocks when the client
> > loses interest in obtaining the lock.
>
> I had to double-check the spec language here: ok, 5661 9.6 does
> explicitly say that clients can't depend on the notify, so they do need
> to continue polling--so that cleanup doesn't risk leaving clients
> blocking indefinitely.  Might be worth a comment referencing that
> language.
>

Yep, exactly. This is 100% a best-effort kind of thing. The clients
have to poll regardless of whether we send a notification.
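
To make that concrete, a hypothetical client-side waiter might look
something like the sketch below. None of these helpers exist yet;
nfs4_try_lock(), lock_poll_interval and the NFS_LCK_NOTIFIED bit are
invented names for illustration:

	/*
	 * Hypothetical sketch only. Per RFC 5661 9.6 the callback is just
	 * a hint, so the timeout path must eventually acquire the lock
	 * even if no CB_NOTIFY_LOCK ever arrives.
	 */
	for (;;) {
		err = nfs4_try_lock(state);	/* hypothetical */
		if (err != -NFS4ERR_DENIED)
			break;			/* granted, or a hard error */

		/* sleep until notified, or until the next poll is due */
		wait_event_timeout(state->waitq,
				   test_and_clear_bit(NFS_LCK_NOTIFIED,
						      &state->flags),
				   lock_poll_interval);
	}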

> Looks like you're not attempting any sort of fair queueing, so clients
> that get a notify just race for the lock? That's fine.
>

Yep. I don't think that we can reasonably do anything more
sophisticated. Hopefully the delay between callbacks will be enough to
prevent the herd from thundering too badly.

> Thanks for looking at this!
>
> --b.
>

No problem. It has been on my to-do list for a while now...


--
Jeff Layton <[email protected]>

2016-08-02 21:30:29

by Jeff Layton

Subject: Re: [RFC PATCH 2/4] nfsd: have nfsd4_lock use blocking locks for v4.1+ locks

On Tue, 2016-08-02 at 16:43 -0400, J. Bruce Fields wrote:
> On Tue, Aug 02, 2016 at 02:15:29PM -0400, Jeff Layton wrote:
> >
> > Create a new per-lockowner+per-inode structure that contains a
> > file_lock. Have nfsd4_lock add this structure to the lockowner's list
> > prior to setting the lock. Then call the vfs and request a blocking lock
> > (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED
> > back, then we dequeue the block structure and free it. When the next
> > lock request comes in, we'll look for an existing block for the same
> > filehandle and dequeue and reuse it if there is one.
>
> I'm probably just missing it because I only skimmed the patch quickly,
> but I don't see you distinguishing the blocking and non-blocking case.
> I think we only want to do this in the {READ,WRITE}W_LT case, as
> {READ,WRITE}_LT is a hint from the client that it's not going to wait
> for the lock.  Sending a notify without a preceding blocking request is
> probably a (relatively harmless) protocol bug.
>
> --b.
>

Ahh good point, I'm not distinguishing those cases currently. I'll fix
that up in the next iteration. And yeah, in the event that the client
isn't actually waiting on the lock, it should just ignore the
notification. Still, best not to notify unless something is waiting for
it.
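
For example, the next iteration could gate FL_SLEEP on the blocking lock
types, something like this sketch against the patch above (not posted
code; share-access handling elided for brevity):

	fl_flags = FL_POSIX;
	switch (lock->lk_type) {
	case NFS4_READW_LT:
		if (nfsd4_has_session(cstate))
			fl_flags |= FL_SLEEP;
		/* Fallthrough */
	case NFS4_READ_LT:
		fl_type = F_RDLCK;
		break;
	case NFS4_WRITEW_LT:
		if (nfsd4_has_session(cstate))
			fl_flags |= FL_SLEEP;
		/* Fallthrough */
	case NFS4_WRITE_LT:
		fl_type = F_WRLCK;
		break;
	default:
		status = nfserr_inval;
		goto out;
	}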

I haven't coded up the client-side part of this yet either, so for now
the client just replies with NFS4ERR_NOTSUPP. Adding that is next...

Thanks for having a look!


--
Jeff Layton <[email protected]>

2016-08-02 21:34:52

by J. Bruce Fields

Subject: Re: [RFC PATCH 0/4] nfsd: add CB_NOTIFY_LOCK support

On Tue, Aug 02, 2016 at 02:15:27PM -0400, Jeff Layton wrote:
> A small set of patches that should add CB_NOTIFY_LOCK support for knfsd.
> The basic idea is to use FL_SLEEP to set blocks when a lock is contended,
> and then queue a callback to issue a CB_NOTIFY_LOCK in the lm_notify op.
>
> Per the RFC, we take no steps to reserve the lock for the client. This is
> a simple notification to tell the client that it may want to poll for it
> again.
>
> It also takes steps to clean out old, abandoned blocks when the client
> loses interest in obtaining the lock.

I had to double-check the spec language here: ok, 5661 9.6 does
explicitly say that clients can't depend on the notify, so they do need
to continue polling--so that cleanup doesn't risk leaving clients
blocking indefinitely. Might be worth a comment referencing that
language.

Looks like you're not attempting any sort of fair queueing, so clients
that get a notify just race for the lock? That's fine.

Thanks for looking at this!

--b.
