Return-Path: Received: from mail-qk0-f169.google.com ([209.85.220.169]:36829 "EHLO mail-qk0-f169.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752337AbcHBSqA (ORCPT ); Tue, 2 Aug 2016 14:46:00 -0400 Received: by mail-qk0-f169.google.com with SMTP id v123so50500531qkh.3 for ; Tue, 02 Aug 2016 11:46:00 -0700 (PDT) Message-ID: <1470163555.15226.1.camel@redhat.com> Subject: Re: [RFC PATCH 2/4] nfsd: have nfsd4_lock use blocking locks for v4.1+ locks From: Jeff Layton To: linux-nfs@vger.kernel.org Cc: bfields@fieldses.org Date: Tue, 02 Aug 2016 14:45:55 -0400 In-Reply-To: <1470161731-11301-3-git-send-email-jlayton@redhat.com> References: <1470161731-11301-1-git-send-email-jlayton@redhat.com> <1470161731-11301-3-git-send-email-jlayton@redhat.com> Content-Type: text/plain; charset="UTF-8" Mime-Version: 1.0 Sender: linux-nfs-owner@vger.kernel.org List-ID: On Tue, 2016-08-02 at 14:15 -0400, Jeff Layton wrote: > Create a new per-lockowner+per-inode structure that contains a > file_lock. Have nfsd4_lock add this structure to the lockowner's list > prior to setting the lock. Then call the vfs and request a blocking lock > (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED > back, then we dequeue the block structure and free it. When the next > lock request comes in, we'll look for an existing block for the same > filehandle and dequeue and reuse it if there is one. > > When the lock comes free (a'la an lm_notify call), we dequeue it > from the lockowner's list and kick off a CB_NOTIFY_LOCK callback to > inform the client that it should retry the lock request. > > > Signed-off-by: Jeff Layton > --- >  fs/nfsd/nfs4state.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++----- >  fs/nfsd/state.h     |  12 +++-- >  2 files changed, 147 insertions(+), 17 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 70d0b9b33031..38367201c14f 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab; >  static void free_session(struct nfsd4_session *); >   >  static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; > +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; >   >  static bool is_session_dead(struct nfsd4_session *ses) >  { > @@ -210,6 +211,84 @@ static void nfsd4_put_session(struct nfsd4_session *ses) > >   spin_unlock(&nn->client_lock); >  } >   > +static struct nfsd4_blocked_lock * > +find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, > > + struct nfsd_net *nn) > +{ > > + struct nfsd4_blocked_lock *cur, *found = NULL; > + > > + spin_lock(&nn->client_lock); > > + list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { > > + if (fh_match(fh, &cur->nbl_fh)) { > > + list_del_init(&cur->nbl_list); > > + found = cur; > > + break; > > + } > > + } > > + spin_unlock(&nn->client_lock); > > + if (found) > > + posix_unblock_lock(&found->nbl_lock); > > + return found; > +} > + > +static struct nfsd4_blocked_lock * > +find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh, > > + struct nfsd_net *nn) > +{ > > + struct nfsd4_blocked_lock *nbl; > + > > + nbl = find_blocked_lock(lo, fh, nn); > > + if (!nbl) { > > + nbl= kmalloc(sizeof(*nbl), GFP_KERNEL); > > + if (nbl) { > > + fh_copy_shallow(&nbl->nbl_fh, fh); > > + locks_init_lock(&nbl->nbl_lock); > > + nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client, > > + &nfsd4_cb_notify_lock_ops, > > + NFSPROC4_CLNT_CB_NOTIFY_LOCK); > > + } > > + } > > + return nbl; > +} > + > +static void > +free_blocked_lock(struct nfsd4_blocked_lock *nbl) > +{ > > + locks_release_private(&nbl->nbl_lock); > > + kfree(nbl); > +} > + > +static int > +nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) > +{ > > + /* > > +  * Since this is just an optimization, we don't try very hard if it > > +  * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and > > +  * just quit trying on anything else. > > +  */ > > + switch (task->tk_status) { > > + case -NFS4ERR_DELAY: > > + rpc_delay(task, 1 * HZ); > > + return 0; > > + default: > > + return 1; > > + } > +} > + > +static void > +nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb) > +{ > > > + struct nfsd4_blocked_lock *nbl = container_of(cb, > > + struct nfsd4_blocked_lock, nbl_cb); > + > > + free_blocked_lock(nbl); > +} > + > +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = { > > > + .done = nfsd4_cb_notify_lock_done, > > > + .release = nfsd4_cb_notify_lock_release, > +}; > + >  static inline struct nfs4_stateowner * >  nfs4_get_stateowner(struct nfs4_stateowner *sop) >  { > @@ -5296,7 +5375,29 @@ nfsd4_fl_put_owner(fl_owner_t owner) > >   nfs4_put_stateowner(&lo->lo_owner); >  } >   > +static void > +nfsd4_lm_notify(struct file_lock *fl) > +{ > > > + struct nfs4_lockowner *lo = (struct nfs4_lockowner *)fl->fl_owner; > > > + struct net *net = lo->lo_owner.so_client->net; > > > + struct nfsd_net *nn = net_generic(net, nfsd_net_id); > > > + struct nfsd4_blocked_lock *nbl = container_of(fl, > > + struct nfsd4_blocked_lock, nbl_lock); > > + bool queue = false; > + > > + spin_lock(&nn->client_lock); > > + if (!list_empty(&nbl->nbl_list)) { > > + list_del_init(&nbl->nbl_list); > > + queue = true; > > + } > > + spin_unlock(&nn->client_lock); > + > > + if (queue) > > + nfsd4_run_cb(&nbl->nbl_cb); > +} > + >  static const struct lock_manager_operations nfsd_posix_mng_ops  = { > > + .lm_notify = nfsd4_lm_notify, > >   .lm_get_owner = nfsd4_fl_get_owner, > >   .lm_put_owner = nfsd4_fl_put_owner, >  }; > @@ -5394,6 +5495,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, > >   lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); > >   if (!lo) > >   return NULL; > > + INIT_LIST_HEAD(&lo->lo_blocked); > >   INIT_LIST_HEAD(&lo->lo_owner.so_stateids); > >   lo->lo_owner.so_is_open_owner = 0; > >   lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; > @@ -5558,12 +5660,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > >   struct nfs4_ol_stateid *open_stp = NULL; > >   struct nfs4_file *fp; > >   struct file *filp = NULL; > > + struct nfsd4_blocked_lock *nbl = NULL; > >   struct file_lock *file_lock = NULL; > >   struct file_lock *conflock = NULL; > >   __be32 status = 0; > >   int lkflg; > >   int err; > >   bool new = false; > > + unsigned char fl_type; > >   struct net *net = SVC_NET(rqstp); > >   struct nfsd_net *nn = net_generic(net, nfsd_net_id); >   > @@ -5630,13 +5734,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > >   if (!locks_in_grace(net) && lock->lk_reclaim) > >   goto out; >   > > - file_lock = locks_alloc_lock(); > > - if (!file_lock) { > > - dprintk("NFSD: %s: unable to allocate lock!\n", __func__); > > - status = nfserr_jukebox; > > - goto out; > > - } > - > >   fp = lock_stp->st_stid.sc_file; > >   switch (lock->lk_type) { > >   case NFS4_READ_LT: > @@ -5646,7 +5743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > >   if (filp) > >   get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); > >   spin_unlock(&fp->fi_lock); > > - file_lock->fl_type = F_RDLCK; > > + fl_type = F_RDLCK; > >   break; > >   case NFS4_WRITE_LT: > >   case NFS4_WRITEW_LT: > @@ -5655,17 +5752,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > >   if (filp) > >   get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); > >   spin_unlock(&fp->fi_lock); > > - file_lock->fl_type = F_WRLCK; > > + fl_type = F_WRLCK; > >   break; > >   default: > >   status = nfserr_inval; > >   goto out; > >   } > + > >   if (!filp) { > >   status = nfserr_openmode; > >   goto out; > >   } >   > > + nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn); > > + if (!nbl) { > > + dprintk("NFSD: %s: unable to allocate block!\n", __func__); > > + status = nfserr_jukebox; > > + goto out; > > + } > + > > + file_lock = &nbl->nbl_lock; > > + file_lock->fl_type = fl_type; > >   file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner)); > >   file_lock->fl_pid = current->tgid; > >   file_lock->fl_file = filp; > @@ -5682,18 +5789,28 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > >   goto out; > >   } >   > > + if (nfsd4_has_session(cstate)) { > > + file_lock->fl_flags |= FL_SLEEP; > > + spin_lock(&nn->client_lock); > > + list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); > > + spin_unlock(&nn->client_lock); > > + } > + > >   err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); > > - switch (-err) { > > > + switch (err) { Ouch, already see a bug here. Need to propagate the negative to the other return codes. > > >   case 0: /* success! */ > >   nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid); > >   status = 0; > >   break; > > > - case (EAGAIN): /* conflock holds conflicting lock */ > > + case FILE_LOCK_DEFERRED: > > + nbl = NULL; > > + /* Fallthrough */ > > > + case EAGAIN: /* conflock holds conflicting lock */ > >   status = nfserr_denied; > >   dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); > >   nfs4_set_lock_denied(conflock, &lock->lk_denied); > >   break; > > - case (EDEADLK): > > + case EDEADLK: > >   status = nfserr_deadlock; > >   break; > >   default: > @@ -5702,6 +5819,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > >   break; > >   } >  out: > > + if (nbl) { > > + /* dequeue it if we queued it before */ > > + if (nfsd4_has_session(cstate)) { > > + spin_lock(&nn->client_lock); > > + list_del_init(&nbl->nbl_list); > > + spin_unlock(&nn->client_lock); > > + } > > + free_blocked_lock(nbl); > > + } > >   if (filp) > >   fput(filp); > >   if (lock_stp) { > @@ -5725,8 +5851,6 @@ out: > >   if (open_stp) > >   nfs4_put_stid(&open_stp->st_stid); > >   nfsd4_bump_seqid(cstate, status); > > - if (file_lock) > > - locks_free_lock(file_lock); > >   if (conflock) > >   locks_free_lock(conflock); > >   return status; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index ecf3f4671654..056b0e4c485b 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -440,11 +440,11 @@ struct nfs4_openowner { >  /* >   * Represents a generic "lockowner". Similar to an openowner. References to it >   * are held by the lock stateids that are created on its behalf. This object is > - * a superset of the nfs4_stateowner struct (or would be if it needed any extra > - * fields). > + * a superset of the nfs4_stateowner struct. >   */ >  struct nfs4_lockowner { > > > - struct nfs4_stateowner lo_owner; /* must be first element */ > > > > + struct nfs4_stateowner lo_owner; /* must be first element */ > > > > + struct list_head lo_blocked; /* blocked file_locks */ >  }; >   >  static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) > @@ -580,7 +580,13 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) > >   return (s32)(a->si_generation - b->si_generation) > 0; >  } >   > +/* > + * When a client tries to get a lock on a file, we set one of these objects > + * on the blocking lock. When the lock becomes free, we can then issue a > + * CB_NOTIFY_LOCK to the server. > + */ >  struct nfsd4_blocked_lock { > > > + struct list_head nbl_list; > > >   struct file_lock nbl_lock; > > >   struct knfsd_fh nbl_fh; > > >   struct nfsd4_callback nbl_cb; -- Jeff Layton