Message-ID: <1470163555.15226.1.camel@redhat.com>
Subject: Re: [RFC PATCH 2/4] nfsd: have nfsd4_lock use blocking locks for
 v4.1+ locks
From: Jeff Layton <jlayton@redhat.com>
To: linux-nfs@vger.kernel.org
Cc: bfields@fieldses.org
Date: Tue, 02 Aug 2016 14:45:55 -0400
In-Reply-To: <1470161731-11301-3-git-send-email-jlayton@redhat.com>
References: <1470161731-11301-1-git-send-email-jlayton@redhat.com>
	 <1470161731-11301-3-git-send-email-jlayton@redhat.com>
Content-Type: text/plain; charset="UTF-8"
Mime-Version: 1.0
Sender: linux-nfs-owner@vger.kernel.org

On Tue, 2016-08-02 at 14:15 -0400, Jeff Layton wrote:
> Create a new per-lockowner+per-inode structure that contains a
> file_lock. Have nfsd4_lock add this structure to the lockowner's list
> prior to setting the lock. Then call the vfs and request a blocking lock
> (by setting FL_SLEEP). If we get anything besides FILE_LOCK_DEFERRED
> back, then we dequeue the block structure and free it. When the next
> lock request comes in, we'll look for an existing block for the same
> filehandle and dequeue and reuse it if there is one.
> 
> When the lock comes free (a'la an lm_notify call), we dequeue it
> from the lockowner's list and kick off a CB_NOTIFY_LOCK callback to
> inform the client that it should retry the lock request.
> 
> > Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>  fs/nfsd/nfs4state.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++-----
>  fs/nfsd/state.h     |  12 +++--
>  2 files changed, 147 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 70d0b9b33031..38367201c14f 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -99,6 +99,7 @@ static struct kmem_cache *odstate_slab;
>  static void free_session(struct nfsd4_session *);
>  
>  static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
> +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
>  
>  static bool is_session_dead(struct nfsd4_session *ses)
>  {
> @@ -210,6 +211,84 @@ static void nfsd4_put_session(struct nfsd4_session *ses)
> >  	spin_unlock(&nn->client_lock);
>  }
>  
> +static struct nfsd4_blocked_lock *
> +find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
> > +			struct nfsd_net *nn)
> +{
> > +	struct nfsd4_blocked_lock *cur, *found = NULL;
> +
> > +	spin_lock(&nn->client_lock);
> > +	list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
> > +		if (fh_match(fh, &cur->nbl_fh)) {
> > +			list_del_init(&cur->nbl_list);
> > +			found = cur;
> > +			break;
> > +		}
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +	if (found)
> > +		posix_unblock_lock(&found->nbl_lock);
> > +	return found;
> +}
> +
> +static struct nfsd4_blocked_lock *
> +find_or_allocate_block(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
> > +			struct nfsd_net *nn)
> +{
> > +	struct nfsd4_blocked_lock *nbl;
> +
> > +	nbl = find_blocked_lock(lo, fh, nn);
> > +	if (!nbl) {
> > +		nbl= kmalloc(sizeof(*nbl), GFP_KERNEL);
> > +		if (nbl) {
> > +			fh_copy_shallow(&nbl->nbl_fh, fh);
> > +			locks_init_lock(&nbl->nbl_lock);
> > +			nfsd4_init_cb(&nbl->nbl_cb, lo->lo_owner.so_client,
> > +					&nfsd4_cb_notify_lock_ops,
> > +					NFSPROC4_CLNT_CB_NOTIFY_LOCK);
> > +		}
> > +	}
> > +	return nbl;
> +}
> +
> +static void
> +free_blocked_lock(struct nfsd4_blocked_lock *nbl)
> +{
> > +	locks_release_private(&nbl->nbl_lock);
> > +	kfree(nbl);
> +}
> +
> +static int
> +nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
> +{
> > +	/*
> > +	 * Since this is just an optimization, we don't try very hard if it
> > +	 * turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
> > +	 * just quit trying on anything else.
> > +	 */
> > +	switch (task->tk_status) {
> > +	case -NFS4ERR_DELAY:
> > +		rpc_delay(task, 1 * HZ);
> > +		return 0;
> > +	default:
> > +		return 1;
> > +	}
> +}
> +
> +static void
> +nfsd4_cb_notify_lock_release(struct nfsd4_callback *cb)
> +{
> > > +	struct nfsd4_blocked_lock	*nbl = container_of(cb,
> > +						struct nfsd4_blocked_lock, nbl_cb);
> +
> > +	free_blocked_lock(nbl);
> +}
> +
> +static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops = {
> > > +	.done		= nfsd4_cb_notify_lock_done,
> > > +	.release	= nfsd4_cb_notify_lock_release,
> +};
> +
>  static inline struct nfs4_stateowner *
>  nfs4_get_stateowner(struct nfs4_stateowner *sop)
>  {
> @@ -5296,7 +5375,29 @@ nfsd4_fl_put_owner(fl_owner_t owner)
> >  		nfs4_put_stateowner(&lo->lo_owner);
>  }
>  
> +static void
> +nfsd4_lm_notify(struct file_lock *fl)
> +{
> > > +	struct nfs4_lockowner		*lo = (struct nfs4_lockowner *)fl->fl_owner;
> > > +	struct net			*net = lo->lo_owner.so_client->net;
> > > +	struct nfsd_net			*nn = net_generic(net, nfsd_net_id);
> > > +	struct nfsd4_blocked_lock	*nbl = container_of(fl,
> > +						struct nfsd4_blocked_lock, nbl_lock);
> > +	bool queue = false;
> +
> > +	spin_lock(&nn->client_lock);
> > +	if (!list_empty(&nbl->nbl_list)) {
> > +		list_del_init(&nbl->nbl_list);
> > +		queue = true;
> > +	}
> > +	spin_unlock(&nn->client_lock);
> +
> > +	if (queue)
> > +		nfsd4_run_cb(&nbl->nbl_cb);
> +}
> +
>  static const struct lock_manager_operations nfsd_posix_mng_ops  = {
> > +	.lm_notify = nfsd4_lm_notify,
> >  	.lm_get_owner = nfsd4_fl_get_owner,
> >  	.lm_put_owner = nfsd4_fl_put_owner,
>  };
> @@ -5394,6 +5495,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
> >  	lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
> >  	if (!lo)
> >  		return NULL;
> > +	INIT_LIST_HEAD(&lo->lo_blocked);
> >  	INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
> >  	lo->lo_owner.so_is_open_owner = 0;
> >  	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
> @@ -5558,12 +5660,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	struct nfs4_ol_stateid *open_stp = NULL;
> >  	struct nfs4_file *fp;
> >  	struct file *filp = NULL;
> > +	struct nfsd4_blocked_lock *nbl = NULL;
> >  	struct file_lock *file_lock = NULL;
> >  	struct file_lock *conflock = NULL;
> >  	__be32 status = 0;
> >  	int lkflg;
> >  	int err;
> >  	bool new = false;
> > +	unsigned char fl_type;
> >  	struct net *net = SVC_NET(rqstp);
> >  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  
> @@ -5630,13 +5734,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	if (!locks_in_grace(net) && lock->lk_reclaim)
> >  		goto out;
>  
> > -	file_lock = locks_alloc_lock();
> > -	if (!file_lock) {
> > -		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
> > -		status = nfserr_jukebox;
> > -		goto out;
> > -	}
> -
> >  	fp = lock_stp->st_stid.sc_file;
> >  	switch (lock->lk_type) {
> >  		case NFS4_READ_LT:
> @@ -5646,7 +5743,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  			if (filp)
> >  				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
> >  			spin_unlock(&fp->fi_lock);
> > -			file_lock->fl_type = F_RDLCK;
> > +			fl_type = F_RDLCK;
> >  			break;
> >  		case NFS4_WRITE_LT:
> >  		case NFS4_WRITEW_LT:
> @@ -5655,17 +5752,27 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  			if (filp)
> >  				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
> >  			spin_unlock(&fp->fi_lock);
> > -			file_lock->fl_type = F_WRLCK;
> > +			fl_type = F_WRLCK;
> >  			break;
> >  		default:
> >  			status = nfserr_inval;
> >  		goto out;
> >  	}
> +
> >  	if (!filp) {
> >  		status = nfserr_openmode;
> >  		goto out;
> >  	}
>  
> > +	nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
> > +	if (!nbl) {
> > +		dprintk("NFSD: %s: unable to allocate block!\n", __func__);
> > +		status = nfserr_jukebox;
> > +		goto out;
> > +	}
> +
> > +	file_lock = &nbl->nbl_lock;
> > +	file_lock->fl_type = fl_type;
> >  	file_lock->fl_owner = (fl_owner_t)lockowner(nfs4_get_stateowner(&lock_sop->lo_owner));
> >  	file_lock->fl_pid = current->tgid;
> >  	file_lock->fl_file = filp;
> @@ -5682,18 +5789,28 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  		goto out;
> >  	}
>  
> > +	if (nfsd4_has_session(cstate)) {
> > +		file_lock->fl_flags |= FL_SLEEP;
> > +		spin_lock(&nn->client_lock);
> > +		list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
> > +		spin_unlock(&nn->client_lock);
> > +	}
> +
> >  	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
> > -	switch (-err) {
> > > +	switch (err) {

Ouch, already see a bug here. Need to propagate the negative to the
other return codes.

> > >  	case 0: /* success! */
> >  		nfs4_inc_and_copy_stateid(&lock->lk_resp_stateid, &lock_stp->st_stid);
> >  		status = 0;
> >  		break;
> > > -	case (EAGAIN):		/* conflock holds conflicting lock */
> > +	case FILE_LOCK_DEFERRED:
> > +		nbl = NULL;
> > +		/* Fallthrough */
> > > +	case EAGAIN:		/* conflock holds conflicting lock */
> >  		status = nfserr_denied;
> >  		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
> >  		nfs4_set_lock_denied(conflock, &lock->lk_denied);
> >  		break;
> > -	case (EDEADLK):
> > +	case EDEADLK:
> >  		status = nfserr_deadlock;
> >  		break;
> >  	default:
> @@ -5702,6 +5819,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  		break;
> >  	}
>  out:
> > +	if (nbl) {
> > +		/* dequeue it if we queued it before */
> > +		if (nfsd4_has_session(cstate)) {
> > +			spin_lock(&nn->client_lock);
> > +			list_del_init(&nbl->nbl_list);
> > +			spin_unlock(&nn->client_lock);
> > +		}
> > +		free_blocked_lock(nbl);
> > +	}
> >  	if (filp)
> >  		fput(filp);
> >  	if (lock_stp) {
> @@ -5725,8 +5851,6 @@ out:
> >  	if (open_stp)
> >  		nfs4_put_stid(&open_stp->st_stid);
> >  	nfsd4_bump_seqid(cstate, status);
> > -	if (file_lock)
> > -		locks_free_lock(file_lock);
> >  	if (conflock)
> >  		locks_free_lock(conflock);
> >  	return status;
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index ecf3f4671654..056b0e4c485b 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -440,11 +440,11 @@ struct nfs4_openowner {
>  /*
>   * Represents a generic "lockowner". Similar to an openowner. References to it
>   * are held by the lock stateids that are created on its behalf. This object is
> - * a superset of the nfs4_stateowner struct (or would be if it needed any extra
> - * fields).
> + * a superset of the nfs4_stateowner struct.
>   */
>  struct nfs4_lockowner {
> > > -	struct nfs4_stateowner	lo_owner; /* must be first element */
> > > > +	struct nfs4_stateowner	lo_owner;	/* must be first element */
> > > > +	struct list_head	lo_blocked;	/* blocked file_locks */
>  };
>  
>  static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so)
> @@ -580,7 +580,13 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b)
> >  	return (s32)(a->si_generation - b->si_generation) > 0;
>  }
>  
> +/*
> + * When a client tries to get a lock on a file, we set one of these objects
> + * on the blocking lock. When the lock becomes free, we can then issue a
> + * CB_NOTIFY_LOCK to the server.
> + */
>  struct nfsd4_blocked_lock {
> > > +	struct list_head	nbl_list;
> > >  	struct file_lock	nbl_lock;
> > >  	struct knfsd_fh		nbl_fh;
> > >  	struct nfsd4_callback	nbl_cb;


-- 
Jeff Layton <jlayton@redhat.com>