This code rewrites the layout state handling and the CB_LAYOUTRECALL paths.
It applies to Benny's current pnfs-submit branch (commit eb04948a).
Patch 01 - A session's callback patch that passes the CB_SEQUENCE info (the session in particular) up to subsequent operations. Andy is looking at issues Trond pointed out with this patch, but something very much like it will come.
02-12 - These are the smallish changes I could pull out of the rewrite. Patch 03 in particular introduces changes to refcounting that are at the heart of I/O drain notification.
13 - Sorry this patch is so huge, but I am simultaneously changing the stateid and RPC processing for LAYOUTGET, LAYOUTRETURN, and CB_LAYOUTRECALL, and I did not see an easy way to split them bisectably.
14 - More CB_LAYOUTRECALL changes that could be more easily split out.
15-18 - Andy's code that puts LAYOUTCOMMIT and LAYOUTRETURN in the same compound as CLOSE.
Fred
On Thu, Nov 11, 2010 at 2:00 AM, Benny Halevy <[email protected]> wrote:
> On 2010-11-10 16:46, Fred Isaman wrote:
>> On Wed, Nov 10, 2010 at 9:35 AM, Benny Halevy <[email protected]> wrote:
>>> On 2010-11-04 17:22, Fred Isaman wrote:
>>>> Instead, have a mark_invalid function that marks the lseg invalid and
>>>> removes the reference that holds it in the list.  Now when io is finished,
>>>> the lseg will automatically be removed from the list.  This is
>>>> at the heart of many of the upcoming cb_layoutrecall changes.
>>>>
>>>> Signed-off-by: Fred Isaman <[email protected]>
>>>> ---
>>>>  fs/nfs/nfs4xdr.c |    3 +-
>>>>  fs/nfs/pnfs.c    |  145 ++++++++++++++++++++++++++++++++++-------------------
>>>>  fs/nfs/pnfs.h    |    1 +
>>>>  3 files changed, 95 insertions(+), 54 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>> index 238eeb2..6d9ef2b 100644
>>>> --- a/fs/nfs/nfs4xdr.c
>>>> +++ b/fs/nfs/nfs4xdr.c
>>>> @@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
>>>> ? ? ? ? ? ? ? p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
>>>> ? ? ? ? ? ? ? p = xdr_encode_hyper(p, args->range.offset);
>>>> ? ? ? ? ? ? ? p = xdr_encode_hyper(p, args->range.length);
>>>> - ? ? ? ? ? ? pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
>>>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NULL);
>>>> + ? ? ? ? ? ? pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
>>>> ? ? ? ? ? ? ? p = xdr_encode_opaque_fixed(p, &stateid.data,
>>>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NFS4_STATEID_SIZE);
>>>> ? ? ? ? ? ? ? p = reserve_space(xdr, 4);
>>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>>> index 3bbe3be..4e5c68b 100644
>>>> --- a/fs/nfs/pnfs.c
>>>> +++ b/fs/nfs/pnfs.c
>>>> @@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
>>>> ? ? ? lseg->layout = lo;
>>>> ?}
>>>>
>>>> +static void
>>>> +_put_lseg_common(struct pnfs_layout_segment *lseg)
>>>> +{
>>>> + ? ? BUG_ON(lseg->valid == true);
>>>> + ? ? list_del(&lseg->fi_list);
>>>> + ? ? if (list_empty(&lseg->layout->segs)) {
>>>> + ? ? ? ? ? ? struct nfs_client *clp;
>>>> +
>>>> + ? ? ? ? ? ? clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
>>>> + ? ? ? ? ? ? spin_lock(&clp->cl_lock);
>>>> + ? ? ? ? ? ? /* List does not take a reference, so no need for put here */
>>>> + ? ? ? ? ? ? list_del_init(&lseg->layout->layouts);
>>>> + ? ? ? ? ? ? spin_unlock(&clp->cl_lock);
>>>> + ? ? ? ? ? ? pnfs_invalidate_layout_stateid(lseg->layout);
>>>> + ? ? }
>>>> + ? ? rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
>>>> +}
>>>> +
>>>> +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
>>>> + * could sleep, so must be called outside of the lock.
>>>> + */
>>>> +static void
>>>> +put_lseg_locked(struct pnfs_layout_segment *lseg,
>>>> + ? ? ? ? ? ? struct list_head *tmp_list)
>>>> +{
>>>> + ? ? dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>>> + ? ? ? ? ? ? atomic_read(&lseg->pls_refcount), lseg->valid);
>>>> + ? ? if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>>> + ? ? ? ? ? ? _put_lseg_common(lseg);
>>>> + ? ? ? ? ? ? list_add(&lseg->fi_list, tmp_list);
>>>> + ? ? }
>>>> +}
>>>> +
>>>> ?void
>>>> ?put_lseg(struct pnfs_layout_segment *lseg)
>>>> ?{
>>>> - ? ? bool do_wake_up;
>>>> ? ? ? struct inode *ino;
>>>>
>>>> ? ? ? if (!lseg)
>>>> @@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg)
>>>>
>>>> ? ? ? dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>>> ? ? ? ? ? ? ? atomic_read(&lseg->pls_refcount), lseg->valid);
>>>> - ? ? do_wake_up = !lseg->valid;
>>>> ? ? ? ino = lseg->layout->inode;
>>>> - ? ? if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>>> + ? ? if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
>>>> + ? ? ? ? ? ? _put_lseg_common(lseg);
>>>> + ? ? ? ? ? ? spin_unlock(&ino->i_lock);
>>>> ? ? ? ? ? ? ? NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>>> ? ? ? ? ? ? ? /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>>> ? ? ? ? ? ? ? put_layout_hdr(ino);
>>>> ? ? ? }
>>>> - ? ? if (do_wake_up)
>>>> - ? ? ? ? ? ? rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
>>>> ?}
>>>> ?EXPORT_SYMBOL_GPL(put_lseg);
>>>>
>>>> @@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
>>>> ? ? ? ? ? ? ? lseg->range.iomode == range->iomode);
>>>> ?}
>>>>
>>>> -static bool
>>>> -_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
>>>> +static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? struct list_head *tmp_list)
>>>> ?{
>>>> - ? ? return atomic_read(&lseg->pls_refcount) == 1;
>>>> + ? ? assert_spin_locked(&lseg->layout->inode->i_lock);
>>>> + ? ? if (lseg->valid) {
>>>> + ? ? ? ? ? ? lseg->valid = false;
>>>> + ? ? ? ? ? ? /* Remove the reference keeping the lseg in the
>>>> + ? ? ? ? ? ? ?* list. ?It will now be removed when all
>>>> + ? ? ? ? ? ? ?* outstanding io is finished.
>>>> + ? ? ? ? ? ? ?*/
>>>> + ? ? ? ? ? ? put_lseg_locked(lseg, tmp_list);
>>>> + ? ? }
>>>> ?}
>>>>
>>>> ?static void
>>>> @@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
>>>> ? ? ? ? ? ? ? __func__, lo, range->offset, range->length, range->iomode);
>>>>
>>>> ? ? ? assert_spin_locked(&lo->inode->i_lock);
>>>> - ? ? list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
>>>> - ? ? ? ? ? ? if (!should_free_lseg(lseg, range) ||
>>>> - ? ? ? ? ? ? ? ? !_pnfs_can_return_lseg(lseg))
>>>> - ? ? ? ? ? ? ? ? ? ? continue;
>>>> - ? ? ? ? ? ? dprintk("%s: freeing lseg %p iomode %d "
>>>> - ? ? ? ? ? ? ? ? ? ? "offset %llu length %llu\n", __func__,
>>>> - ? ? ? ? ? ? ? ? ? ? lseg, lseg->range.iomode, lseg->range.offset,
>>>> - ? ? ? ? ? ? ? ? ? ? lseg->range.length);
>>>> - ? ? ? ? ? ? list_move(&lseg->fi_list, tmp_list);
>>>> - ? ? }
>>>> - ? ? if (list_empty(&lo->segs)) {
>>>> - ? ? ? ? ? ? struct nfs_client *clp;
>>>> -
>>>> - ? ? ? ? ? ? clp = NFS_SERVER(lo->inode)->nfs_client;
>>>> - ? ? ? ? ? ? spin_lock(&clp->cl_lock);
>>>> - ? ? ? ? ? ? /* List does not take a reference, so no need for put here */
>>>> - ? ? ? ? ? ? list_del_init(&lo->layouts);
>>>> - ? ? ? ? ? ? spin_unlock(&clp->cl_lock);
>>>> - ? ? ? ? ? ? pnfs_invalidate_layout_stateid(lo);
>>>> - ? ? }
>>>> -
>>>> + ? ? list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
>>>> + ? ? ? ? ? ? if (should_free_lseg(lseg, range)) {
>>>> + ? ? ? ? ? ? ? ? ? ? dprintk("%s: freeing lseg %p iomode %d "
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? "offset %llu length %llu\n", __func__,
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? lseg, lseg->range.iomode, lseg->range.offset,
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? lseg->range.length);
>>>> + ? ? ? ? ? ? ? ? ? ? mark_lseg_invalid(lseg, tmp_list);
>>>> + ? ? ? ? ? ? }
>>>> ? ? ? dprintk("%s:Return\n", __func__);
>>>> ?}
>>>>
>>>> ?static void
>>>> -pnfs_free_lseg_list(struct list_head *tmp_list)
>>>> +pnfs_free_lseg_list(struct list_head *free_me)
>>>> ?{
>>>> - ? ? struct pnfs_layout_segment *lseg;
>>>> + ? ? struct pnfs_layout_segment *lseg, *tmp;
>>>> + ? ? struct inode *ino;
>>>>
>>>> - ? ? while (!list_empty(tmp_list)) {
>>>> - ? ? ? ? ? ? lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
>>>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? fi_list);
>>>> - ? ? ? ? ? ? dprintk("%s calling put_lseg on %p\n", __func__, lseg);
>>>> - ? ? ? ? ? ? list_del(&lseg->fi_list);
>>>> - ? ? ? ? ? ? put_lseg(lseg);
>>>> + ? ? list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
>>>> + ? ? ? ? ? ? BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
>>>> + ? ? ? ? ? ? ino = lseg->layout->inode;
>>>> + ? ? ? ? ? ? NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>>> + ? ? ? ? ? ? /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>>> + ? ? ? ? ? ? put_layout_hdr(ino);
>>>> ? ? ? }
>>>> + ? ? INIT_LIST_HEAD(free_me);
>>>> ?}
>>>>
>>>> ?void
>>>> @@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
>>>> ? ? ? dprintk("<-- %s\n", __func__);
>>>> ?}
>>>>
>>>> +/* Layoutreturn may use an invalid stateid, just copy what is there */
>>>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
>>>> +{
>>>> + ? ? int seq;
>>>> +
>>>> + ? ? do {
>>>> + ? ? ? ? ? ? seq = read_seqbegin(&lo->seqlock);
>>>> + ? ? ? ? ? ? memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
>>>> + ? ? } while (read_seqretry(&lo->seqlock, seq));
>>>> +}
>>>> +
>>>> ?void
>>>> ?pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>>>> ? ? ? ? ? ? ? ? ? ? ? struct nfs4_state *open_state)
>>>> @@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
>>>> ? ? ? return out;
>>>> ?}
>>>>
>>>> +/* Return true if there is layout based io in progress in the given range.
>>>> + * Assumes range has already been marked invalid, and layout marked to
>>>> + * prevent any new lseg from being inserted.
>>>> + */
>>>> ?bool
>>>> ?pnfs_return_layout_barrier(struct nfs_inode *nfsi,
>>>> ? ? ? ? ? ? ? ? ? ? ? ? ?struct pnfs_layout_range *range)
>>>> ?{
>>>> - ? ? struct pnfs_layout_segment *lseg;
>>>> + ? ? struct pnfs_layout_segment *lseg, *tmp;
>>>> ? ? ? bool ret = false;
>>>>
>>>> ? ? ? spin_lock(&nfsi->vfs_inode.i_lock);
>>>> - ? ? list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
>>>> - ? ? ? ? ? ? if (!should_free_lseg(lseg, range))
>>>> - ? ? ? ? ? ? ? ? ? ? continue;
>>>> - ? ? ? ? ? ? lseg->valid = false;
>>>> - ? ? ? ? ? ? if (!_pnfs_can_return_lseg(lseg)) {
>>>> - ? ? ? ? ? ? ? ? ? ? dprintk("%s: wait on lseg %p refcount %d\n",
>>>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? __func__, lseg,
>>>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? atomic_read(&lseg->pls_refcount));
>>>> + ? ? list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
>>>
>>> Why do you need the safe version here while the inode is locked?
>>>
>>
>> We don't.
>
> OK. I'll fix that then :)
>
>>
>>
>>>> + ? ? ? ? ? ? if (should_free_lseg(lseg, range)) {
>>>> ? ? ? ? ? ? ? ? ? ? ? ret = true;
>>>
>>> But this will always return "true" if there's any lseg to return,
>>> not only if (!_pnfs_can_return_lseg(lseg)).
>>>
>>> What am I missing? :)
>>>
>>
>> A return of "true" means the caller should wait.  So if there is any
>> lseg still left to return, we should return true.  The refcounting has
>> changed so that once the pending IO is finished, the lseg will
>> automatically be removed from the list.  I suspect that what you are
>> missing is that...the refcount in the invalid case is one less than
>> what it used to be.
>
> Thanks. I see what you mean now.
>
> What's missing is plh_block_lgets, which is introduced only
> in [PATCH 13/18] pnfs-submit: rewrite of layout state handling and cb_layoutrecall.
> Without it, new lsegs can be inserted into the list in between.
>
> Benny
>
Hmmm, you're right. Let me see if I can tease out enough of the
blocking code from patch 13 to make it work. The other option is to
just merge the two patches together.
Fred
>>
>> Fred
>>
>>> Benny
>>>
>>>> + ? ? ? ? ? ? ? ? ? ? break;
>>>> ? ? ? ? ? ? ? }
>>>> - ? ? }
>>>> ? ? ? spin_unlock(&nfsi->vfs_inode.i_lock);
>>>> ? ? ? dprintk("%s:Return %d\n", __func__, ret);
>>>> ? ? ? return ret;
>>>> @@ -574,12 +611,10 @@ void
>>>> ?pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
>>>> ?{
>>>> ? ? ? struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
>>>> - ? ? LIST_HEAD(tmp_list);
>>>>
>>>> ? ? ? if (lrp->args.return_type != RETURN_FILE)
>>>> ? ? ? ? ? ? ? return;
>>>> ? ? ? spin_lock(&lrp->args.inode->i_lock);
>>>> - ? ? pnfs_clear_lseg_list(lo, &tmp_list, &lrp->args.range);
>>>> ? ? ? if (!lrp->res.valid)
>>>> ? ? ? ? ? ? ? ; ? ? ? /* forgetful model internal release */
>>>> ? ? ? else if (!lrp->res.lrs_present)
>>>> @@ -588,7 +623,6 @@ pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
>>>> ? ? ? ? ? ? ? pnfs_set_layout_stateid(lo, &lrp->res.stateid);
>>>> ? ? ? put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
>>>> ? ? ? spin_unlock(&lrp->args.inode->i_lock);
>>>> - ? ? pnfs_free_lseg_list(&tmp_list);
>>>> ?}
>>>>
>>>> ?static int
>>>> @@ -641,7 +675,11 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
>>>> ? ? ? arg.offset = 0;
>>>> ? ? ? arg.length = NFS4_MAX_UINT64;
>>>>
>>>> + ? ? /* probably should BUGON if type != RETURN_FILE */
>>>> ? ? ? if (type == RETURN_FILE) {
>>>> + ? ? ? ? ? ? LIST_HEAD(tmp_list);
>>>> + ? ? ? ? ? ? struct pnfs_layout_segment *lseg, *tmp;
>>>> +
>>>> ? ? ? ? ? ? ? spin_lock(&ino->i_lock);
>>>> ? ? ? ? ? ? ? lo = nfsi->layout;
>>>> ? ? ? ? ? ? ? if (lo && !has_layout_to_return(lo, &arg))
>>>> @@ -652,10 +690,13 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
>>>> ? ? ? ? ? ? ? ? ? ? ? goto out;
>>>> ? ? ? ? ? ? ? }
>>>>
>>>> + ? ? ? ? ? ? list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
>>>> + ? ? ? ? ? ? ? ? ? ? if (should_free_lseg(lseg, &arg))
>>>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? mark_lseg_invalid(lseg, &tmp_list);
>>>> ? ? ? ? ? ? ? /* Reference matched in pnfs_layoutreturn_release */
>>>> ? ? ? ? ? ? ? get_layout_hdr_locked(lo);
>>>> -
>>>> ? ? ? ? ? ? ? spin_unlock(&ino->i_lock);
>>>> + ? ? ? ? ? ? pnfs_free_lseg_list(&tmp_list);
>>>>
>>>> ? ? ? ? ? ? ? if (layoutcommit_needed(nfsi)) {
>>>> ? ? ? ? ? ? ? ? ? ? ? if (stateid && !wait) { /* callback */
>>>> @@ -1171,7 +1212,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
>>>> ? ? ? nfsi->layout->write_end_pos = 0;
>>>> ? ? ? nfsi->layout->cred = NULL;
>>>> ? ? ? __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state);
>>>> - ? ? pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout, NULL);
>>>> + ? ? pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
>>>>
>>>> ? ? ? /* Reference for layoutcommit matched in pnfs_layoutcommit_release */
>>>> ? ? ? get_layout_hdr_locked(NFS_I(inode)->layout);
>>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>>> index 05dd5e0..000acf0 100644
>>>> --- a/fs/nfs/pnfs.h
>>>> +++ b/fs/nfs/pnfs.h
>>>> @@ -206,6 +206,7 @@ void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr);
>>>> ?void pnfs_destroy_layout(struct nfs_inode *);
>>>> ?void pnfs_destroy_all_layouts(struct nfs_client *);
>>>> ?void put_layout_hdr(struct inode *inode);
>>>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
>>>> ?void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>>>> ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct nfs4_state *open_state);
>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to [email protected]
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
On 2010-11-04 17:22, Fred Isaman wrote:
> Instead, have a mark_invalid function that marks the lseg invalid and
> removes the reference that holds it in the list. Now when io is finished,
> the lseg will automatically be removed from the list. This is
> at the heart of many of the upcoming cb_layoutrecall changes.
>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/nfs4xdr.c | 3 +-
> fs/nfs/pnfs.c | 145 ++++++++++++++++++++++++++++++++++-------------------
> fs/nfs/pnfs.h | 1 +
> 3 files changed, 95 insertions(+), 54 deletions(-)
>
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index 238eeb2..6d9ef2b 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
> p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
> p = xdr_encode_hyper(p, args->range.offset);
> p = xdr_encode_hyper(p, args->range.length);
> - pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
> - NULL);
> + pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
> p = xdr_encode_opaque_fixed(p, &stateid.data,
> NFS4_STATEID_SIZE);
> p = reserve_space(xdr, 4);
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 3bbe3be..4e5c68b 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
> lseg->layout = lo;
> }
>
> +static void
> +_put_lseg_common(struct pnfs_layout_segment *lseg)
> +{
> + BUG_ON(lseg->valid == true);
> + list_del(&lseg->fi_list);
> + if (list_empty(&lseg->layout->segs)) {
> + struct nfs_client *clp;
> +
> + clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
> + spin_lock(&clp->cl_lock);
> + /* List does not take a reference, so no need for put here */
> + list_del_init(&lseg->layout->layouts);
> + spin_unlock(&clp->cl_lock);
> + pnfs_invalidate_layout_stateid(lseg->layout);
> + }
> + rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
> +}
> +
> +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
> + * could sleep, so must be called outside of the lock.
> + */
> +static void
> +put_lseg_locked(struct pnfs_layout_segment *lseg,
> + struct list_head *tmp_list)
> +{
> + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
> + atomic_read(&lseg->pls_refcount), lseg->valid);
> + if (atomic_dec_and_test(&lseg->pls_refcount)) {
> + _put_lseg_common(lseg);
> + list_add(&lseg->fi_list, tmp_list);
> + }
> +}
> +
> void
> put_lseg(struct pnfs_layout_segment *lseg)
> {
> - bool do_wake_up;
> struct inode *ino;
>
> if (!lseg)
> @@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg)
>
> dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
> atomic_read(&lseg->pls_refcount), lseg->valid);
> - do_wake_up = !lseg->valid;
> ino = lseg->layout->inode;
> - if (atomic_dec_and_test(&lseg->pls_refcount)) {
> + if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
> + _put_lseg_common(lseg);
> + spin_unlock(&ino->i_lock);
> NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
> /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
> put_layout_hdr(ino);
> }
> - if (do_wake_up)
> - rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
> }
> EXPORT_SYMBOL_GPL(put_lseg);
>
> @@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
> lseg->range.iomode == range->iomode);
> }
>
> -static bool
> -_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
> +static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
> + struct list_head *tmp_list)
> {
> - return atomic_read(&lseg->pls_refcount) == 1;
> + assert_spin_locked(&lseg->layout->inode->i_lock);
> + if (lseg->valid) {
> + lseg->valid = false;
> + /* Remove the reference keeping the lseg in the
> + * list. It will now be removed when all
> + * outstanding io is finished.
> + */
> + put_lseg_locked(lseg, tmp_list);
> + }
> }
>
> static void
> @@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
> __func__, lo, range->offset, range->length, range->iomode);
>
> assert_spin_locked(&lo->inode->i_lock);
> - list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
> - if (!should_free_lseg(lseg, range) ||
> - !_pnfs_can_return_lseg(lseg))
> - continue;
> - dprintk("%s: freeing lseg %p iomode %d "
> - "offset %llu length %llu\n", __func__,
> - lseg, lseg->range.iomode, lseg->range.offset,
> - lseg->range.length);
> - list_move(&lseg->fi_list, tmp_list);
> - }
> - if (list_empty(&lo->segs)) {
> - struct nfs_client *clp;
> -
> - clp = NFS_SERVER(lo->inode)->nfs_client;
> - spin_lock(&clp->cl_lock);
> - /* List does not take a reference, so no need for put here */
> - list_del_init(&lo->layouts);
> - spin_unlock(&clp->cl_lock);
> - pnfs_invalidate_layout_stateid(lo);
> - }
> -
> + list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
> + if (should_free_lseg(lseg, range)) {
> + dprintk("%s: freeing lseg %p iomode %d "
> + "offset %llu length %llu\n", __func__,
> + lseg, lseg->range.iomode, lseg->range.offset,
> + lseg->range.length);
> + mark_lseg_invalid(lseg, tmp_list);
> + }
> dprintk("%s:Return\n", __func__);
> }
>
> static void
> -pnfs_free_lseg_list(struct list_head *tmp_list)
> +pnfs_free_lseg_list(struct list_head *free_me)
> {
> - struct pnfs_layout_segment *lseg;
> + struct pnfs_layout_segment *lseg, *tmp;
> + struct inode *ino;
>
> - while (!list_empty(tmp_list)) {
> - lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
> - fi_list);
> - dprintk("%s calling put_lseg on %p\n", __func__, lseg);
> - list_del(&lseg->fi_list);
> - put_lseg(lseg);
> + list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
> + BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
> + ino = lseg->layout->inode;
> + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
> + /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
> + put_layout_hdr(ino);
> }
> + INIT_LIST_HEAD(free_me);
> }
>
> void
> @@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
> dprintk("<-- %s\n", __func__);
> }
>
> +/* Layoutreturn may use an invalid stateid, just copy what is there */
> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
> +{
> + int seq;
> +
> + do {
> + seq = read_seqbegin(&lo->seqlock);
> + memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
> + } while (read_seqretry(&lo->seqlock, seq));
> +}
> +
> void
> pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
> struct nfs4_state *open_state)
> @@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
> return out;
> }
>
> +/* Return true if there is layout based io in progress in the given range.
> + * Assumes range has already been marked invalid, and layout marked to
> + * prevent any new lseg from being inserted.
> + */
> bool
> pnfs_return_layout_barrier(struct nfs_inode *nfsi,
> struct pnfs_layout_range *range)
> {
> - struct pnfs_layout_segment *lseg;
> + struct pnfs_layout_segment *lseg, *tmp;
> bool ret = false;
>
> spin_lock(&nfsi->vfs_inode.i_lock);
> - list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
> - if (!should_free_lseg(lseg, range))
> - continue;
> - lseg->valid = false;
> - if (!_pnfs_can_return_lseg(lseg)) {
> - dprintk("%s: wait on lseg %p refcount %d\n",
> - __func__, lseg,
> - atomic_read(&lseg->pls_refcount));
> + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
Why do you need the safe version here while the inode is locked?
> + if (should_free_lseg(lseg, range)) {
> ret = true;
But this will always return "true" if there's any lseg to return,
not only if (!_pnfs_can_return_lseg(lseg)).
What am I missing? :)
Benny
> + break;
> }
> - }
> spin_unlock(&nfsi->vfs_inode.i_lock);
> dprintk("%s:Return %d\n", __func__, ret);
> return ret;
> @@ -574,12 +611,10 @@ void
> pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
> {
> struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
> - LIST_HEAD(tmp_list);
>
> if (lrp->args.return_type != RETURN_FILE)
> return;
> spin_lock(&lrp->args.inode->i_lock);
> - pnfs_clear_lseg_list(lo, &tmp_list, &lrp->args.range);
> if (!lrp->res.valid)
> ; /* forgetful model internal release */
> else if (!lrp->res.lrs_present)
> @@ -588,7 +623,6 @@ pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
> pnfs_set_layout_stateid(lo, &lrp->res.stateid);
> put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
> spin_unlock(&lrp->args.inode->i_lock);
> - pnfs_free_lseg_list(&tmp_list);
> }
>
> static int
> @@ -641,7 +675,11 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
> arg.offset = 0;
> arg.length = NFS4_MAX_UINT64;
>
> + /* probably should BUGON if type != RETURN_FILE */
> if (type == RETURN_FILE) {
> + LIST_HEAD(tmp_list);
> + struct pnfs_layout_segment *lseg, *tmp;
> +
> spin_lock(&ino->i_lock);
> lo = nfsi->layout;
> if (lo && !has_layout_to_return(lo, &arg))
> @@ -652,10 +690,13 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
> goto out;
> }
>
> + list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
> + if (should_free_lseg(lseg, &arg))
> + mark_lseg_invalid(lseg, &tmp_list);
> /* Reference matched in pnfs_layoutreturn_release */
> get_layout_hdr_locked(lo);
> -
> spin_unlock(&ino->i_lock);
> + pnfs_free_lseg_list(&tmp_list);
>
> if (layoutcommit_needed(nfsi)) {
> if (stateid && !wait) { /* callback */
> @@ -1171,7 +1212,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
> nfsi->layout->write_end_pos = 0;
> nfsi->layout->cred = NULL;
> __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state);
> - pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout, NULL);
> + pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
>
> /* Reference for layoutcommit matched in pnfs_layoutcommit_release */
> get_layout_hdr_locked(NFS_I(inode)->layout);
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 05dd5e0..000acf0 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -206,6 +206,7 @@ void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr);
> void pnfs_destroy_layout(struct nfs_inode *);
> void pnfs_destroy_all_layouts(struct nfs_client *);
> void put_layout_hdr(struct inode *inode);
> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
> void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
> struct nfs4_state *open_state);
>
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfs/callback_proc.c | 53 +++++++++++++++++++++--------------------------
1 files changed, 24 insertions(+), 29 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 6e21add..3a19cec 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -21,6 +21,12 @@
#define NFSDBG_FACILITY NFSDBG_CALLBACK
#endif
+static struct nfs_client *
+find_client_from_cps(struct cb_process_state *cps, struct sockaddr *addr)
+{
+ return cps->session ? cps->session->clp : nfs_find_client(addr, 4);
+}
+
__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
struct cb_getattrres *res,
struct cb_process_state *cps)
@@ -32,13 +38,9 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
res->bitmap[0] = res->bitmap[1] = 0;
res->status = htonl(NFS4ERR_BADHANDLE);
- if (cps->session) { /* set in cb_sequence */
- clp = cps->session->clp;
- } else {
- clp = nfs_find_client(args->addr, 4);
- if (clp == NULL)
- goto out;
- }
+ clp = find_client_from_cps(cps, args->addr);
+ if (clp == NULL)
+ goto out;
dprintk("NFS: GETATTR callback request from %s\n",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
@@ -81,13 +83,9 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
__be32 res;
res = htonl(NFS4ERR_BADHANDLE);
- if (cps->session) { /* set in cb_sequence */
- clp = cps->session->clp;
- } else {
- clp = nfs_find_client(args->addr, 4);
- if (clp == NULL)
- goto out;
- }
+ clp = find_client_from_cps(cps, args->addr);
+ if (clp == NULL)
+ goto out;
dprintk("NFS: RECALL callback request from %s\n",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
@@ -111,11 +109,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
}
iput(inode);
}
- if (!cps->session) {
- clp = nfs_find_client_next(prev);
- nfs_put_client(prev);
- }
- } while (!cps->session && clp != NULL);
+ if (cps->session)
+ break;
+ clp = nfs_find_client_next(prev);
+ nfs_put_client(prev);
+ } while (clp != NULL);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
return res;
@@ -420,11 +418,7 @@ __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
res = NFS4ERR_OP_NOT_IN_SESSION;
goto out;
}
- /* the callback must come from the MDS personality */
- if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
- res = NFS4ERR_INVAL;
- goto out;
- }
+
res = do_callback_layoutrecall(clp, args);
out:
dprintk("%s: exit with status = %d\n", __func__, res);
@@ -623,6 +617,12 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
if (status)
goto out_putclient;
+ /* The callback must come from the MDS personality */
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
+ status = htonl(NFS4ERR_INVAL);
+ goto out_putclient;
+ }
+
/*
* Check for pending referring calls. If a match is found, a
* related callback was received before the response to the original
@@ -702,11 +702,6 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
dprintk("NFS: RECALL_ANY callback request from %s\n",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
- /* the callback must come from the MDS personality */
- status = cpu_to_be32(NFS4ERR_NOTSUPP);
- if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
- goto out;
-
status = cpu_to_be32(NFS4ERR_INVAL);
if (!validate_bitmap_values((const unsigned long *)
&args->craa_type_mask))
--
1.7.2.3
nfs4_proc_layoutreturn and its descendants were assuming that
inode and lo were always available, but that is not true in the
case of a bulk return.
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/callback_proc.c | 1 +
fs/nfs/nfs4proc.c | 37 ++++++++++++++++++-------------------
fs/nfs/pnfs.c | 4 +++-
include/linux/nfs_xdr.h | 1 +
4 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 3e022a8..53a85648 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -284,6 +284,7 @@ static int pnfs_recall_layout(void *data)
lrp->args.reclaim = 0;
lrp->args.layout_type = rl.cbl_layout_type;
lrp->args.return_type = rl.cbl_recall_type;
+ lrp->clp = clp;
lrp->args.range = rl.cbl_seg;
lrp->args.inode = inode;
nfs4_proc_layoutreturn(lrp, true);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 73bd44e..8d3965c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5557,23 +5557,23 @@ static void
nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
- struct inode *ino = lrp->args.inode;
- struct nfs_inode *nfsi = NFS_I(ino);
- struct nfs_server *server = NFS_SERVER(ino);
dprintk("--> %s\n", __func__);
- if ((lrp->args.return_type == RETURN_FILE) &&
- pnfs_return_layout_barrier(nfsi, &lrp->args.range)) {
- dprintk("%s: waiting on barrier\n", __func__);
- rpc_sleep_on(&nfsi->lo_rpcwaitq, task, NULL);
- return;
+ if (lrp->args.return_type == RETURN_FILE) {
+ struct nfs_inode *nfsi = NFS_I(lrp->args.inode);
+
+ if (pnfs_return_layout_barrier(nfsi, &lrp->args.range)) {
+ dprintk("%s: waiting on barrier\n", __func__);
+ rpc_sleep_on(&nfsi->lo_rpcwaitq, task, NULL);
+ return;
+ }
}
if (lrp->stateid) {
/* Forget the layout, without sending the return */
rpc_exit(task, 0);
return;
}
- if (nfs4_setup_sequence(server, NULL, &lrp->args.seq_args,
+ if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
&lrp->res.seq_res, 0, task))
return;
rpc_call_start(task);
@@ -5582,8 +5582,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
- struct inode *ino = lrp->args.inode;
- struct nfs_server *server = NFS_SERVER(ino);
+ struct nfs_server *server;
dprintk("--> %s\n", __func__);
@@ -5593,8 +5592,12 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
if (RPC_ASSASSINATED(task))
return;
- if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
- nfs_restart_rpc(task, server->nfs_client);
+ if (lrp->args.return_type == RETURN_FILE)
+ server = NFS_SERVER(lrp->args.inode);
+ else
+ server = NULL;
+ if (nfs4_async_handle_error(task, server, NULL, lrp->clp) == -EAGAIN)
+ nfs_restart_rpc(task, lrp->clp);
dprintk("<-- %s\n", __func__);
}
@@ -5602,10 +5605,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
static void nfs4_layoutreturn_release(void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
- struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
- dprintk("--> %s return_type %d lo %p\n", __func__,
- lrp->args.return_type, lo);
+ dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
pnfs_layoutreturn_release(lrp);
kfree(calldata);
@@ -5620,8 +5621,6 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
{
- struct inode *ino = lrp->args.inode;
- struct nfs_server *server = NFS_SERVER(ino);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
@@ -5629,7 +5628,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
.rpc_resp = &lrp->res,
};
struct rpc_task_setup task_setup_data = {
- .rpc_client = server->client,
+ .rpc_client = lrp->clp->cl_rpcclient,
.rpc_message = &msg,
.callback_ops = &nfs4_layoutreturn_call_ops,
.callback_data = lrp,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 01ecb95..34f6914 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -590,10 +590,11 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
void
pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
{
- struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+ struct pnfs_layout_hdr *lo;
if (lrp->args.return_type != RETURN_FILE)
return;
+ lo = NFS_I(lrp->args.inode)->layout;
spin_lock(&lrp->args.inode->i_lock);
if (!lrp->res.valid)
; /* forgetful model internal release */
@@ -630,6 +631,7 @@ return_layout(struct inode *ino, struct pnfs_layout_range *range,
lrp->args.range = *range;
lrp->args.inode = ino;
lrp->stateid = stateid;
+ lrp->clp = server->nfs_client;
status = nfs4_proc_layoutreturn(lrp, wait);
out:
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 1ff6cb0..0ee7cce 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -280,6 +280,7 @@ struct nfs4_layoutreturn {
struct nfs4_layoutreturn_res res;
struct rpc_cred *cred;
const nfs4_stateid *stateid;
+ struct nfs_client *clp;
int rpc_status;
};
--
1.7.2.1
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4proc.c | 3 +--
include/linux/nfs_xdr.h | 1 -
2 files changed, 1 insertions(+), 3 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index de3ed2f..7b42a33 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5395,7 +5395,6 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
return;
}
}
- lgp->status = task->tk_status;
dprintk("<-- %s\n", __func__);
}
@@ -5451,7 +5450,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
status = nfs4_wait_for_completion_rpc_task(task);
if (status != 0)
goto out;
- status = lgp->status;
+ status = task->tk_status;
if (status != 0)
goto out;
status = pnfs_layout_process(lgp);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ebe11d3..f472405 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -223,7 +223,6 @@ struct nfs4_layoutget {
struct nfs4_layoutget_args args;
struct nfs4_layoutget_res res;
struct pnfs_layout_segment **lsegpp;
- int status;
};
struct nfs4_layoutcommit_args {
--
1.7.2.1
From: Andy Adamson <[email protected]>
Separate the layoutcommit operation args from the layoutcommit compound args
in preparation for adding the layoutcommit operation to the close compound
when return-on-close is set and a layoutcommit is needed prior to the layoutreturn.
- Move pnfs_layoutcommit_arg inode to pnfs_layoutcommit_data because it is
not needed for encode_layoutcommit.
- Move pnfs_layoutcommit_data rpc_cred to pnfs_layoutcommit_arg so that
pnfs_layoutcommit_setup can be called with pnfs_layoutcommit_arg only.
- Move layoutcommit operation fields from pnfs_layoutcommit_arg to a new
struct nfs4_layoutcommit_op_args which is passed to encode_layoutcommit.
This new structure will also be used for embedded layoutcommit calls.
Remove unused fields:
- Remove unused pnfs_layoutcommit_data rpc_task.
- Remove unused pnfs_layoutcommit_arg time_modify_changed and time_modify.
- Remove unused pnfs_layoutcommit_arg void layoutdriver_data which will be
restored for the block layoutdriver.
- Remove unused sizechanged and newsize from pnfs_layoutcommit_res.
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4proc.c | 23 +++++++++++------------
fs/nfs/nfs4xdr.c | 27 ++++++++++-----------------
fs/nfs/pnfs.c | 21 +++++++++------------
include/linux/nfs_xdr.h | 19 +++++++++----------
4 files changed, 39 insertions(+), 51 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f9b210e..bf179bb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5516,7 +5516,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *data)
{
struct nfs4_layoutcommit_data *ldata =
(struct nfs4_layoutcommit_data *)data;
- struct nfs_server *server = NFS_SERVER(ldata->args.inode);
+ struct nfs_server *server = NFS_SERVER(ldata->inode);
if (nfs4_setup_sequence(server, NULL, &ldata->args.seq_args,
&ldata->res.seq_res, 1, task))
@@ -5529,7 +5529,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutcommit_data *data =
(struct nfs4_layoutcommit_data *)calldata;
- struct nfs_server *server = NFS_SERVER(data->args.inode);
+ struct nfs_server *server = NFS_SERVER(data->inode);
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -5546,8 +5546,8 @@ static void nfs4_layoutcommit_release(void *lcdata)
(struct nfs4_layoutcommit_data *)lcdata;
/* Matched by get_layout in pnfs_layoutcommit_inode */
- put_layout_hdr(data->args.inode);
- put_rpccred(data->cred);
+ put_layout_hdr(data->inode);
+ put_rpccred(data->args.cred);
kfree(lcdata);
}
@@ -5565,11 +5565,11 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int issync)
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
.rpc_argp = &data->args,
.rpc_resp = &data->res,
- .rpc_cred = data->cred,
+ .rpc_cred = data->args.cred,
};
struct rpc_task_setup task_setup_data = {
.task = &data->task,
- .rpc_client = NFS_CLIENT(data->args.inode),
+ .rpc_client = NFS_CLIENT(data->inode),
.rpc_message = &msg,
.callback_ops = &nfs4_layoutcommit_ops,
.callback_data = data,
@@ -5578,13 +5578,12 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, int issync)
struct rpc_task *task;
int status = 0;
- dprintk("NFS: %4d initiating layoutcommit call. %llu@%llu lbw: %llu "
+ dprintk("NFS: initiating layoutcommit call. %llu@%llu lbw: %llu "
"type: %d issync %d\n",
- data->task.tk_pid,
- data->args.range.length,
- data->args.range.offset,
- data->args.lastbytewritten,
- data->args.layout_type, issync);
+ data->args.op.range.length,
+ data->args.op.range.offset,
+ data->args.op.lastbytewritten,
+ data->args.op.layout_type, issync);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5c7ee03..1c8a3c4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1868,7 +1868,7 @@ encode_layoutget(struct xdr_stream *xdr,
static int
encode_layoutcommit(struct xdr_stream *xdr,
- const struct nfs4_layoutcommit_args *args,
+ const struct nfs4_layoutcommit_op_args *args,
struct compound_hdr *hdr)
{
__be32 *p;
@@ -1885,14 +1885,7 @@ encode_layoutcommit(struct xdr_stream *xdr,
p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(1); /* newoffset = TRUE */
p = xdr_encode_hyper(p, args->lastbytewritten);
- *p = cpu_to_be32(args->time_modify_changed != 0);
- if (args->time_modify_changed) {
- p = reserve_space(xdr, 12);
- *p++ = cpu_to_be32(0);
- *p++ = cpu_to_be32(args->time_modify.tv_sec);
- *p = cpu_to_be32(args->time_modify.tv_nsec);
- }
-
+ *p = cpu_to_be32(0); /* nt_timechanged = FALSE */
p = reserve_space(xdr, 4);
*p = cpu_to_be32(args->layout_type);
@@ -2818,7 +2811,7 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, uint32_t *p,
encode_compound_hdr(&xdr, req, &hdr);
encode_sequence(&xdr, &args->seq_args, &hdr);
encode_putfh(&xdr, args->fh, &hdr);
- encode_layoutcommit(&xdr, args, &hdr);
+ encode_layoutcommit(&xdr, &args->op, &hdr);
encode_getfattr(&xdr, args->bitmask, &hdr);
encode_nops(&hdr);
return 0;
@@ -5307,10 +5300,10 @@ out_overflow:
return -EIO;
}
-static int decode_layoutcommit(struct xdr_stream *xdr,
- struct rpc_rqst *req,
- struct nfs4_layoutcommit_res *res)
+static int decode_layoutcommit(struct xdr_stream *xdr)
{
+ u32 sizechanged;
+ u64 newsize;
__be32 *p;
int status;
@@ -5321,13 +5314,13 @@ static int decode_layoutcommit(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
- res->sizechanged = be32_to_cpup(p);
+ sizechanged = be32_to_cpup(p);
- if (res->sizechanged) {
+ if (sizechanged) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
- xdr_decode_hyper(p, &res->newsize);
+ xdr_decode_hyper(p, &newsize);
}
return 0;
out_overflow:
@@ -6462,7 +6455,7 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, uint32_t *p,
status = decode_putfh(&xdr);
if (status)
goto out;
- status = decode_layoutcommit(&xdr, rqstp, res);
+ status = decode_layoutcommit(&xdr);
if (status)
goto out;
decode_getfattr(&xdr, res->fattr, res->server,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2072522..0d5d95c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1264,21 +1264,18 @@ pnfs_layoutcommit_setup(struct inode *inode,
dprintk("--> %s\n", __func__);
- data->args.inode = inode;
+ data->inode = inode;
data->args.fh = NFS_FH(inode);
- data->args.layout_type = nfss->pnfs_curr_ld->id;
+ data->args.op.layout_type = nfss->pnfs_curr_ld->id;
data->res.fattr = &data->fattr;
nfs_fattr_init(&data->fattr);
- /* TODO: Need to determine the correct values */
- data->args.time_modify_changed = 0;
-
/* Set values from inode so it can be reset
*/
- data->args.range.iomode = IOMODE_RW;
- data->args.range.offset = write_begin_pos;
- data->args.range.length = write_end_pos - write_begin_pos + 1;
- data->args.lastbytewritten = min(write_end_pos,
+ data->args.op.range.iomode = IOMODE_RW;
+ data->args.op.range.offset = write_begin_pos;
+ data->args.op.range.length = write_end_pos - write_begin_pos + 1;
+ data->args.op.lastbytewritten = min(write_end_pos,
i_size_read(inode) - 1);
data->args.bitmask = nfss->attr_bitmask;
data->res.server = nfss;
@@ -1318,12 +1315,12 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
*/
write_begin_pos = nfsi->layout->write_begin_pos;
write_end_pos = nfsi->layout->write_end_pos;
- data->cred = nfsi->layout->cred;
+ data->args.cred = nfsi->layout->cred;
nfsi->layout->write_begin_pos = 0;
nfsi->layout->write_end_pos = 0;
nfsi->layout->cred = NULL;
__clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->plh_flags);
- pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
+ pnfs_copy_layout_stateid(&data->args.op.stateid, nfsi->layout);
/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
get_layout_hdr(NFS_I(inode)->layout);
@@ -1335,7 +1332,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
write_end_pos);
if (status) {
/* The layout driver failed to setup the layoutcommit */
- put_rpccred(data->cred);
+ put_rpccred(data->args.cred);
put_layout_hdr(inode);
goto out_free;
}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6c4ba71..851b09f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -225,25 +225,24 @@ struct nfs4_layoutget {
struct pnfs_layout_segment **lsegpp;
};
-struct nfs4_layoutcommit_args {
+struct nfs4_layoutcommit_op_args {
nfs4_stateid stateid;
__u64 lastbytewritten;
- __u32 time_modify_changed;
- struct timespec time_modify;
- const u32 *bitmask;
- struct nfs_fh *fh;
- struct inode *inode;
/* Values set by layout driver */
struct pnfs_layout_range range;
__u32 layout_type;
- void *layoutdriver_data;
+};
+
+struct nfs4_layoutcommit_args {
+ struct nfs4_layoutcommit_op_args op;
+ const u32 *bitmask;
+ struct nfs_fh *fh;
+ struct rpc_cred *cred;
struct nfs4_sequence_args seq_args;
};
struct nfs4_layoutcommit_res {
- __u32 sizechanged;
- __u64 newsize;
struct nfs_fattr *fattr;
const struct nfs_server *server;
struct nfs4_sequence_res seq_res;
@@ -251,7 +250,7 @@ struct nfs4_layoutcommit_res {
struct nfs4_layoutcommit_data {
struct rpc_task task;
- struct rpc_cred *cred;
+ struct inode *inode;
struct nfs_fattr fattr;
struct nfs4_layoutcommit_args args;
struct nfs4_layoutcommit_res res;
--
1.7.2.1
should_free_lseg only uses lseg->range, and we will need the function for
the callback code, where we have only the range, and
not an enclosing lseg. So pass it the lseg's range instead of the lseg.
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 16 ++++++++--------
1 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d9a867f..ca8be8d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -346,11 +346,11 @@ EXPORT_SYMBOL_GPL(put_lseg);
* READ RW false
*/
static int
-should_free_lseg(struct pnfs_layout_segment *lseg,
- struct pnfs_layout_range *range)
+should_free_lseg(struct pnfs_layout_range *lseg_range,
+ struct pnfs_layout_range *recall_range)
{
- return (range->iomode == IOMODE_ANY ||
- lseg->range.iomode == range->iomode);
+ return (recall_range->iomode == IOMODE_ANY ||
+ lseg_range->iomode == recall_range->iomode);
}
static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
@@ -378,7 +378,7 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
assert_spin_locked(&lo->inode->i_lock);
list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
- if (should_free_lseg(lseg, range)) {
+ if (should_free_lseg(&lseg->range, range)) {
dprintk("%s: freeing lseg %p iomode %d "
"offset %llu length %llu\n", __func__,
lseg, lseg->range.iomode, lseg->range.offset,
@@ -564,7 +564,7 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
assert_spin_locked(&lo->inode->i_lock);
list_for_each_entry(lseg, &lo->segs, fi_list)
- if (should_free_lseg(lseg, range)) {
+ if (should_free_lseg(&lseg->range, range)) {
out = lseg;
break;
}
@@ -586,7 +586,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
spin_lock(&nfsi->vfs_inode.i_lock);
list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
- if (should_free_lseg(lseg, range)) {
+ if (should_free_lseg(&lseg->range, range)) {
ret = true;
break;
}
@@ -662,7 +662,7 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
}
list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
- if (should_free_lseg(lseg, &arg))
+ if (should_free_lseg(&lseg->range, &arg))
mark_lseg_invalid(lseg, &tmp_list);
/* Reference matched in nfs4_layoutreturn_release */
get_layout_hdr(lo);
--
1.7.2.1
Instead, have a mark_lseg_invalid function that marks an lseg invalid and
removes the reference that holds it in the list. Now when io is finished,
the lseg will automatically be removed from the list. This is
at the heart of many of the upcoming cb_layoutrecall changes.
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4xdr.c | 3 +-
fs/nfs/pnfs.c | 145 ++++++++++++++++++++++++++++++++++-------------------
fs/nfs/pnfs.h | 1 +
3 files changed, 95 insertions(+), 54 deletions(-)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 238eeb2..6d9ef2b 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
p = xdr_encode_hyper(p, args->range.offset);
p = xdr_encode_hyper(p, args->range.length);
- pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
- NULL);
+ pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
p = xdr_encode_opaque_fixed(p, &stateid.data,
NFS4_STATEID_SIZE);
p = reserve_space(xdr, 4);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3bbe3be..4e5c68b 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
lseg->layout = lo;
}
+static void
+_put_lseg_common(struct pnfs_layout_segment *lseg)
+{
+ BUG_ON(lseg->valid == true);
+ list_del(&lseg->fi_list);
+ if (list_empty(&lseg->layout->segs)) {
+ struct nfs_client *clp;
+
+ clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
+ spin_lock(&clp->cl_lock);
+ /* List does not take a reference, so no need for put here */
+ list_del_init(&lseg->layout->layouts);
+ spin_unlock(&clp->cl_lock);
+ pnfs_invalidate_layout_stateid(lseg->layout);
+ }
+ rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
+}
+
+/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
+ * could sleep, so must be called outside of the lock.
+ */
+static void
+put_lseg_locked(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
+{
+ dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
+ atomic_read(&lseg->pls_refcount), lseg->valid);
+ if (atomic_dec_and_test(&lseg->pls_refcount)) {
+ _put_lseg_common(lseg);
+ list_add(&lseg->fi_list, tmp_list);
+ }
+}
+
void
put_lseg(struct pnfs_layout_segment *lseg)
{
- bool do_wake_up;
struct inode *ino;
if (!lseg)
@@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg)
dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount), lseg->valid);
- do_wake_up = !lseg->valid;
ino = lseg->layout->inode;
- if (atomic_dec_and_test(&lseg->pls_refcount)) {
+ if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
+ _put_lseg_common(lseg);
+ spin_unlock(&ino->i_lock);
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
put_layout_hdr(ino);
}
- if (do_wake_up)
- rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
}
EXPORT_SYMBOL_GPL(put_lseg);
@@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
lseg->range.iomode == range->iomode);
}
-static bool
-_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
+static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
+ struct list_head *tmp_list)
{
- return atomic_read(&lseg->pls_refcount) == 1;
+ assert_spin_locked(&lseg->layout->inode->i_lock);
+ if (lseg->valid) {
+ lseg->valid = false;
+ /* Remove the reference keeping the lseg in the
+ * list. It will now be removed when all
+ * outstanding io is finished.
+ */
+ put_lseg_locked(lseg, tmp_list);
+ }
}
static void
@@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
__func__, lo, range->offset, range->length, range->iomode);
assert_spin_locked(&lo->inode->i_lock);
- list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
- if (!should_free_lseg(lseg, range) ||
- !_pnfs_can_return_lseg(lseg))
- continue;
- dprintk("%s: freeing lseg %p iomode %d "
- "offset %llu length %llu\n", __func__,
- lseg, lseg->range.iomode, lseg->range.offset,
- lseg->range.length);
- list_move(&lseg->fi_list, tmp_list);
- }
- if (list_empty(&lo->segs)) {
- struct nfs_client *clp;
-
- clp = NFS_SERVER(lo->inode)->nfs_client;
- spin_lock(&clp->cl_lock);
- /* List does not take a reference, so no need for put here */
- list_del_init(&lo->layouts);
- spin_unlock(&clp->cl_lock);
- pnfs_invalidate_layout_stateid(lo);
- }
-
+ list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
+ if (should_free_lseg(lseg, range)) {
+ dprintk("%s: freeing lseg %p iomode %d "
+ "offset %llu length %llu\n", __func__,
+ lseg, lseg->range.iomode, lseg->range.offset,
+ lseg->range.length);
+ mark_lseg_invalid(lseg, tmp_list);
+ }
dprintk("%s:Return\n", __func__);
}
static void
-pnfs_free_lseg_list(struct list_head *tmp_list)
+pnfs_free_lseg_list(struct list_head *free_me)
{
- struct pnfs_layout_segment *lseg;
+ struct pnfs_layout_segment *lseg, *tmp;
+ struct inode *ino;
- while (!list_empty(tmp_list)) {
- lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
- fi_list);
- dprintk("%s calling put_lseg on %p\n", __func__, lseg);
- list_del(&lseg->fi_list);
- put_lseg(lseg);
+ list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
+ BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
+ ino = lseg->layout->inode;
+ NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
+ put_layout_hdr(ino);
}
+ INIT_LIST_HEAD(free_me);
}
void
@@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
dprintk("<-- %s\n", __func__);
}
+/* Layoutreturn may use an invalid stateid, just copy what is there */
+void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
+{
+ int seq;
+
+ do {
+ seq = read_seqbegin(&lo->seqlock);
+ memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
+ } while (read_seqretry(&lo->seqlock, seq));
+}
+
void
pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
struct nfs4_state *open_state)
@@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
return out;
}
+/* Return true if there is layout based io in progress in the given range.
+ * Assumes range has already been marked invalid, and layout marked to
+ * prevent any new lseg from being inserted.
+ */
bool
pnfs_return_layout_barrier(struct nfs_inode *nfsi,
struct pnfs_layout_range *range)
{
- struct pnfs_layout_segment *lseg;
+ struct pnfs_layout_segment *lseg, *tmp;
bool ret = false;
spin_lock(&nfsi->vfs_inode.i_lock);
- list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
- if (!should_free_lseg(lseg, range))
- continue;
- lseg->valid = false;
- if (!_pnfs_can_return_lseg(lseg)) {
- dprintk("%s: wait on lseg %p refcount %d\n",
- __func__, lseg,
- atomic_read(&lseg->pls_refcount));
+ list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
+ if (should_free_lseg(lseg, range)) {
ret = true;
+ break;
}
- }
spin_unlock(&nfsi->vfs_inode.i_lock);
dprintk("%s:Return %d\n", __func__, ret);
return ret;
@@ -574,12 +611,10 @@ void
pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
{
struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
- LIST_HEAD(tmp_list);
if (lrp->args.return_type != RETURN_FILE)
return;
spin_lock(&lrp->args.inode->i_lock);
- pnfs_clear_lseg_list(lo, &tmp_list, &lrp->args.range);
if (!lrp->res.valid)
; /* forgetful model internal release */
else if (!lrp->res.lrs_present)
@@ -588,7 +623,6 @@ pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
pnfs_set_layout_stateid(lo, &lrp->res.stateid);
put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
spin_unlock(&lrp->args.inode->i_lock);
- pnfs_free_lseg_list(&tmp_list);
}
static int
@@ -641,7 +675,11 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
arg.offset = 0;
arg.length = NFS4_MAX_UINT64;
+ /* probably should BUGON if type != RETURN_FILE */
if (type == RETURN_FILE) {
+ LIST_HEAD(tmp_list);
+ struct pnfs_layout_segment *lseg, *tmp;
+
spin_lock(&ino->i_lock);
lo = nfsi->layout;
if (lo && !has_layout_to_return(lo, &arg))
@@ -652,10 +690,13 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
goto out;
}
+ list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
+ if (should_free_lseg(lseg, &arg))
+ mark_lseg_invalid(lseg, &tmp_list);
/* Reference matched in pnfs_layoutreturn_release */
get_layout_hdr_locked(lo);
-
spin_unlock(&ino->i_lock);
+ pnfs_free_lseg_list(&tmp_list);
if (layoutcommit_needed(nfsi)) {
if (stateid && !wait) { /* callback */
@@ -1171,7 +1212,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
nfsi->layout->write_end_pos = 0;
nfsi->layout->cred = NULL;
__clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state);
- pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout, NULL);
+ pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
get_layout_hdr_locked(NFS_I(inode)->layout);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 05dd5e0..000acf0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -206,6 +206,7 @@ void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
void put_layout_hdr(struct inode *inode);
+void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
struct nfs4_state *open_state);
--
1.7.2.1
From: Andy Adamson <[email protected]>
Prepare for adding the layoutcommit operation to the close compound on
return-on-close.
Have pnfs_layoutcommit_setup handle all the layoutcommit operation setup.
Have pnfs_layoutcommit_inode handle all the layoutcommit compound setup.
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 104 ++++++++++++++++++++++++++-------------------------------
fs/nfs/pnfs.h | 3 ++
2 files changed, 50 insertions(+), 57 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0d5d95c..521ee9d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1252,97 +1252,87 @@ pnfs_try_to_commit(struct nfs_write_data *data,
}
/*
- * Set up the argument/result storage required for the RPC call.
+ * Set up the arguments required for the RPC call.
*/
-static int
+void
pnfs_layoutcommit_setup(struct inode *inode,
- struct nfs4_layoutcommit_data *data,
- loff_t write_begin_pos, loff_t write_end_pos)
+ struct nfs4_layoutcommit_op_args *args, bool use_cred)
{
- struct nfs_server *nfss = NFS_SERVER(inode);
- int result = 0;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ loff_t write_begin_pos, write_end_pos;
dprintk("--> %s\n", __func__);
- data->inode = inode;
- data->args.fh = NFS_FH(inode);
- data->args.op.layout_type = nfss->pnfs_curr_ld->id;
- data->res.fattr = &data->fattr;
- nfs_fattr_init(&data->fattr);
+ assert_spin_locked(&inode->i_lock);
- /* Set values from inode so it can be reset
+ /*
+ * Clear layoutcommit properties in the inode so
+ * new layoutcommit info can be generated
*/
- data->args.op.range.iomode = IOMODE_RW;
- data->args.op.range.offset = write_begin_pos;
- data->args.op.range.length = write_end_pos - write_begin_pos + 1;
- data->args.op.lastbytewritten = min(write_end_pos,
- i_size_read(inode) - 1);
- data->args.bitmask = nfss->attr_bitmask;
- data->res.server = nfss;
-
- dprintk("<-- %s Status %d\n", __func__, result);
- return result;
+ write_begin_pos = nfsi->layout->write_begin_pos;
+ write_end_pos = nfsi->layout->write_end_pos;
+ nfsi->layout->write_begin_pos = 0;
+ nfsi->layout->write_end_pos = 0;
+ /* In the true case, caller has passed on the cred to another struct */
+ if (use_cred == false)
+ put_rpccred(nfsi->layout->cred);
+ nfsi->layout->cred = NULL;
+ __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->plh_flags);
+ /* FIXME: figure out what to do here */
+ pnfs_copy_layout_stateid(&args->stateid, nfsi->layout);
+
+ args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id;
+
+ args->range.iomode = IOMODE_RW;
+ args->range.offset = write_begin_pos;
+ args->range.length = write_end_pos - write_begin_pos + 1;
+ args->lastbytewritten = min(write_end_pos, i_size_read(inode) - 1);
}
-/* Issue a async layoutcommit for an inode.
+/*
+ * Issue an async layoutcommit for an inode.
+ * Returns 0 on success, negative value for error
*/
int
pnfs_layoutcommit_inode(struct inode *inode, int sync)
{
struct nfs4_layoutcommit_data *data;
- struct nfs_inode *nfsi = NFS_I(inode);
- loff_t write_begin_pos;
- loff_t write_end_pos;
-
- int status = 0;
+ int status = -ENOMEM;
dprintk("%s Begin (sync:%d)\n", __func__, sync);
- BUG_ON(!has_layout(nfsi));
-
data = kzalloc(sizeof(*data), GFP_NOFS);
if (!data)
- return -ENOMEM;
+ goto out;
+ status = 0;
spin_lock(&inode->i_lock);
- if (!layoutcommit_needed(nfsi)) {
+ if (!layoutcommit_needed(NFS_I(inode))) {
spin_unlock(&inode->i_lock);
- goto out_free;
+ kfree(data);
+ goto out;
}
+ /* Use the layoutcommit cred */
+ data->args.cred = NFS_I(inode)->layout->cred;
- /* Clear layoutcommit properties in the inode so
- * new lc info can be generated
- */
- write_begin_pos = nfsi->layout->write_begin_pos;
- write_end_pos = nfsi->layout->write_end_pos;
- data->args.cred = nfsi->layout->cred;
- nfsi->layout->write_begin_pos = 0;
- nfsi->layout->write_end_pos = 0;
- nfsi->layout->cred = NULL;
- __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->plh_flags);
- pnfs_copy_layout_stateid(&data->args.op.stateid, nfsi->layout);
+ /* Set up layoutcommit operation args */
+ pnfs_layoutcommit_setup(inode, &data->args.op, true);
/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
get_layout_hdr(NFS_I(inode)->layout);
-
spin_unlock(&inode->i_lock);
- /* Set up layout commit args */
- status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
- write_end_pos);
- if (status) {
- /* The layout driver failed to setup the layoutcommit */
- put_rpccred(data->args.cred);
- put_layout_hdr(inode);
- goto out_free;
- }
+ data->args.fh = NFS_FH(inode);
+ data->args.bitmask = NFS_SERVER(inode)->attr_bitmask;
+
+ data->inode = inode;
+ data->res.server = NFS_SERVER(inode);
+ data->res.fattr = &data->fattr;
+ nfs_fattr_init(&data->fattr);
status = nfs4_proc_layoutcommit(data, sync);
out:
dprintk("%s end (err:%d)\n", __func__, status);
return status;
-out_free:
- kfree(data);
- goto out;
}
/*
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 3e04da8..a244b27 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -236,6 +236,9 @@ void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
int notify_bit, atomic_t *notify_count,
struct list_head *tmp_list);
bool pnfs_roc(struct nfs4_closedata *data);
+void pnfs_layoutcommit_setup(struct inode *inode,
+ struct nfs4_layoutcommit_op_args *args,
+ bool use_cred);
static inline bool
has_layout(struct nfs_inode *nfsi)
--
1.7.2.1
On 2010-11-04 17:22, Fred Isaman wrote:
> From: Andy Adamson <[email protected]>
>
> The NFSv4.1 session found in cb_sequence needs to be shared by other
> callback operations in the same cb_compound.
> Hold a reference to the session's nfs_client throughout the cb_compound
> processing.
>
> Move NFS4ERR_RETRY_UNCACHED_REP processing into nfs4_callback_sequence.
>
> Signed-off-by: Andy Adamson <[email protected]>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/callback.h | 24 ++++++--
> fs/nfs/callback_proc.c | 138 ++++++++++++++++++++++++++++--------------------
> fs/nfs/callback_xdr.c | 29 +++++-----
> 3 files changed, 113 insertions(+), 78 deletions(-)
>
> diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
> index 2ce61b8..89fee05 100644
> --- a/fs/nfs/callback.h
> +++ b/fs/nfs/callback.h
> @@ -34,6 +34,11 @@ enum nfs4_callback_opnum {
> OP_CB_ILLEGAL = 10044,
> };
>
> +struct cb_process_state {
> + __be32 drc_status;
> + struct nfs4_session *session;
> +};
> +
> struct cb_compound_hdr_arg {
> unsigned int taglen;
> const char *tag;
> @@ -104,7 +109,8 @@ struct cb_sequenceres {
> };
>
> extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
> - struct cb_sequenceres *res);
> + struct cb_sequenceres *res,
> + struct cb_process_state *cps);
>
> extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
> const nfs4_stateid *stateid);
> @@ -125,14 +131,17 @@ struct cb_recallanyargs {
> uint32_t craa_type_mask;
> };
>
> -extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
> +extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args,
> + void *dummy,
> + struct cb_process_state *cps);
>
> struct cb_recallslotargs {
> struct sockaddr *crsa_addr;
> uint32_t crsa_target_max_slots;
> };
> extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
> - void *dummy);
> + void *dummy,
> + struct cb_process_state *cps);
>
> struct cb_layoutrecallargs {
> struct sockaddr *cbl_addr;
> @@ -147,12 +156,15 @@ struct cb_layoutrecallargs {
>
> extern unsigned nfs4_callback_layoutrecall(
> struct cb_layoutrecallargs *args,
> - void *dummy);
> + void *dummy, struct cb_process_state *cps);
>
> #endif /* CONFIG_NFS_V4_1 */
>
> -extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
> -extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
> +extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
> + struct cb_getattrres *res,
> + struct cb_process_state *cps);
> +extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
> + struct cb_process_state *cps);
>
> #ifdef CONFIG_NFS_V4
> extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 6b560ce..84c5a1b 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -20,8 +20,10 @@
> #ifdef NFS_DEBUG
> #define NFSDBG_FACILITY NFSDBG_CALLBACK
> #endif
> -
> -__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
> +
> +__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
> + struct cb_getattrres *res,
> + struct cb_process_state *cps)
> {
> struct nfs_client *clp;
> struct nfs_delegation *delegation;
> @@ -30,9 +32,13 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
>
> res->bitmap[0] = res->bitmap[1] = 0;
> res->status = htonl(NFS4ERR_BADHANDLE);
> - clp = nfs_find_client(args->addr, 4);
> - if (clp == NULL)
> - goto out;
> + if (cps->session) { /* set in cb_sequence */
> + clp = cps->session->clp;
> + } else {
> + clp = nfs_find_client(args->addr, 4);
> + if (clp == NULL)
> + goto out;
> + }
How about extracting this client lookup into a helper function?
It is also repeated in nfs4_callback_recall().
>
> dprintk("NFS: GETATTR callback request from %s\n",
> rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
> @@ -60,22 +66,28 @@ out_iput:
> rcu_read_unlock();
> iput(inode);
> out_putclient:
> - nfs_put_client(clp);
> + if (!cps->session)
> + nfs_put_client(clp);
> out:
> dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
> return res->status;
> }
>
> -__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
> +__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
> + struct cb_process_state *cps)
> {
> struct nfs_client *clp;
> struct inode *inode;
> __be32 res;
>
> res = htonl(NFS4ERR_BADHANDLE);
> - clp = nfs_find_client(args->addr, 4);
> - if (clp == NULL)
> - goto out;
> + if (cps->session) { /* set in cb_sequence */
> + clp = cps->session->clp;
> + } else {
> + clp = nfs_find_client(args->addr, 4);
> + if (clp == NULL)
> + goto out;
> + }
>
> dprintk("NFS: RECALL callback request from %s\n",
> rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
> @@ -99,9 +111,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
> }
> iput(inode);
> }
> - clp = nfs_find_client_next(prev);
> - nfs_put_client(prev);
> - } while (clp != NULL);
> + if (!cps->session) {
> + clp = nfs_find_client_next(prev);
> + nfs_put_client(prev);
> + }
> + } while (!cps->session && clp != NULL);
I.e., instead of testing !cps->session in both the if and the while condition:
if (cps->session)
break;
(I think this is simpler)
> out:
> dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
> return res;
> @@ -346,46 +360,40 @@ static int pnfs_recall_all_layouts(struct nfs_client *clp)
> }
>
> __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
> - void *dummy)
> + void *dummy, struct cb_process_state *cps)
> {
> struct nfs_client *clp;
> struct inode *inode = NULL;
> __be32 res;
> int status;
> - unsigned int num_client = 0;
>
> dprintk("%s: -->\n", __func__);
>
> res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
> - clp = nfs_find_client(args->cbl_addr, 4);
> - if (clp == NULL)
> + if (cps->session) /* set in cb_sequence */
> + clp = cps->session->clp;
> + else
> goto out;
>
> - res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
> - do {
> - struct nfs_client *prev = clp;
> - num_client++;
> - /* the callback must come from the MDS personality */
> - if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
> - goto loop;
> - /* In the _ALL or _FSID case, we need the inode to get
> - * the nfs_server struct.
> - */
> - inode = nfs_layoutrecall_find_inode(clp, args);
> - if (!inode)
> - goto loop;
> - status = pnfs_async_return_layout(clp, inode, args);
> - if (status)
> - res = cpu_to_be32(NFS4ERR_DELAY);
> - iput(inode);
> -loop:
> - clp = nfs_find_client_next(prev);
> - nfs_put_client(prev);
> - } while (clp != NULL);
> + /* the callback must come from the MDS personality */
> + res = cpu_to_be32(NFS4ERR_NOTSUPP);
> + if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
> + goto out;
>
> + res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
> + /*
> + * In the _ALL or _FSID case, we need the inode to get
> + * the nfs_server struct.
> + */
> + inode = nfs_layoutrecall_find_inode(clp, args);
> + if (!inode)
> + goto out;
> + status = pnfs_async_return_layout(clp, inode, args);
> + if (status)
> + res = cpu_to_be32(NFS4ERR_DELAY);
> + iput(inode);
> out:
> - dprintk("%s: exit with status = %d numclient %u\n",
> - __func__, ntohl(res), num_client);
> + dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
> return res;
> }
>
> @@ -552,12 +560,15 @@ out:
> }
>
> __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
> - struct cb_sequenceres *res)
> + struct cb_sequenceres *res,
> + struct cb_process_state *cps)
> {
> struct nfs_client *clp;
> int i;
> __be32 status;
>
> + cps->session = NULL;
> +
> status = htonl(NFS4ERR_BADSESSION);
> clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
> if (clp == NULL)
> @@ -583,21 +594,27 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
> res->csr_slotid = args->csa_slotid;
> res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
> res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
> + cps->session = clp->cl_session; /* caller must put nfs_client */
>
> -out_putclient:
> - nfs_put_client(clp);
> out:
> for (i = 0; i < args->csa_nrclists; i++)
> kfree(args->csa_rclists[i].rcl_refcalls);
> kfree(args->csa_rclists);
>
> - if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
> + if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
> res->csr_status = 0;
> - else
> + cps->drc_status = status;
> + status = 0;
> + } else
> res->csr_status = status;
> +
> dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
> ntohl(status), ntohl(res->csr_status));
> return status;
> +
> +out_putclient:
> + nfs_put_client(clp);
> + goto out;
> }
>
> static inline bool
> @@ -624,24 +641,31 @@ validate_bitmap_values(const unsigned long *mask)
> return false;
> }
>
> -__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
> +__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
> + struct cb_process_state *cps)
> {
> struct nfs_client *clp;
> __be32 status;
> fmode_t flags = 0;
>
> status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
> - clp = nfs_find_client(args->craa_addr, 4);
> - if (clp == NULL)
> + if (cps->session) /* set in cb_sequence */
> + clp = cps->session->clp;
> + else
> goto out;
>
> dprintk("NFS: RECALL_ANY callback request from %s\n",
> rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
>
> + /* the callback must come from the MDS personality */
> + status = cpu_to_be32(NFS4ERR_NOTSUPP);
> + if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
> + goto out;
> +
Wouldn't it be simpler to do that check once in cb_sequence?
I'll send a patch as a reply to this message with my proposals...
Benny
> status = cpu_to_be32(NFS4ERR_INVAL);
> if (!validate_bitmap_values((const unsigned long *)
> &args->craa_type_mask))
> - goto out_put;
> + goto out;
>
> status = cpu_to_be32(NFS4_OK);
> if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
> @@ -657,23 +681,23 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
>
> if (flags)
> nfs_expire_all_delegation_types(clp, flags);
> -out_put:
> - nfs_put_client(clp);
> out:
> dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
> return status;
> }
>
> /* Reduce the fore channel's max_slots to the target value */
> -__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
> +__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
> + struct cb_process_state *cps)
> {
> struct nfs_client *clp;
> struct nfs4_slot_table *fc_tbl;
> __be32 status;
>
> status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
> - clp = nfs_find_client(args->crsa_addr, 4);
> - if (clp == NULL)
> + if (cps->session) /* set in cb_sequence */
> + clp = cps->session->clp;
> + else
> goto out;
>
> dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
> @@ -685,16 +709,14 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
> status = htonl(NFS4ERR_BAD_HIGH_SLOT);
> if (args->crsa_target_max_slots > fc_tbl->max_slots ||
> args->crsa_target_max_slots < 1)
> - goto out_putclient;
> + goto out;
>
> status = htonl(NFS4_OK);
> if (args->crsa_target_max_slots == fc_tbl->max_slots)
> - goto out_putclient;
> + goto out;
>
> fc_tbl->target_max_slots = args->crsa_target_max_slots;
> nfs41_handle_recall_slot(clp);
> -out_putclient:
> - nfs_put_client(clp); /* balance nfs_find_client */
> out:
> dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
> return status;
> diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
> index 63b17d0..1650ab0 100644
> --- a/fs/nfs/callback_xdr.c
> +++ b/fs/nfs/callback_xdr.c
> @@ -12,6 +12,7 @@
> #include <linux/slab.h>
> #include "nfs4_fs.h"
> #include "callback.h"
> +#include "internal.h"
>
> #define CB_OP_TAGLEN_MAXSZ (512)
> #define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
> @@ -34,7 +35,8 @@
> /* Internal error code */
> #define NFS4ERR_RESOURCE_HDR 11050
>
> -typedef __be32 (*callback_process_op_t)(void *, void *);
> +typedef __be32 (*callback_process_op_t)(void *, void *,
> + struct cb_process_state *);
> typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
> typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
>
> @@ -676,7 +678,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
> static __be32 process_op(uint32_t minorversion, int nop,
> struct svc_rqst *rqstp,
> struct xdr_stream *xdr_in, void *argp,
> - struct xdr_stream *xdr_out, void *resp, int* drc_status)
> + struct xdr_stream *xdr_out, void *resp,
> + struct cb_process_state *cps)
> {
> struct callback_op *op = &callback_ops[0];
> unsigned int op_nr;
> @@ -699,8 +702,8 @@ static __be32 process_op(uint32_t minorversion, int nop,
> if (status)
> goto encode_hdr;
>
> - if (*drc_status) {
> - status = *drc_status;
> + if (cps->drc_status) {
> + status = cps->drc_status;
> goto encode_hdr;
> }
>
> @@ -708,16 +711,10 @@ static __be32 process_op(uint32_t minorversion, int nop,
> if (maxlen > 0 && maxlen < PAGE_SIZE) {
> status = op->decode_args(rqstp, xdr_in, argp);
> if (likely(status == 0))
> - status = op->process_op(argp, resp);
> + status = op->process_op(argp, resp, cps);
> } else
> status = htonl(NFS4ERR_RESOURCE);
>
> - /* Only set by OP_CB_SEQUENCE processing */
> - if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
> - *drc_status = status;
> - status = 0;
> - }
> -
> encode_hdr:
> res = encode_op_hdr(xdr_out, op_nr, status);
> if (unlikely(res))
> @@ -736,8 +733,10 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
> struct cb_compound_hdr_arg hdr_arg = { 0 };
> struct cb_compound_hdr_res hdr_res = { NULL };
> struct xdr_stream xdr_in, xdr_out;
> - __be32 *p;
> - __be32 status, drc_status = 0;
> + __be32 *p, status;
> + struct cb_process_state cps = {
> + .drc_status = 0,
> + };
> unsigned int nops = 0;
>
> dprintk("%s: start\n", __func__);
> @@ -758,7 +757,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
>
> while (status == 0 && nops != hdr_arg.nops) {
> status = process_op(hdr_arg.minorversion, nops, rqstp,
> - &xdr_in, argp, &xdr_out, resp, &drc_status);
> + &xdr_in, argp, &xdr_out, resp, &cps);
> nops++;
> }
>
> @@ -771,6 +770,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
>
> *hdr_res.status = status;
> *hdr_res.nops = htonl(nops);
> + if (cps.session) /* matched by cb_sequence find_client_with_session */
> + nfs_put_client(cps.session->clp);
> dprintk("%s: done, status = %u\n", __func__, ntohl(status));
> return rpc_success;
> }
From: Andy Adamson <[email protected]>
Signed-off-by: Andy Adamson <[email protected]>
---
fs/nfs/nfs4proc.c | 73 +++++++++++++++++++++++++++++++++-------------
fs/nfs/nfs4state.c | 18 +-----------
fs/nfs/nfs4xdr.c | 14 ++++++++-
fs/nfs/pnfs.c | 64 +++++++++++++++++++++++++++++++++++++----
fs/nfs/pnfs.h | 1 +
include/linux/nfs_xdr.h | 19 ++++++++++++
6 files changed, 143 insertions(+), 46 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 87b2b63..f9b210e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -74,6 +74,8 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs_fattr *fattr, struct iattr *sattr,
struct nfs4_state *state);
+static void nfs4_layoutreturn_set_stateid(struct inode *ino,
+ struct nfs4_layoutreturn_res *res);
/* Prevent leaks of NFSv4 errors into userland */
static int nfs4_map_errors(int err)
@@ -1821,16 +1823,6 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
return err;
}
-struct nfs4_closedata {
- struct path path;
- struct inode *inode;
- struct nfs4_state *state;
- struct nfs_closeargs arg;
- struct nfs_closeres res;
- struct nfs_fattr fattr;
- unsigned long timestamp;
-};
-
static void nfs4_free_closedata(void *data)
{
struct nfs4_closedata *calldata = data;
@@ -1840,6 +1832,17 @@ static void nfs4_free_closedata(void *data)
nfs_free_seqid(calldata->arg.seqid);
nfs4_put_state_owner(sp);
path_put(&calldata->path);
+ if (calldata->res.op_bitmask & NFS4_HAS_LAYOUTRETURN) {
+ struct pnfs_layout_hdr *lo = NFS_I(calldata->inode)->layout;
+
+ spin_lock(&lo->inode->i_lock);
+ lo->plh_block_lgets--;
+ lo->plh_outstanding--;
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(lo->inode)->lo_rpcwaitq_stateid);
+ spin_unlock(&lo->inode->i_lock);
+ put_layout_hdr(lo->inode);
+ }
kfree(calldata);
}
@@ -1869,6 +1872,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
switch (task->tk_status) {
case 0:
nfs_set_open_stateid(state, &calldata->res.stateid, 0);
+ if (calldata->res.op_bitmask & NFS4_HAS_LAYOUTRETURN)
+ nfs4_layoutreturn_set_stateid(calldata->inode,
+ &calldata->res.lr_res);
renew_lease(server, calldata->timestamp);
nfs4_close_clear_stateid_flags(state,
calldata->arg.fmode);
@@ -1920,8 +1926,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
return;
}
- if (calldata->arg.fmode == 0)
+ if (calldata->arg.fmode == 0) {
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
+ /* Are there layout segments to return on close? */
+ if (pnfs_roc(calldata)) {
+ struct nfs_inode *nfsi = NFS_I(calldata->inode);
+ if (pnfs_return_layout_barrier(nfsi,
+ &calldata->arg.lr_args.range)) {
+ dprintk("%s: waiting on barrier\n", __func__);
+ /* FIXME race with wake here */
+ rpc_sleep_on(&nfsi->lo_rpcwaitq, task, NULL);
+ spin_lock(&calldata->inode->i_lock);
+ nfsi->layout->plh_block_lgets--;
+ nfsi->layout->plh_outstanding--;
+ if (!pnfs_layoutgets_blocked(nfsi->layout, NULL))
+ rpc_wake_up(&nfsi->lo_rpcwaitq_stateid);
+ spin_unlock(&calldata->inode->i_lock);
+ put_layout_hdr(calldata->inode);
+ return;
+ }
+ }
+ }
nfs_fattr_init(calldata->res.fattr);
calldata->timestamp = jiffies;
@@ -5587,6 +5612,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
if (pnfs_return_layout_barrier(nfsi, &lrp->args.range)) {
dprintk("%s: waiting on barrier\n", __func__);
+ /* FIXME race with wake here */
rpc_sleep_on(&nfsi->lo_rpcwaitq, task, NULL);
return;
}
@@ -5602,6 +5628,19 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
rpc_call_start(task);
}
+static void nfs4_layoutreturn_set_stateid(struct inode *ino,
+ struct nfs4_layoutreturn_res *res)
+{
+ struct pnfs_layout_hdr *lo = NFS_I(ino)->layout;
+
+ spin_lock(&ino->i_lock);
+ if (res->lrs_present)
+ pnfs_set_layout_stateid(lo, &res->stateid, true);
+ else
+ BUG_ON(!list_empty(&lo->segs));
+ spin_unlock(&ino->i_lock);
+}
+
static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
@@ -5620,16 +5659,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
nfs_restart_rpc(task, lrp->clp);
return;
}
- if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) {
- struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
-
- spin_lock(&lo->inode->i_lock);
- if (lrp->res.lrs_present)
- pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
- else
- BUG_ON(!list_empty(&lo->segs));
- spin_unlock(&lo->inode->i_lock);
- }
+ if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE))
+ nfs4_layoutreturn_set_stateid(lrp->args.inode, &lrp->res);
dprintk("<-- %s\n", __func__);
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ceb0d66..784f122 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -601,24 +601,8 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
if (!call_close) {
nfs4_put_open_state(state);
nfs4_put_state_owner(owner);
- } else {
- u32 roc_iomode;
- struct nfs_inode *nfsi = NFS_I(state->inode);
-
- if (has_layout(nfsi) &&
- (roc_iomode = pnfs_layout_roc_iomode(nfsi)) != 0) {
- struct pnfs_layout_range range = {
- .iomode = roc_iomode,
- .offset = 0,
- .length = NFS4_MAX_UINT64,
- };
-
- pnfs_return_layout(state->inode, &range, NULL,
- RETURN_FILE, wait);
- }
-
+ } else
nfs4_do_close(path, state, gfp_mask, wait);
- }
}
void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5208ef7..5c7ee03 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -438,12 +438,14 @@ static int nfs4_stat_to_errno(int);
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_close_maxsz + \
- encode_getattr_maxsz)
+ encode_getattr_maxsz + \
+ encode_layoutreturn_maxsz)
#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_close_maxsz + \
- decode_getattr_maxsz)
+ decode_getattr_maxsz + \
+ decode_layoutreturn_maxsz)
#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
@@ -2142,6 +2144,8 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closea
encode_putfh(&xdr, args->fh, &hdr);
encode_close(&xdr, args, &hdr);
encode_getfattr(&xdr, args->bitmask, &hdr);
+ if (args->op_bitmask & NFS4_HAS_LAYOUTRETURN) /* layoutreturn set */
+ encode_layoutreturn(&xdr, &args->lr_args, &hdr);
encode_nops(&hdr);
return 0;
}
@@ -5718,6 +5722,12 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
*/
decode_getfattr(&xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
+ /*
+ * With the forgetful model, we pay no attention to the
+ * layoutreturn status.
+ */
+ if (res->op_bitmask & NFS4_HAS_LAYOUTRETURN)
+ decode_layoutreturn(&xdr, &res->lr_res);
out:
return status;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 02f2eb8..2072522 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -630,6 +630,63 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
return ret;
}
+/*
+ * Return on close
+ *
+ * No LAYOUTRETURNS can be sent when BULK RECALL flag is set.
+ * FIXME: add layoutcommit operation if layoutcommit_needed is true.
+ */
+bool
+pnfs_roc(struct nfs4_closedata *data)
+{
+ struct nfs4_layoutreturn_args *lr_args = &data->arg.lr_args;
+ struct pnfs_layout_hdr *lo;
+ struct pnfs_layout_segment *lseg, *tmp;
+ struct pnfs_layout_range range = {
+ .length = NFS4_MAX_UINT64,
+ };
+ LIST_HEAD(tmp_list);
+ bool found = false;
+
+ spin_lock(&data->inode->i_lock);
+ lo = NFS_I(data->inode)->layout;
+ if (!lo || lo->roc_iomode == 0 ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+ goto out_nolayout;
+
+ range.iomode = lo->roc_iomode;
+ list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
+ if (should_free_lseg(&lseg->range, &range)) {
+ mark_lseg_invalid(lseg, &tmp_list);
+ found = true;
+ }
+ if (found == false)
+ goto out_nolayout;
+ /* Stop new and drop response to outstanding LAYOUTGETS */
+ lo->plh_block_lgets++;
+ lo->plh_outstanding++;
+ /* Reference matched in nfs4_free_closedata (or nfs4_close_prepare on barrier wait) */
+ get_layout_hdr(lo);
+
+ spin_unlock(&data->inode->i_lock);
+
+ pnfs_free_lseg_list(&tmp_list);
+
+ lr_args->reclaim = 0;
+ lr_args->layout_type = NFS_SERVER(data->inode)->pnfs_curr_ld->id;
+ lr_args->return_type = RETURN_FILE;
+ lr_args->range = range;
+ lr_args->inode = data->inode;
+ data->res.op_bitmask |= NFS4_HAS_LAYOUTRETURN;
+ data->arg.op_bitmask |= NFS4_HAS_LAYOUTRETURN;
+
+ return true;
+
+out_nolayout:
+ spin_unlock(&data->inode->i_lock);
+ return false;
+}
+
static int
return_layout(struct inode *ino, struct pnfs_layout_range *range,
enum pnfs_layoutreturn_type type, struct pnfs_layout_hdr *lo,
@@ -1004,13 +1061,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
*lgp->lsegpp = lseg;
pnfs_insert_layout(lo, lseg);
- if (res->return_on_close) {
- /* FI: This needs to be re-examined. At lo level,
- * all it needs is a bit indicating whether any of
- * the lsegs in the list have the flags set.
- */
+ if (res->return_on_close)
lo->roc_iomode |= res->range.iomode;
- }
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid, false);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index b84e362..3e04da8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -235,6 +235,7 @@ void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
struct pnfs_layout_range *range,
int notify_bit, atomic_t *notify_count,
struct list_head *tmp_list);
+bool pnfs_roc(struct nfs4_closedata *data);
static inline bool
has_layout(struct nfs_inode *nfsi)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f472405..6c4ba71 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -351,12 +351,18 @@ struct nfs_open_confirmres {
/*
* Arguments to the close call.
*/
+
+/* op_bitmask bits */
+#define NFS4_HAS_LAYOUTRETURN 0x01
+
struct nfs_closeargs {
struct nfs_fh * fh;
nfs4_stateid * stateid;
struct nfs_seqid * seqid;
fmode_t fmode;
const u32 * bitmask;
+ u32 op_bitmask; /* which optional ops to encode */
+ struct nfs4_layoutreturn_args lr_args; /* optional */
struct nfs4_sequence_args seq_args;
};
@@ -365,8 +371,21 @@ struct nfs_closeres {
struct nfs_fattr * fattr;
struct nfs_seqid * seqid;
const struct nfs_server *server;
+ u32 op_bitmask; /* which optional ops encoded */
+ struct nfs4_layoutreturn_res lr_res; /* optional */
struct nfs4_sequence_res seq_res;
};
+
+struct nfs4_closedata {
+ struct path path;
+ struct inode *inode;
+ struct nfs4_state *state;
+ struct nfs_closeargs arg;
+ struct nfs_closeres res;
+ struct nfs_fattr fattr;
+ unsigned long timestamp;
+};
+
/*
* * Arguments to the lock,lockt, and locku call.
* */
--
1.7.2.1
See Trond's commit a6f03393ec8 "NFSv4: Get rid of the bogus
RPC_ASSASSINATED(task) checks"
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4proc.c | 6 ------
1 files changed, 0 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7b42a33..be19e225 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5482,9 +5482,6 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
- if (RPC_ASSASSINATED(task))
- return;
-
if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
nfs_restart_rpc(task, server->nfs_client);
@@ -5588,9 +5585,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
if (!nfs4_sequence_done(task, &lrp->res.seq_res))
return;
- if (RPC_ASSASSINATED(task))
- return;
-
if (lrp->args.return_type == RETURN_FILE)
server = NFS_SERVER(lrp->args.inode);
else
--
1.7.2.1
From: Andy Adamson <[email protected]>
The NFSv4.1 session found in cb_sequence needs to be shared by other
callback operations in the same cb_compound.
Hold a reference to the session's nfs_client throughout the cb_compound
processing.
Move NFS4ERR_RETRY_UNCACHED_REP processing into nfs4_callback_sequence.
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/callback.h | 24 ++++++--
fs/nfs/callback_proc.c | 138 ++++++++++++++++++++++++++++--------------------
fs/nfs/callback_xdr.c | 29 +++++-----
3 files changed, 113 insertions(+), 78 deletions(-)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 2ce61b8..89fee05 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -34,6 +34,11 @@ enum nfs4_callback_opnum {
OP_CB_ILLEGAL = 10044,
};
+struct cb_process_state {
+ __be32 drc_status;
+ struct nfs4_session *session;
+};
+
struct cb_compound_hdr_arg {
unsigned int taglen;
const char *tag;
@@ -104,7 +109,8 @@ struct cb_sequenceres {
};
extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args,
- struct cb_sequenceres *res);
+ struct cb_sequenceres *res,
+ struct cb_process_state *cps);
extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
@@ -125,14 +131,17 @@ struct cb_recallanyargs {
uint32_t craa_type_mask;
};
-extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy);
+extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args,
+ void *dummy,
+ struct cb_process_state *cps);
struct cb_recallslotargs {
struct sockaddr *crsa_addr;
uint32_t crsa_target_max_slots;
};
extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
- void *dummy);
+ void *dummy,
+ struct cb_process_state *cps);
struct cb_layoutrecallargs {
struct sockaddr *cbl_addr;
@@ -147,12 +156,15 @@ struct cb_layoutrecallargs {
extern unsigned nfs4_callback_layoutrecall(
struct cb_layoutrecallargs *args,
- void *dummy);
+ void *dummy, struct cb_process_state *cps);
#endif /* CONFIG_NFS_V4_1 */
-extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res);
-extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
+extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+ struct cb_getattrres *res,
+ struct cb_process_state *cps);
+extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+ struct cb_process_state *cps);
#ifdef CONFIG_NFS_V4
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 6b560ce..84c5a1b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -20,8 +20,10 @@
#ifdef NFS_DEBUG
#define NFSDBG_FACILITY NFSDBG_CALLBACK
#endif
-
-__be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
+
+__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
+ struct cb_getattrres *res,
+ struct cb_process_state *cps)
{
struct nfs_client *clp;
struct nfs_delegation *delegation;
@@ -30,9 +32,13 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
res->bitmap[0] = res->bitmap[1] = 0;
res->status = htonl(NFS4ERR_BADHANDLE);
- clp = nfs_find_client(args->addr, 4);
- if (clp == NULL)
- goto out;
+ if (cps->session) { /* set in cb_sequence */
+ clp = cps->session->clp;
+ } else {
+ clp = nfs_find_client(args->addr, 4);
+ if (clp == NULL)
+ goto out;
+ }
dprintk("NFS: GETATTR callback request from %s\n",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
@@ -60,22 +66,28 @@ out_iput:
rcu_read_unlock();
iput(inode);
out_putclient:
- nfs_put_client(clp);
+ if (!cps->session)
+ nfs_put_client(clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
return res->status;
}
-__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
+__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
+ struct cb_process_state *cps)
{
struct nfs_client *clp;
struct inode *inode;
__be32 res;
res = htonl(NFS4ERR_BADHANDLE);
- clp = nfs_find_client(args->addr, 4);
- if (clp == NULL)
- goto out;
+ if (cps->session) { /* set in cb_sequence */
+ clp = cps->session->clp;
+ } else {
+ clp = nfs_find_client(args->addr, 4);
+ if (clp == NULL)
+ goto out;
+ }
dprintk("NFS: RECALL callback request from %s\n",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
@@ -99,9 +111,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
}
iput(inode);
}
- clp = nfs_find_client_next(prev);
- nfs_put_client(prev);
- } while (clp != NULL);
+ if (!cps->session) {
+ clp = nfs_find_client_next(prev);
+ nfs_put_client(prev);
+ }
+ } while (!cps->session && clp != NULL);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
return res;
@@ -346,46 +360,40 @@ static int pnfs_recall_all_layouts(struct nfs_client *clp)
}
__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
- void *dummy)
+ void *dummy, struct cb_process_state *cps)
{
struct nfs_client *clp;
struct inode *inode = NULL;
__be32 res;
int status;
- unsigned int num_client = 0;
dprintk("%s: -->\n", __func__);
res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
- clp = nfs_find_client(args->cbl_addr, 4);
- if (clp == NULL)
+ if (cps->session) /* set in cb_sequence */
+ clp = cps->session->clp;
+ else
goto out;
- res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
- do {
- struct nfs_client *prev = clp;
- num_client++;
- /* the callback must come from the MDS personality */
- if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
- goto loop;
- /* In the _ALL or _FSID case, we need the inode to get
- * the nfs_server struct.
- */
- inode = nfs_layoutrecall_find_inode(clp, args);
- if (!inode)
- goto loop;
- status = pnfs_async_return_layout(clp, inode, args);
- if (status)
- res = cpu_to_be32(NFS4ERR_DELAY);
- iput(inode);
-loop:
- clp = nfs_find_client_next(prev);
- nfs_put_client(prev);
- } while (clp != NULL);
+ /* the callback must come from the MDS personality */
+ res = cpu_to_be32(NFS4ERR_NOTSUPP);
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
+ goto out;
+ res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
+ /*
+ * In the _ALL or _FSID case, we need the inode to get
+ * the nfs_server struct.
+ */
+ inode = nfs_layoutrecall_find_inode(clp, args);
+ if (!inode)
+ goto out;
+ status = pnfs_async_return_layout(clp, inode, args);
+ if (status)
+ res = cpu_to_be32(NFS4ERR_DELAY);
+ iput(inode);
out:
- dprintk("%s: exit with status = %d numclient %u\n",
- __func__, ntohl(res), num_client);
+ dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
return res;
}
@@ -552,12 +560,15 @@ out:
}
__be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
- struct cb_sequenceres *res)
+ struct cb_sequenceres *res,
+ struct cb_process_state *cps)
{
struct nfs_client *clp;
int i;
__be32 status;
+ cps->session = NULL;
+
status = htonl(NFS4ERR_BADSESSION);
clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid);
if (clp == NULL)
@@ -583,21 +594,27 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
res->csr_slotid = args->csa_slotid;
res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ cps->session = clp->cl_session; /* caller must put nfs_client */
-out_putclient:
- nfs_put_client(clp);
out:
for (i = 0; i < args->csa_nrclists; i++)
kfree(args->csa_rclists[i].rcl_refcalls);
kfree(args->csa_rclists);
- if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP))
+ if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
res->csr_status = 0;
- else
+ cps->drc_status = status;
+ status = 0;
+ } else
res->csr_status = status;
+
dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
ntohl(status), ntohl(res->csr_status));
return status;
+
+out_putclient:
+ nfs_put_client(clp);
+ goto out;
}
static inline bool
@@ -624,24 +641,31 @@ validate_bitmap_values(const unsigned long *mask)
return false;
}
-__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
+__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
+ struct cb_process_state *cps)
{
struct nfs_client *clp;
__be32 status;
fmode_t flags = 0;
status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
- clp = nfs_find_client(args->craa_addr, 4);
- if (clp == NULL)
+ if (cps->session) /* set in cb_sequence */
+ clp = cps->session->clp;
+ else
goto out;
dprintk("NFS: RECALL_ANY callback request from %s\n",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ /* the callback must come from the MDS personality */
+ status = cpu_to_be32(NFS4ERR_NOTSUPP);
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
+ goto out;
+
status = cpu_to_be32(NFS4ERR_INVAL);
if (!validate_bitmap_values((const unsigned long *)
&args->craa_type_mask))
- goto out_put;
+ goto out;
status = cpu_to_be32(NFS4_OK);
if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
@@ -657,23 +681,23 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
if (flags)
nfs_expire_all_delegation_types(clp, flags);
-out_put:
- nfs_put_client(clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
}
/* Reduce the fore channel's max_slots to the target value */
-__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
+__be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy,
+ struct cb_process_state *cps)
{
struct nfs_client *clp;
struct nfs4_slot_table *fc_tbl;
__be32 status;
status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
- clp = nfs_find_client(args->crsa_addr, 4);
- if (clp == NULL)
+ if (cps->session) /* set in cb_sequence */
+ clp = cps->session->clp;
+ else
goto out;
dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n",
@@ -685,16 +709,14 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy)
status = htonl(NFS4ERR_BAD_HIGH_SLOT);
if (args->crsa_target_max_slots > fc_tbl->max_slots ||
args->crsa_target_max_slots < 1)
- goto out_putclient;
+ goto out;
status = htonl(NFS4_OK);
if (args->crsa_target_max_slots == fc_tbl->max_slots)
- goto out_putclient;
+ goto out;
fc_tbl->target_max_slots = args->crsa_target_max_slots;
nfs41_handle_recall_slot(clp);
-out_putclient:
- nfs_put_client(clp); /* balance nfs_find_client */
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 63b17d0..1650ab0 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include "nfs4_fs.h"
#include "callback.h"
+#include "internal.h"
#define CB_OP_TAGLEN_MAXSZ (512)
#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
@@ -34,7 +35,8 @@
/* Internal error code */
#define NFS4ERR_RESOURCE_HDR 11050
-typedef __be32 (*callback_process_op_t)(void *, void *);
+typedef __be32 (*callback_process_op_t)(void *, void *,
+ struct cb_process_state *);
typedef __be32 (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *);
typedef __be32 (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *);
@@ -676,7 +678,8 @@ preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op)
static __be32 process_op(uint32_t minorversion, int nop,
struct svc_rqst *rqstp,
struct xdr_stream *xdr_in, void *argp,
- struct xdr_stream *xdr_out, void *resp, int* drc_status)
+ struct xdr_stream *xdr_out, void *resp,
+ struct cb_process_state *cps)
{
struct callback_op *op = &callback_ops[0];
unsigned int op_nr;
@@ -699,8 +702,8 @@ static __be32 process_op(uint32_t minorversion, int nop,
if (status)
goto encode_hdr;
- if (*drc_status) {
- status = *drc_status;
+ if (cps->drc_status) {
+ status = cps->drc_status;
goto encode_hdr;
}
@@ -708,16 +711,10 @@ static __be32 process_op(uint32_t minorversion, int nop,
if (maxlen > 0 && maxlen < PAGE_SIZE) {
status = op->decode_args(rqstp, xdr_in, argp);
if (likely(status == 0))
- status = op->process_op(argp, resp);
+ status = op->process_op(argp, resp, cps);
} else
status = htonl(NFS4ERR_RESOURCE);
- /* Only set by OP_CB_SEQUENCE processing */
- if (status == htonl(NFS4ERR_RETRY_UNCACHED_REP)) {
- *drc_status = status;
- status = 0;
- }
-
encode_hdr:
res = encode_op_hdr(xdr_out, op_nr, status);
if (unlikely(res))
@@ -736,8 +733,10 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
struct cb_compound_hdr_arg hdr_arg = { 0 };
struct cb_compound_hdr_res hdr_res = { NULL };
struct xdr_stream xdr_in, xdr_out;
- __be32 *p;
- __be32 status, drc_status = 0;
+ __be32 *p, status;
+ struct cb_process_state cps = {
+ .drc_status = 0,
+ };
unsigned int nops = 0;
dprintk("%s: start\n", __func__);
@@ -758,7 +757,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
while (status == 0 && nops != hdr_arg.nops) {
status = process_op(hdr_arg.minorversion, nops, rqstp,
- &xdr_in, argp, &xdr_out, resp, &drc_status);
+ &xdr_in, argp, &xdr_out, resp, &cps);
nops++;
}
@@ -771,6 +770,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
*hdr_res.status = status;
*hdr_res.nops = htonl(nops);
+ if (cps.session) /* matched by cb_sequence find_client_with_session */
+ nfs_put_client(cps.session->clp);
dprintk("%s: done, status = %u\n", __func__, ntohl(status));
return rpc_success;
}
--
1.7.2.1
Preparing for changes in pnfs_clear_lseg_list
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 37 ++++++++++++++-----------------------
fs/nfs/pnfs.h | 5 +++--
2 files changed, 17 insertions(+), 25 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 72d7ed3..3bbe3be 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -266,41 +266,32 @@ static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
INIT_LIST_HEAD(&lseg->fi_list);
- kref_init(&lseg->kref);
+ atomic_set(&lseg->pls_refcount, 1);
+ smp_mb();
lseg->valid = true;
lseg->layout = lo;
}
-/* Called without i_lock held, as the free_lseg call may sleep */
-static void
-destroy_lseg(struct kref *kref)
-{
- struct pnfs_layout_segment *lseg =
- container_of(kref, struct pnfs_layout_segment, kref);
- struct inode *ino = lseg->layout->inode;
-
- dprintk("--> %s\n", __func__);
- NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
- /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
- put_layout_hdr(ino);
-}
-
void
put_lseg(struct pnfs_layout_segment *lseg)
{
bool do_wake_up;
- struct nfs_inode *nfsi;
+ struct inode *ino;
if (!lseg)
return;
dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
- atomic_read(&lseg->kref.refcount), lseg->valid);
+ atomic_read(&lseg->pls_refcount), lseg->valid);
do_wake_up = !lseg->valid;
- nfsi = NFS_I(lseg->layout->inode);
- kref_put(&lseg->kref, destroy_lseg);
+ ino = lseg->layout->inode;
+ if (atomic_dec_and_test(&lseg->pls_refcount)) {
+ NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
+ put_layout_hdr(ino);
+ }
if (do_wake_up)
- rpc_wake_up(&nfsi->lo_rpcwaitq);
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
}
EXPORT_SYMBOL_GPL(put_lseg);
@@ -326,7 +317,7 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
static bool
_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
{
- return atomic_read(&lseg->kref.refcount) == 1;
+ return atomic_read(&lseg->pls_refcount) == 1;
}
static void
@@ -570,7 +561,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
if (!_pnfs_can_return_lseg(lseg)) {
dprintk("%s: wait on lseg %p refcount %d\n",
__func__, lseg,
- atomic_read(&lseg->kref.refcount));
+ atomic_read(&lseg->pls_refcount));
ret = true;
}
}
@@ -827,7 +818,7 @@ pnfs_has_layout(struct pnfs_layout_hdr *lo,
}
dprintk("%s:Return lseg %p ref %d valid %d\n",
- __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0,
+ __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0,
ret ? ret->valid : 0);
return ret;
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 7e06437..05dd5e0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -35,7 +35,7 @@
struct pnfs_layout_segment {
struct list_head fi_list;
struct pnfs_layout_range range;
- struct kref kref;
+ atomic_t pls_refcount;
bool valid;
struct pnfs_layout_hdr *layout;
};
@@ -230,7 +230,8 @@ static inline void pnfs_invalidate_layout_stateid(struct pnfs_layout_hdr *lo)
static inline void get_lseg(struct pnfs_layout_segment *lseg)
{
- kref_get(&lseg->kref);
+ atomic_inc(&lseg->pls_refcount);
+ smp_mb__after_atomic_inc();
}
/* Return true if a layout driver is being used for this mountpoint */
--
1.7.2.1
Since the release function may be called without sending any RPC,
it must not refer to any of the result fields. This is
better accomplished in the rpc_done function.
In the process, this basically reverts the commit
"pnfs: do not change layout stateid when dropping layouts."
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4proc.c | 17 ++++++++++++++---
fs/nfs/nfs4xdr.c | 1 -
fs/nfs/pnfs.c | 23 ++---------------------
fs/nfs/pnfs.h | 3 ++-
include/linux/nfs_xdr.h | 1 -
5 files changed, 18 insertions(+), 27 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8d3965c..de3ed2f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5596,9 +5596,20 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
server = NFS_SERVER(lrp->args.inode);
else
server = NULL;
- if (nfs4_async_handle_error(task, server, NULL, lrp->clp) == -EAGAIN)
+ if (nfs4_async_handle_error(task, server, NULL, lrp->clp) == -EAGAIN) {
nfs_restart_rpc(task, lrp->clp);
+ return;
+ }
+ if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) {
+ struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+ spin_lock(&lo->inode->i_lock);
+ if (lrp->res.lrs_present)
+ pnfs_set_layout_stateid(lo, &lrp->res.stateid);
+ else
+ pnfs_invalidate_layout_stateid(lo);
+ spin_unlock(&lo->inode->i_lock);
+ }
dprintk("<-- %s\n", __func__);
}
@@ -5607,8 +5618,8 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
-
- pnfs_layoutreturn_release(lrp);
+ if (lrp->args.return_type == RETURN_FILE)
+ put_layout_hdr(lrp->args.inode);
kfree(calldata);
dprintk("<-- %s\n", __func__);
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b71a482..10a6f4a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5286,7 +5286,6 @@ static int decode_layoutreturn(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out_overflow;
- res->valid = true;
res->lrs_present = be32_to_cpup(p);
if (res->lrs_present)
status = decode_stateid(xdr, &res->stateid);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 34f6914..44f4f30 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -449,7 +449,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
*
* lo->stateid could be the open stateid, in which case we just use what given.
*/
-static void
+void
pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new)
{
@@ -587,25 +587,6 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
return ret;
}
-void
-pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
-{
- struct pnfs_layout_hdr *lo;
-
- if (lrp->args.return_type != RETURN_FILE)
- return;
- lo = NFS_I(lrp->args.inode)->layout;
- spin_lock(&lrp->args.inode->i_lock);
- if (!lrp->res.valid)
- ; /* forgetful model internal release */
- else if (!lrp->res.lrs_present)
- pnfs_invalidate_layout_stateid(lo);
- else
- pnfs_set_layout_stateid(lo, &lrp->res.stateid);
- put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
- spin_unlock(&lrp->args.inode->i_lock);
-}
-
static int
return_layout(struct inode *ino, struct pnfs_layout_range *range,
enum pnfs_layoutreturn_type type, struct pnfs_layout_hdr *lo,
@@ -675,7 +656,7 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
if (should_free_lseg(lseg, &arg))
mark_lseg_invalid(lseg, &tmp_list);
- /* Reference matched in pnfs_layoutreturn_release */
+ /* Reference matched in nfs4_layoutreturn_release */
get_layout_hdr_locked(lo);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index de4eaa8..f0232f5 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -201,10 +201,11 @@ void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
struct nfs_open_context *, struct list_head *);
void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
-void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
void put_layout_hdr(struct inode *inode);
+void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new);
void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
struct nfs4_state *open_state);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0ee7cce..ebe11d3 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -270,7 +270,6 @@ struct nfs4_layoutreturn_args {
struct nfs4_layoutreturn_res {
struct nfs4_sequence_res seq_res;
- bool valid; /* internal, true if received reply */
u32 lrs_present;
nfs4_stateid stateid;
};
--
1.7.2.1
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/callback.h | 12 ++++++++----
fs/nfs/callback_proc.c | 16 ++++++++--------
fs/nfs/callback_xdr.c | 21 ++++++++++++---------
3 files changed, 28 insertions(+), 21 deletions(-)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 89fee05..817b0f4 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -145,13 +145,17 @@ extern unsigned nfs4_callback_recallslot(struct cb_recallslotargs *args,
struct cb_layoutrecallargs {
struct sockaddr *cbl_addr;
- struct nfs_fh cbl_fh;
- struct pnfs_layout_range cbl_seg;
- struct nfs_fsid cbl_fsid;
uint32_t cbl_recall_type;
uint32_t cbl_layout_type;
uint32_t cbl_layoutchanged;
- nfs4_stateid cbl_stateid;
+ union {
+ struct {
+ struct nfs_fh cbl_fh;
+ struct pnfs_layout_range cbl_range;
+ nfs4_stateid cbl_stateid;
+ };
+ struct nfs_fsid cbl_fsid;
+ };
};
extern unsigned nfs4_callback_layoutrecall(
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 53a85648..1509c34 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -241,13 +241,13 @@ static int pnfs_recall_layout(void *data)
rl = *args->rl;
/* support whole file layouts only */
- rl.cbl_seg.offset = 0;
- rl.cbl_seg.length = NFS4_MAX_UINT64;
+ rl.cbl_range.offset = 0;
+ rl.cbl_range.length = NFS4_MAX_UINT64;
if (rl.cbl_recall_type == RETURN_FILE) {
if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
rl.cbl_stateid))
- status = pnfs_return_layout(inode, &rl.cbl_seg,
+ status = pnfs_return_layout(inode, &rl.cbl_range,
&rl.cbl_stateid, RETURN_FILE,
false);
else
@@ -269,7 +269,7 @@ static int pnfs_recall_layout(void *data)
/* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
/* FIXME: need to check status on pnfs_return_layout */
- pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
+ pnfs_return_layout(ino, &rl.cbl_range, NULL, RETURN_FILE, false);
iput(ino);
}
@@ -285,7 +285,7 @@ static int pnfs_recall_layout(void *data)
lrp->args.layout_type = rl.cbl_layout_type;
lrp->args.return_type = rl.cbl_recall_type;
lrp->clp = clp;
- lrp->args.range = rl.cbl_seg;
+ lrp->args.range = rl.cbl_range;
lrp->args.inode = inode;
nfs4_proc_layoutreturn(lrp, true);
@@ -346,9 +346,9 @@ static int pnfs_recall_all_layouts(struct nfs_client *clp)
int status = 0;
rl.cbl_recall_type = RETURN_ALL;
- rl.cbl_seg.iomode = IOMODE_ANY;
- rl.cbl_seg.offset = 0;
- rl.cbl_seg.length = NFS4_MAX_UINT64;
+ rl.cbl_range.iomode = IOMODE_ANY;
+ rl.cbl_range.offset = 0;
+ rl.cbl_range.length = NFS4_MAX_UINT64;
/* we need the inode to get the nfs_server struct */
inode = nfs_layoutrecall_find_inode(clp, &rl);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 1650ab0..695d15a 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -229,6 +229,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
{
__be32 *p;
__be32 status = 0;
+ uint32_t iomode;
args->cbl_addr = svc_addr(rqstp);
p = read_buf(xdr, 4 * sizeof(uint32_t));
@@ -238,11 +239,15 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
}
args->cbl_layout_type = ntohl(*p++);
- args->cbl_seg.iomode = ntohl(*p++);
+ /* Despite the spec's xdr, iomode really belongs in the FILE switch,
+ * as it is unusable and ignored with the other types.
+ */
+ iomode = ntohl(*p++);
args->cbl_layoutchanged = ntohl(*p++);
args->cbl_recall_type = ntohl(*p++);
if (likely(args->cbl_recall_type == RETURN_FILE)) {
+ args->cbl_range.iomode = iomode;
status = decode_fh(xdr, &args->cbl_fh);
if (unlikely(status != 0))
goto out;
@@ -252,8 +257,8 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
status = htonl(NFS4ERR_BADXDR);
goto out;
}
- p = xdr_decode_hyper(p, &args->cbl_seg.offset);
- p = xdr_decode_hyper(p, &args->cbl_seg.length);
+ p = xdr_decode_hyper(p, &args->cbl_range.offset);
+ p = xdr_decode_hyper(p, &args->cbl_range.length);
status = decode_stateid(xdr, &args->cbl_stateid);
if (unlikely(status != 0))
goto out;
@@ -266,12 +271,10 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
p = xdr_decode_hyper(p, &args->cbl_fsid.major);
p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
}
- dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d "
- "fsid %llx-%llx fhsize %d\n", __func__,
- args->cbl_layout_type, args->cbl_seg.iomode,
- args->cbl_layoutchanged, args->cbl_recall_type,
- args->cbl_fsid.major, args->cbl_fsid.minor,
- args->cbl_fh.size);
+ dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
+ __func__,
+ args->cbl_layout_type, iomode,
+ args->cbl_layoutchanged, args->cbl_recall_type);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
--
1.7.2.1
Looks good to me.
Fred
On Wed, Nov 10, 2010 at 8:41 AM, Benny Halevy <[email protected]> wrote:
> Signed-off-by: Benny Halevy <[email protected]>
> ---
> ?fs/nfs/callback_proc.c | ? 53 +++++++++++++++++++++--------------------------
> ?1 files changed, 24 insertions(+), 29 deletions(-)
>
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 6e21add..3a19cec 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -21,6 +21,12 @@
> ?#define NFSDBG_FACILITY NFSDBG_CALLBACK
> ?#endif
>
> +static struct nfs_client *
> +find_client_from_cps(struct cb_process_state *cps, struct sockaddr *addr)
> +{
> + ? ? ? return cps->session ? cps->session->clp : nfs_find_client(addr, 4);
> +}
> +
> ?__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct cb_getattrres *res,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? struct cb_process_state *cps)
> @@ -32,13 +38,9 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
>
> ? ? ? ?res->bitmap[0] = res->bitmap[1] = 0;
> ? ? ? ?res->status = htonl(NFS4ERR_BADHANDLE);
> - ? ? ? if (cps->session) { /* set in cb_sequence */
> - ? ? ? ? ? ? ? clp = cps->session->clp;
> - ? ? ? } else {
> - ? ? ? ? ? ? ? clp = nfs_find_client(args->addr, 4);
> - ? ? ? ? ? ? ? if (clp == NULL)
> - ? ? ? ? ? ? ? ? ? ? ? goto out;
> - ? ? ? }
> + ? ? ? clp = find_client_from_cps(cps, args->addr);
> + ? ? ? if (clp == NULL)
> + ? ? ? ? ? ? ? goto out;
>
> ? ? ? ?dprintk("NFS: GETATTR callback request from %s\n",
> ? ? ? ? ? ? ? ?rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
> @@ -81,13 +83,9 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
> ? ? ? ?__be32 res;
>
> ? ? ? ?res = htonl(NFS4ERR_BADHANDLE);
> - ? ? ? if (cps->session) { /* set in cb_sequence */
> - ? ? ? ? ? ? ? clp = cps->session->clp;
> - ? ? ? } else {
> - ? ? ? ? ? ? ? clp = nfs_find_client(args->addr, 4);
> - ? ? ? ? ? ? ? if (clp == NULL)
> - ? ? ? ? ? ? ? ? ? ? ? goto out;
> - ? ? ? }
> + ? ? ? clp = find_client_from_cps(cps, args->addr);
> + ? ? ? if (clp == NULL)
> + ? ? ? ? ? ? ? goto out;
>
> ? ? ? ?dprintk("NFS: RECALL callback request from %s\n",
> ? ? ? ? ? ? ? ?rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
> @@ -111,11 +109,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
> ? ? ? ? ? ? ? ? ? ? ? ?}
> ? ? ? ? ? ? ? ? ? ? ? ?iput(inode);
> ? ? ? ? ? ? ? ?}
> - ? ? ? ? ? ? ? if (!cps->session) {
> - ? ? ? ? ? ? ? ? ? ? ? clp = nfs_find_client_next(prev);
> - ? ? ? ? ? ? ? ? ? ? ? nfs_put_client(prev);
> - ? ? ? ? ? ? ? }
> - ? ? ? } while (!cps->session && clp != NULL);
> + ? ? ? ? ? ? ? if (cps->session)
> + ? ? ? ? ? ? ? ? ? ? ? break;
> + ? ? ? ? ? ? ? clp = nfs_find_client_next(prev);
> + ? ? ? ? ? ? ? nfs_put_client(prev);
> + ? ? ? } while (clp != NULL);
> ?out:
> ? ? ? ?dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
> ? ? ? ?return res;
> @@ -420,11 +418,7 @@ __be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
> ? ? ? ? ? ? ? ?res = NFS4ERR_OP_NOT_IN_SESSION;
> ? ? ? ? ? ? ? ?goto out;
> ? ? ? ?}
> - ? ? ? /* the callback must come from the MDS personality */
> - ? ? ? if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
> - ? ? ? ? ? ? ? res = NFS4ERR_INVAL;
> - ? ? ? ? ? ? ? goto out;
> - ? ? ? }
> +
> ? ? ? ?res = do_callback_layoutrecall(clp, args);
> ?out:
> ? ? ? ?dprintk("%s: exit with status = %d\n", __func__, res);
> @@ -623,6 +617,12 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
> ? ? ? ?if (status)
> ? ? ? ? ? ? ? ?goto out_putclient;
>
> + ? ? ? /* The callback must come from the MDS personality */
> + ? ? ? if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
> + ? ? ? ? ? ? ? status = htonl(NFS4ERR_INVAL);
> + ? ? ? ? ? ? ? goto out_putclient;
> + ? ? ? }
> +
> ? ? ? ?/*
> ? ? ? ? * Check for pending referring calls. ?If a match is found, a
> ? ? ? ? * related callback was received before the response to the original
> @@ -702,11 +702,6 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
> ? ? ? ?dprintk("NFS: RECALL_ANY callback request from %s\n",
> ? ? ? ? ? ? ? ?rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
>
> - ? ? ? /* the callback must come from the MDS personality */
> - ? ? ? status = cpu_to_be32(NFS4ERR_NOTSUPP);
> - ? ? ? if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
> - ? ? ? ? ? ? ? goto out;
> -
> ? ? ? ?status = cpu_to_be32(NFS4ERR_INVAL);
> ? ? ? ?if (!validate_bitmap_values((const unsigned long *)
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?&args->craa_type_mask))
> --
> 1.7.2.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at ?http://vger.kernel.org/majordomo-info.html
>
On 2010-11-04 17:22, Fred Isaman wrote:
> Since the release function may be called without sending any RPC,
> it must should not refer to any of the result fields. This is
^^^^^^
must not
> better accomplished in the rpc_done function.
>
> In the process, this basically reverts the commit
> "pnfs: do not change layout stateid when dropping layouts."
Not exactly, as the !lrp->res.valid noop case is now handled with
the same outcome, just implemented differently.
Otherwise, this patch looks good.
Benny
>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/nfs4proc.c | 17 ++++++++++++++---
> fs/nfs/nfs4xdr.c | 1 -
> fs/nfs/pnfs.c | 23 ++---------------------
> fs/nfs/pnfs.h | 3 ++-
> include/linux/nfs_xdr.h | 1 -
> 5 files changed, 18 insertions(+), 27 deletions(-)
>
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 8d3965c..de3ed2f 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -5596,9 +5596,20 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
> server = NFS_SERVER(lrp->args.inode);
> else
> server = NULL;
> - if (nfs4_async_handle_error(task, server, NULL, lrp->clp) == -EAGAIN)
> + if (nfs4_async_handle_error(task, server, NULL, lrp->clp) == -EAGAIN) {
> nfs_restart_rpc(task, lrp->clp);
> + return;
> + }
> + if ((task->tk_status == 0) && (lrp->args.return_type == RETURN_FILE)) {
> + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
>
> + spin_lock(&lo->inode->i_lock);
> + if (lrp->res.lrs_present)
> + pnfs_set_layout_stateid(lo, &lrp->res.stateid);
> + else
> + pnfs_invalidate_layout_stateid(lo);
> + spin_unlock(&lo->inode->i_lock);
> + }
> dprintk("<-- %s\n", __func__);
> }
>
> @@ -5607,8 +5618,8 @@ static void nfs4_layoutreturn_release(void *calldata)
> struct nfs4_layoutreturn *lrp = calldata;
>
> dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
> -
> - pnfs_layoutreturn_release(lrp);
> + if (lrp->args.return_type == RETURN_FILE)
> + put_layout_hdr(lrp->args.inode);
> kfree(calldata);
> dprintk("<-- %s\n", __func__);
> }
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index b71a482..10a6f4a 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -5286,7 +5286,6 @@ static int decode_layoutreturn(struct xdr_stream *xdr,
> p = xdr_inline_decode(xdr, 4);
> if (unlikely(!p))
> goto out_overflow;
> - res->valid = true;
> res->lrs_present = be32_to_cpup(p);
> if (res->lrs_present)
> status = decode_stateid(xdr, &res->stateid);
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 34f6914..44f4f30 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -449,7 +449,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
> *
> * lo->stateid could be the open stateid, in which case we just use what given.
> */
> -static void
> +void
> pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
> const nfs4_stateid *new)
> {
> @@ -587,25 +587,6 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
> return ret;
> }
>
> -void
> -pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
> -{
> - struct pnfs_layout_hdr *lo;
> -
> - if (lrp->args.return_type != RETURN_FILE)
> - return;
> - lo = NFS_I(lrp->args.inode)->layout;
> - spin_lock(&lrp->args.inode->i_lock);
> - if (!lrp->res.valid)
> - ; /* forgetful model internal release */
> - else if (!lrp->res.lrs_present)
> - pnfs_invalidate_layout_stateid(lo);
> - else
> - pnfs_set_layout_stateid(lo, &lrp->res.stateid);
> - put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
> - spin_unlock(&lrp->args.inode->i_lock);
> -}
> -
> static int
> return_layout(struct inode *ino, struct pnfs_layout_range *range,
> enum pnfs_layoutreturn_type type, struct pnfs_layout_hdr *lo,
> @@ -675,7 +656,7 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
> list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
> if (should_free_lseg(lseg, &arg))
> mark_lseg_invalid(lseg, &tmp_list);
> - /* Reference matched in pnfs_layoutreturn_release */
> + /* Reference matched in nfs4_layoutreturn_release */
> get_layout_hdr_locked(lo);
> spin_unlock(&ino->i_lock);
> pnfs_free_lseg_list(&tmp_list);
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index de4eaa8..f0232f5 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -201,10 +201,11 @@ void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
> struct nfs_open_context *, struct list_head *);
> void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
> int pnfs_layout_process(struct nfs4_layoutget *lgp);
> -void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr);
> void pnfs_destroy_layout(struct nfs_inode *);
> void pnfs_destroy_all_layouts(struct nfs_client *);
> void put_layout_hdr(struct inode *inode);
> +void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
> + const nfs4_stateid *new);
> void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
> void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
> struct nfs4_state *open_state);
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 0ee7cce..ebe11d3 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -270,7 +270,6 @@ struct nfs4_layoutreturn_args {
>
> struct nfs4_layoutreturn_res {
> struct nfs4_sequence_res seq_res;
> - bool valid; /* internal, true if received reply */
> u32 lrs_present;
> nfs4_stateid stateid;
> };
On Wed, Nov 10, 2010 at 9:35 AM, Benny Halevy <[email protected]> wrote:
> On 2010-11-04 17:22, Fred Isaman wrote:
>> Instead, have mark_invalid function that marks lseg invalid and
>> removes the reference that holds it in the list.  Now when io is finished,
>> the lseg will automatically be removed from the list.  This is
>> at the heart of many of the upcoming cb_layoutrecall changes.
>>
>> Signed-off-by: Fred Isaman <[email protected]>
>> ---
>> ?fs/nfs/nfs4xdr.c | ? ?3 +-
>> ?fs/nfs/pnfs.c ? ?| ?145 ++++++++++++++++++++++++++++++++++-------------------
>> ?fs/nfs/pnfs.h ? ?| ? ?1 +
>> ?3 files changed, 95 insertions(+), 54 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> index 238eeb2..6d9ef2b 100644
>> --- a/fs/nfs/nfs4xdr.c
>> +++ b/fs/nfs/nfs4xdr.c
>> @@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
>> ? ? ? ? ? ? ? p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
>> ? ? ? ? ? ? ? p = xdr_encode_hyper(p, args->range.offset);
>> ? ? ? ? ? ? ? p = xdr_encode_hyper(p, args->range.length);
>> - ? ? ? ? ? ? pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NULL);
>> + ? ? ? ? ? ? pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
>> ? ? ? ? ? ? ? p = xdr_encode_opaque_fixed(p, &stateid.data,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NFS4_STATEID_SIZE);
>> ? ? ? ? ? ? ? p = reserve_space(xdr, 4);
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 3bbe3be..4e5c68b 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
>> ? ? ? lseg->layout = lo;
>> ?}
>>
>> +static void
>> +_put_lseg_common(struct pnfs_layout_segment *lseg)
>> +{
>> + ? ? BUG_ON(lseg->valid == true);
>> + ? ? list_del(&lseg->fi_list);
>> + ? ? if (list_empty(&lseg->layout->segs)) {
>> + ? ? ? ? ? ? struct nfs_client *clp;
>> +
>> + ? ? ? ? ? ? clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
>> + ? ? ? ? ? ? spin_lock(&clp->cl_lock);
>> + ? ? ? ? ? ? /* List does not take a reference, so no need for put here */
>> + ? ? ? ? ? ? list_del_init(&lseg->layout->layouts);
>> + ? ? ? ? ? ? spin_unlock(&clp->cl_lock);
>> + ? ? ? ? ? ? pnfs_invalidate_layout_stateid(lseg->layout);
>> + ? ? }
>> + ? ? rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
>> +}
>> +
>> +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
>> + * could sleep, so must be called outside of the lock.
>> + */
>> +static void
>> +put_lseg_locked(struct pnfs_layout_segment *lseg,
>> + ? ? ? ? ? ? struct list_head *tmp_list)
>> +{
>> + ? ? dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>> + ? ? ? ? ? ? atomic_read(&lseg->pls_refcount), lseg->valid);
>> + ? ? if (atomic_dec_and_test(&lseg->pls_refcount)) {
>> + ? ? ? ? ? ? _put_lseg_common(lseg);
>> + ? ? ? ? ? ? list_add(&lseg->fi_list, tmp_list);
>> + ? ? }
>> +}
>> +
>> ?void
>> ?put_lseg(struct pnfs_layout_segment *lseg)
>> ?{
>> - ? ? bool do_wake_up;
>> ? ? ? struct inode *ino;
>>
>> ? ? ? if (!lseg)
>> @@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg)
>>
>> ? ? ? dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>> ? ? ? ? ? ? ? atomic_read(&lseg->pls_refcount), lseg->valid);
>> - ? ? do_wake_up = !lseg->valid;
>> ? ? ? ino = lseg->layout->inode;
>> - ? ? if (atomic_dec_and_test(&lseg->pls_refcount)) {
>> + ? ? if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
>> + ? ? ? ? ? ? _put_lseg_common(lseg);
>> + ? ? ? ? ? ? spin_unlock(&ino->i_lock);
>> ? ? ? ? ? ? ? NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>> ? ? ? ? ? ? ? /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>> ? ? ? ? ? ? ? put_layout_hdr(ino);
>> ? ? ? }
>> - ? ? if (do_wake_up)
>> - ? ? ? ? ? ? rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
>> ?}
>> ?EXPORT_SYMBOL_GPL(put_lseg);
>>
>> @@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
>> ? ? ? ? ? ? ? lseg->range.iomode == range->iomode);
>> ?}
>>
>> -static bool
>> -_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
>> +static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? struct list_head *tmp_list)
>> ?{
>> - ? ? return atomic_read(&lseg->pls_refcount) == 1;
>> + ? ? assert_spin_locked(&lseg->layout->inode->i_lock);
>> + ? ? if (lseg->valid) {
>> + ? ? ? ? ? ? lseg->valid = false;
>> + ? ? ? ? ? ? /* Remove the reference keeping the lseg in the
>> + ? ? ? ? ? ? ?* list. ?It will now be removed when all
>> + ? ? ? ? ? ? ?* outstanding io is finished.
>> + ? ? ? ? ? ? ?*/
>> + ? ? ? ? ? ? put_lseg_locked(lseg, tmp_list);
>> + ? ? }
>> ?}
>>
>> ?static void
>> @@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
>> ? ? ? ? ? ? ? __func__, lo, range->offset, range->length, range->iomode);
>>
>> ? ? ? assert_spin_locked(&lo->inode->i_lock);
>> - ? ? list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
>> - ? ? ? ? ? ? if (!should_free_lseg(lseg, range) ||
>> - ? ? ? ? ? ? ? ? !_pnfs_can_return_lseg(lseg))
>> - ? ? ? ? ? ? ? ? ? ? continue;
>> - ? ? ? ? ? ? dprintk("%s: freeing lseg %p iomode %d "
>> - ? ? ? ? ? ? ? ? ? ? "offset %llu length %llu\n", __func__,
>> - ? ? ? ? ? ? ? ? ? ? lseg, lseg->range.iomode, lseg->range.offset,
>> - ? ? ? ? ? ? ? ? ? ? lseg->range.length);
>> - ? ? ? ? ? ? list_move(&lseg->fi_list, tmp_list);
>> - ? ? }
>> - ? ? if (list_empty(&lo->segs)) {
>> - ? ? ? ? ? ? struct nfs_client *clp;
>> -
>> - ? ? ? ? ? ? clp = NFS_SERVER(lo->inode)->nfs_client;
>> - ? ? ? ? ? ? spin_lock(&clp->cl_lock);
>> - ? ? ? ? ? ? /* List does not take a reference, so no need for put here */
>> - ? ? ? ? ? ? list_del_init(&lo->layouts);
>> - ? ? ? ? ? ? spin_unlock(&clp->cl_lock);
>> - ? ? ? ? ? ? pnfs_invalidate_layout_stateid(lo);
>> - ? ? }
>> -
>> + ? ? list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
>> + ? ? ? ? ? ? if (should_free_lseg(lseg, range)) {
>> + ? ? ? ? ? ? ? ? ? ? dprintk("%s: freeing lseg %p iomode %d "
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? "offset %llu length %llu\n", __func__,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? lseg, lseg->range.iomode, lseg->range.offset,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? lseg->range.length);
>> + ? ? ? ? ? ? ? ? ? ? mark_lseg_invalid(lseg, tmp_list);
>> + ? ? ? ? ? ? }
>> ? ? ? dprintk("%s:Return\n", __func__);
>> ?}
>>
>> ?static void
>> -pnfs_free_lseg_list(struct list_head *tmp_list)
>> +pnfs_free_lseg_list(struct list_head *free_me)
>> ?{
>> - ? ? struct pnfs_layout_segment *lseg;
>> + ? ? struct pnfs_layout_segment *lseg, *tmp;
>> + ? ? struct inode *ino;
>>
>> - ? ? while (!list_empty(tmp_list)) {
>> - ? ? ? ? ? ? lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? fi_list);
>> - ? ? ? ? ? ? dprintk("%s calling put_lseg on %p\n", __func__, lseg);
>> - ? ? ? ? ? ? list_del(&lseg->fi_list);
>> - ? ? ? ? ? ? put_lseg(lseg);
>> + ? ? list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
>> + ? ? ? ? ? ? BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
>> + ? ? ? ? ? ? ino = lseg->layout->inode;
>> + ? ? ? ? ? ? NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>> + ? ? ? ? ? ? /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>> + ? ? ? ? ? ? put_layout_hdr(ino);
>> ? ? ? }
>> + ? ? INIT_LIST_HEAD(free_me);
>> ?}
>>
>> ?void
>> @@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
>> ? ? ? dprintk("<-- %s\n", __func__);
>> ?}
>>
>> +/* Layoutreturn may use an invalid stateid, just copy what is there */
>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
>> +{
>> + ? ? int seq;
>> +
>> + ? ? do {
>> + ? ? ? ? ? ? seq = read_seqbegin(&lo->seqlock);
>> + ? ? ? ? ? ? memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
>> + ? ? } while (read_seqretry(&lo->seqlock, seq));
>> +}
>> +
>> ?void
>> ?pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>> ? ? ? ? ? ? ? ? ? ? ? struct nfs4_state *open_state)
>> @@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
>> ? ? ? return out;
>> ?}
>>
>> +/* Return true if there is layout based io in progress in the given range.
>> + * Assumes range has already been marked invalid, and layout marked to
>> + * prevent any new lseg from being inserted.
>> + */
>> ?bool
>> ?pnfs_return_layout_barrier(struct nfs_inode *nfsi,
>> ? ? ? ? ? ? ? ? ? ? ? ? ?struct pnfs_layout_range *range)
>> ?{
>> - ? ? struct pnfs_layout_segment *lseg;
>> + ? ? struct pnfs_layout_segment *lseg, *tmp;
>> ? ? ? bool ret = false;
>>
>> ? ? ? spin_lock(&nfsi->vfs_inode.i_lock);
>> - ? ? list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
>> - ? ? ? ? ? ? if (!should_free_lseg(lseg, range))
>> - ? ? ? ? ? ? ? ? ? ? continue;
>> - ? ? ? ? ? ? lseg->valid = false;
>> - ? ? ? ? ? ? if (!_pnfs_can_return_lseg(lseg)) {
>> - ? ? ? ? ? ? ? ? ? ? dprintk("%s: wait on lseg %p refcount %d\n",
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? __func__, lseg,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? atomic_read(&lseg->pls_refcount));
>> + ? ? list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
>
> Why do you need the safe version here while the inode is locked?
>
We don't.
>> + ? ? ? ? ? ? if (should_free_lseg(lseg, range)) {
>> ? ? ? ? ? ? ? ? ? ? ? ret = true;
>
> But this will always return "true" if there's any lseg to return,
> not only if (!_pnfs_can_return_lseg(lseg)).
>
> What am I missing? :)
>
A return of "true" means the caller should wait. So if there is any
lseg still left to return, we should return true. The refcounting has
changed so that once the pending IO is finished, the lseg will
automatically be removed from the list. I suspect that what you are
missing is that...the refcount in the invalid case is one less than
what it used to be.
Fred
> Benny
>
>> + ? ? ? ? ? ? ? ? ? ? break;
>> ? ? ? ? ? ? ? }
>> - ? ? }
>> ? ? ? spin_unlock(&nfsi->vfs_inode.i_lock);
>> ? ? ? dprintk("%s:Return %d\n", __func__, ret);
>> ? ? ? return ret;
>> @@ -574,12 +611,10 @@ void
>> ?pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
>> ?{
>> ? ? ? struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
>> - ? ? LIST_HEAD(tmp_list);
>>
>> ? ? ? if (lrp->args.return_type != RETURN_FILE)
>> ? ? ? ? ? ? ? return;
>> ? ? ? spin_lock(&lrp->args.inode->i_lock);
>> - ? ? pnfs_clear_lseg_list(lo, &tmp_list, &lrp->args.range);
>> ? ? ? if (!lrp->res.valid)
>> ? ? ? ? ? ? ? ; ? ? ? /* forgetful model internal release */
>> ? ? ? else if (!lrp->res.lrs_present)
>> @@ -588,7 +623,6 @@ pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
>> ? ? ? ? ? ? ? pnfs_set_layout_stateid(lo, &lrp->res.stateid);
>> ? ? ? put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
>> ? ? ? spin_unlock(&lrp->args.inode->i_lock);
>> - ? ? pnfs_free_lseg_list(&tmp_list);
>> ?}
>>
>> ?static int
>> @@ -641,7 +675,11 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
>> ? ? ? arg.offset = 0;
>> ? ? ? arg.length = NFS4_MAX_UINT64;
>>
>> + ? ? /* probably should BUGON if type != RETURN_FILE */
>> ? ? ? if (type == RETURN_FILE) {
>> + ? ? ? ? ? ? LIST_HEAD(tmp_list);
>> + ? ? ? ? ? ? struct pnfs_layout_segment *lseg, *tmp;
>> +
>> ? ? ? ? ? ? ? spin_lock(&ino->i_lock);
>> ? ? ? ? ? ? ? lo = nfsi->layout;
>> ? ? ? ? ? ? ? if (lo && !has_layout_to_return(lo, &arg))
>> @@ -652,10 +690,13 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
>> ? ? ? ? ? ? ? ? ? ? ? goto out;
>> ? ? ? ? ? ? ? }
>>
>> + ? ? ? ? ? ? list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
>> + ? ? ? ? ? ? ? ? ? ? if (should_free_lseg(lseg, &arg))
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? mark_lseg_invalid(lseg, &tmp_list);
>> ? ? ? ? ? ? ? /* Reference matched in pnfs_layoutreturn_release */
>> ? ? ? ? ? ? ? get_layout_hdr_locked(lo);
>> -
>> ? ? ? ? ? ? ? spin_unlock(&ino->i_lock);
>> + ? ? ? ? ? ? pnfs_free_lseg_list(&tmp_list);
>>
>> ? ? ? ? ? ? ? if (layoutcommit_needed(nfsi)) {
>> ? ? ? ? ? ? ? ? ? ? ? if (stateid && !wait) { /* callback */
>> @@ -1171,7 +1212,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
>> ? ? ? nfsi->layout->write_end_pos = 0;
>> ? ? ? nfsi->layout->cred = NULL;
>> ? ? ? __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state);
>> - ? ? pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout, NULL);
>> + ? ? pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
>>
>> ? ? ? /* Reference for layoutcommit matched in pnfs_layoutcommit_release */
>> ? ? ? get_layout_hdr_locked(NFS_I(inode)->layout);
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 05dd5e0..000acf0 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -206,6 +206,7 @@ void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr);
>> ?void pnfs_destroy_layout(struct nfs_inode *);
>> ?void pnfs_destroy_all_layouts(struct nfs_client *);
>> ?void put_layout_hdr(struct inode *inode);
>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
>> ?void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ?struct nfs4_state *open_state);
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
On 2010-11-11 15:52, Fred Isaman wrote:
> On Thu, Nov 11, 2010 at 2:00 AM, Benny Halevy <[email protected]> wrote:
>> On 2010-11-10 16:46, Fred Isaman wrote:
>>> On Wed, Nov 10, 2010 at 9:35 AM, Benny Halevy <[email protected]> wrote:
>>>> On 2010-11-04 17:22, Fred Isaman wrote:
>>>>> Instead, have mark_invalid function that marks lseg invalid and
>>>>> removes the reference that holds it in the list. Now when io is finished,
>>>>> the lseg will automatically be removed from the list. This is
>>>>> at the heart of many of the upcoming cb_layoutrecall changes.
>>>>>
>>>>> Signed-off-by: Fred Isaman <[email protected]>
>>>>> ---
>>>>> fs/nfs/nfs4xdr.c | 3 +-
>>>>> fs/nfs/pnfs.c | 145 ++++++++++++++++++++++++++++++++++-------------------
>>>>> fs/nfs/pnfs.h | 1 +
>>>>> 3 files changed, 95 insertions(+), 54 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>>> index 238eeb2..6d9ef2b 100644
>>>>> --- a/fs/nfs/nfs4xdr.c
>>>>> +++ b/fs/nfs/nfs4xdr.c
>>>>> @@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
>>>>> p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
>>>>> p = xdr_encode_hyper(p, args->range.offset);
>>>>> p = xdr_encode_hyper(p, args->range.length);
>>>>> - pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
>>>>> - NULL);
>>>>> + pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
>>>>> p = xdr_encode_opaque_fixed(p, &stateid.data,
>>>>> NFS4_STATEID_SIZE);
>>>>> p = reserve_space(xdr, 4);
>>>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>>>> index 3bbe3be..4e5c68b 100644
>>>>> --- a/fs/nfs/pnfs.c
>>>>> +++ b/fs/nfs/pnfs.c
>>>>> @@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
>>>>> lseg->layout = lo;
>>>>> }
>>>>>
>>>>> +static void
>>>>> +_put_lseg_common(struct pnfs_layout_segment *lseg)
>>>>> +{
>>>>> + BUG_ON(lseg->valid == true);
>>>>> + list_del(&lseg->fi_list);
>>>>> + if (list_empty(&lseg->layout->segs)) {
>>>>> + struct nfs_client *clp;
>>>>> +
>>>>> + clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
>>>>> + spin_lock(&clp->cl_lock);
>>>>> + /* List does not take a reference, so no need for put here */
>>>>> + list_del_init(&lseg->layout->layouts);
>>>>> + spin_unlock(&clp->cl_lock);
>>>>> + pnfs_invalidate_layout_stateid(lseg->layout);
>>>>> + }
>>>>> + rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
>>>>> +}
>>>>> +
>>>>> +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
>>>>> + * could sleep, so must be called outside of the lock.
>>>>> + */
>>>>> +static void
>>>>> +put_lseg_locked(struct pnfs_layout_segment *lseg,
>>>>> + struct list_head *tmp_list)
>>>>> +{
>>>>> + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>>>> + atomic_read(&lseg->pls_refcount), lseg->valid);
>>>>> + if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>>>> + _put_lseg_common(lseg);
>>>>> + list_add(&lseg->fi_list, tmp_list);
>>>>> + }
>>>>> +}
>>>>> +
>>>>> void
>>>>> put_lseg(struct pnfs_layout_segment *lseg)
>>>>> {
>>>>> - bool do_wake_up;
>>>>> struct inode *ino;
>>>>>
>>>>> if (!lseg)
>>>>> @@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg)
>>>>>
>>>>> dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>>>> atomic_read(&lseg->pls_refcount), lseg->valid);
>>>>> - do_wake_up = !lseg->valid;
>>>>> ino = lseg->layout->inode;
>>>>> - if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>>>> + if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
>>>>> + _put_lseg_common(lseg);
>>>>> + spin_unlock(&ino->i_lock);
>>>>> NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>>>> /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>>>> put_layout_hdr(ino);
>>>>> }
>>>>> - if (do_wake_up)
>>>>> - rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
>>>>> }
>>>>> EXPORT_SYMBOL_GPL(put_lseg);
>>>>>
>>>>> @@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
>>>>> lseg->range.iomode == range->iomode);
>>>>> }
>>>>>
>>>>> -static bool
>>>>> -_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
>>>>> +static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
>>>>> + struct list_head *tmp_list)
>>>>> {
>>>>> - return atomic_read(&lseg->pls_refcount) == 1;
>>>>> + assert_spin_locked(&lseg->layout->inode->i_lock);
>>>>> + if (lseg->valid) {
>>>>> + lseg->valid = false;
>>>>> + /* Remove the reference keeping the lseg in the
>>>>> + * list. It will now be removed when all
>>>>> + * outstanding io is finished.
>>>>> + */
>>>>> + put_lseg_locked(lseg, tmp_list);
>>>>> + }
>>>>> }
>>>>>
>>>>> static void
>>>>> @@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
>>>>> __func__, lo, range->offset, range->length, range->iomode);
>>>>>
>>>>> assert_spin_locked(&lo->inode->i_lock);
>>>>> - list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
>>>>> - if (!should_free_lseg(lseg, range) ||
>>>>> - !_pnfs_can_return_lseg(lseg))
>>>>> - continue;
>>>>> - dprintk("%s: freeing lseg %p iomode %d "
>>>>> - "offset %llu length %llu\n", __func__,
>>>>> - lseg, lseg->range.iomode, lseg->range.offset,
>>>>> - lseg->range.length);
>>>>> - list_move(&lseg->fi_list, tmp_list);
>>>>> - }
>>>>> - if (list_empty(&lo->segs)) {
>>>>> - struct nfs_client *clp;
>>>>> -
>>>>> - clp = NFS_SERVER(lo->inode)->nfs_client;
>>>>> - spin_lock(&clp->cl_lock);
>>>>> - /* List does not take a reference, so no need for put here */
>>>>> - list_del_init(&lo->layouts);
>>>>> - spin_unlock(&clp->cl_lock);
>>>>> - pnfs_invalidate_layout_stateid(lo);
>>>>> - }
>>>>> -
>>>>> + list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
>>>>> + if (should_free_lseg(lseg, range)) {
>>>>> + dprintk("%s: freeing lseg %p iomode %d "
>>>>> + "offset %llu length %llu\n", __func__,
>>>>> + lseg, lseg->range.iomode, lseg->range.offset,
>>>>> + lseg->range.length);
>>>>> + mark_lseg_invalid(lseg, tmp_list);
>>>>> + }
>>>>> dprintk("%s:Return\n", __func__);
>>>>> }
>>>>>
>>>>> static void
>>>>> -pnfs_free_lseg_list(struct list_head *tmp_list)
>>>>> +pnfs_free_lseg_list(struct list_head *free_me)
>>>>> {
>>>>> - struct pnfs_layout_segment *lseg;
>>>>> + struct pnfs_layout_segment *lseg, *tmp;
>>>>> + struct inode *ino;
>>>>>
>>>>> - while (!list_empty(tmp_list)) {
>>>>> - lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
>>>>> - fi_list);
>>>>> - dprintk("%s calling put_lseg on %p\n", __func__, lseg);
>>>>> - list_del(&lseg->fi_list);
>>>>> - put_lseg(lseg);
>>>>> + list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
>>>>> + BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
>>>>> + ino = lseg->layout->inode;
>>>>> + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>>>> + /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>>>> + put_layout_hdr(ino);
>>>>> }
>>>>> + INIT_LIST_HEAD(free_me);
>>>>> }
>>>>>
>>>>> void
>>>>> @@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
>>>>> dprintk("<-- %s\n", __func__);
>>>>> }
>>>>>
>>>>> +/* Layoutreturn may use an invalid stateid, just copy what is there */
>>>>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
>>>>> +{
>>>>> + int seq;
>>>>> +
>>>>> + do {
>>>>> + seq = read_seqbegin(&lo->seqlock);
>>>>> + memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
>>>>> + } while (read_seqretry(&lo->seqlock, seq));
>>>>> +}
>>>>> +
>>>>> void
>>>>> pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>>>>> struct nfs4_state *open_state)
>>>>> @@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
>>>>> return out;
>>>>> }
>>>>>
>>>>> +/* Return true if there is layout based io in progress in the given range.
>>>>> + * Assumes range has already been marked invalid, and layout marked to
>>>>> + * prevent any new lseg from being inserted.
>>>>> + */
>>>>> bool
>>>>> pnfs_return_layout_barrier(struct nfs_inode *nfsi,
>>>>> struct pnfs_layout_range *range)
>>>>> {
>>>>> - struct pnfs_layout_segment *lseg;
>>>>> + struct pnfs_layout_segment *lseg, *tmp;
>>>>> bool ret = false;
>>>>>
>>>>> spin_lock(&nfsi->vfs_inode.i_lock);
>>>>> - list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
>>>>> - if (!should_free_lseg(lseg, range))
>>>>> - continue;
>>>>> - lseg->valid = false;
>>>>> - if (!_pnfs_can_return_lseg(lseg)) {
>>>>> - dprintk("%s: wait on lseg %p refcount %d\n",
>>>>> - __func__, lseg,
>>>>> - atomic_read(&lseg->pls_refcount));
>>>>> + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
>>>>
>>>> Why do you need the safe version here while the inode is locked?
>>>>
>>>
>>> We don't.
>>
>> OK. I'll fix that then :)
>>
>>>
>>>
>>>>> + if (should_free_lseg(lseg, range)) {
>>>>> ret = true;
>>>>
>>>> But this will always return "true" if there's any lseg to return,
>>>> not only if (!_pnfs_can_return_lseg(lseg)).
>>>>
>>>> What am I missing? :)
>>>>
>>>
>>> A return of "true" means the caller should wait. So if there is any
>>> lseg still left to return, we should return true. The refcounting has
>>> changed so that once the pending IO is finished, the lseg will
>>> automatically be removed from the list. I suspect that what you are
>>> missing is that...the refcount in the invalid case is one less than
>>> what it used to be.
>>
>> Thanks. I see what you mean now.
>>
>> What's missing is plh_block_lgets which is introduced only
>> in [PATCH 13/18] pnfs-submit: rewrite of layout state handling and cb_layoutrecall
>> Otherwise, new lsegs can be inserted into the list in between.
>>
>> Benny
>>
>
> Hmmm, you're right. Let me see if I can tease out enough of the
> blocking code from patch 13 to make it work.
Thanks!
> The other option is to
> just merge the two patches together.
This can work too if the former is too complicated. :)
benny
>
> Fred
>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/callback.h | 1 +
fs/nfs/callback_proc.c | 35 +++++++++++++++++++++++++++++------
fs/nfs/pnfs.c | 24 +++++++++---------------
fs/nfs/pnfs.h | 7 +++----
include/linux/nfs_fs_sb.h | 3 ++-
5 files changed, 44 insertions(+), 26 deletions(-)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index c1c7f3e..8d3184c 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -163,6 +163,7 @@ extern unsigned nfs4_callback_layoutrecall(
void *dummy, struct cb_process_state *cps);
extern bool matches_outstanding_recall(struct inode *ino,
struct pnfs_layout_range *range);
+extern void notify_drained(struct nfs_client *clp, u64 mask);
#endif /* CONFIG_NFS_V4_1 */
extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 583446b..6e21add 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -229,16 +229,28 @@ void nfs_client_return_layouts(struct nfs_client *clp)
/* Removing from the list unblocks LAYOUTGETs */
list_del(&cb_info->pcl_list);
clp->cl_cb_lrecall_count--;
+ clp->cl_drain_notification[1 << cb_info->pcl_notify_bit] = NULL;
rpc_wake_up(&clp->cl_rpcwaitq_recall);
kfree(cb_info);
}
}
-void notify_drained(struct pnfs_cb_lrecall_info *d)
+void notify_drained(struct nfs_client *clp, u64 mask)
{
- if (d && atomic_dec_and_test(&d->pcl_count)) {
- set_bit(NFS4CLNT_LAYOUT_RECALL, &d->pcl_clp->cl_state);
- nfs4_schedule_state_manager(d->pcl_clp);
+ atomic_t **ptr = clp->cl_drain_notification;
+ bool done = false;
+
+ /* clp lock not needed except to remove used up entries */
+ /* Should probably use functions defined in bitmap.h */
+ while (mask) {
+ if ((mask & 1) && (atomic_dec_and_test(*ptr)))
+ done = true;
+ mask >>= 1;
+ ptr++;
+ }
+ if (done) {
+ set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
+ nfs4_schedule_state_manager(clp);
}
}
@@ -279,7 +291,9 @@ static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
if (rv == NFS4_OK) {
lo->plh_block_lgets++;
nfs4_asynch_forget_layouts(lo, &args->cbl_range,
- cb_info, &free_me_list);
+ cb_info->pcl_notify_bit,
+ &cb_info->pcl_count,
+ &free_me_list);
}
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
spin_unlock(&lo->inode->i_lock);
@@ -315,7 +329,9 @@ static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
&recall_list, plh_bulk_recall) {
spin_lock(&lo->inode->i_lock);
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
- nfs4_asynch_forget_layouts(lo, &range, cb_info,
+ nfs4_asynch_forget_layouts(lo, &range,
+ cb_info->pcl_notify_bit,
+ &cb_info->pcl_count,
&free_me_list);
list_del_init(&lo->plh_bulk_recall);
spin_unlock(&lo->inode->i_lock);
@@ -331,6 +347,8 @@ static u32 do_callback_layoutrecall(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
struct pnfs_cb_lrecall_info *new;
+ atomic_t **ptr;
+ int bit_num;
u32 res;
dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
@@ -353,12 +371,17 @@ static u32 do_callback_layoutrecall(struct nfs_client *clp,
clp->cl_cb_lrecall_count++;
/* Adding to the list will block conflicting LGET activity */
list_add_tail(&new->pcl_list, &clp->cl_layoutrecalls);
+ for (bit_num = 0, ptr = clp->cl_drain_notification; *ptr; ptr++)
+ bit_num++;
+ *ptr = &new->pcl_count;
+ new->pcl_notify_bit = bit_num;
spin_unlock(&clp->cl_lock);
res = initiate_layout_draining(new);
if (res || atomic_dec_and_test(&new->pcl_count)) {
spin_lock(&clp->cl_lock);
list_del(&new->pcl_list);
clp->cl_cb_lrecall_count--;
+ clp->cl_drain_notification[1 << bit_num] = NULL;
rpc_wake_up(&clp->cl_rpcwaitq_recall);
spin_unlock(&clp->cl_lock);
if (res == NFS4_OK) {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8d04cf2..02f2eb8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -278,7 +278,7 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
smp_mb();
lseg->valid = true;
lseg->layout = lo;
- lseg->drain_notification = NULL;
+ lseg->pls_notify_mask = 0;
}
static void
@@ -330,12 +330,12 @@ put_lseg(struct pnfs_layout_segment *lseg)
atomic_read(&lseg->pls_refcount), lseg->valid);
ino = lseg->layout->inode;
if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
- struct pnfs_cb_lrecall_info *drain_info = lseg->drain_notification;
+ u64 mask = lseg->pls_notify_mask;
_put_lseg_common(lseg);
spin_unlock(&ino->i_lock);
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
- notify_drained(drain_info);
+ notify_drained(NFS_SERVER(ino)->nfs_client, mask);
/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
put_layout_hdr(ino);
}
@@ -401,14 +401,14 @@ pnfs_free_lseg_list(struct list_head *free_me)
{
struct pnfs_layout_segment *lseg, *tmp;
struct inode *ino;
- struct pnfs_cb_lrecall_info *drain_info;
+ u64 mask;
list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
ino = lseg->layout->inode;
- drain_info = lseg->drain_notification;
+ mask = lseg->pls_notify_mask;
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
- notify_drained(drain_info);
+ notify_drained(NFS_SERVER(ino)->nfs_client, mask);
/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
put_layout_hdr(ino);
}
@@ -594,7 +594,7 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
struct pnfs_layout_range *range,
- struct pnfs_cb_lrecall_info *drain_info,
+ int notify_bit, atomic_t *notify_count,
struct list_head *tmp_list)
{
struct pnfs_layout_segment *lseg, *tmp;
@@ -602,14 +602,8 @@ void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
assert_spin_locked(&lo->inode->i_lock);
list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
if (should_free_lseg(&lseg->range, range)) {
- /* FIXME - need to change to something like a
- * notification bitmap to remove the restriction
- * of only being able to process a single
- * CB_LAYOUTRECALL at a time.
- */
- BUG_ON(lseg->drain_notification);
- lseg->drain_notification = drain_info;
- atomic_inc(&drain_info->pcl_count);
+ lseg->pls_notify_mask |= (1 << notify_bit);
+ atomic_inc(notify_count);
mark_lseg_invalid(lseg, tmp_list);
}
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 810714a..b84e362 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -39,7 +39,7 @@ struct pnfs_layout_segment {
atomic_t pls_refcount;
bool valid;
struct pnfs_layout_hdr *layout;
- struct pnfs_cb_lrecall_info *drain_notification;
+ u64 pls_notify_mask;
};
enum pnfs_try_status {
@@ -126,6 +126,7 @@ struct pnfs_device {
struct pnfs_cb_lrecall_info {
struct list_head pcl_list; /* hook into cl_layoutrecalls list */
atomic_t pcl_count;
+ int pcl_notify_bit;
struct nfs_client *pcl_clp;
struct inode *pcl_ino;
struct cb_layoutrecallargs pcl_args;
@@ -232,10 +233,8 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
struct nfs4_state *open_state);
void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
struct pnfs_layout_range *range,
- struct pnfs_cb_lrecall_info *drain_info,
+ int notify_bit, atomic_t *notify_count,
struct list_head *tmp_list);
-/* FIXME - this should be in callback.h, but pnfs_cb_lrecall_info needs to be there too */
-extern void notify_drained(struct pnfs_cb_lrecall_info *d);
static inline bool
has_layout(struct nfs_inode *nfsi)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 80dcc00..295d449 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -85,7 +85,8 @@ struct nfs_client {
struct list_head cl_layouts;
struct list_head cl_layoutrecalls;
unsigned long cl_cb_lrecall_count;
-#define PNFS_MAX_CB_LRECALLS (1)
+#define PNFS_MAX_CB_LRECALLS (64)
+ atomic_t *cl_drain_notification[PNFS_MAX_CB_LRECALLS];
struct rpc_wait_queue cl_rpcwaitq_recall;
struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
#endif /* CONFIG_NFS_V4_1 */
--
1.7.2.1
Converting the layout header refcount to an atomic_t is needed because
we need to increment the refcount outside of the i_lock. In particular,
we will need to scan cl_layouts while holding cl_lock, and grab a
reference to each lo found.
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 50 +++++++++++++++++++++++++++++---------------------
fs/nfs/pnfs.h | 2 +-
2 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a2d8d3f..d9a867f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -232,34 +232,42 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
* pNFS client layout cache
*/
+/* Need to hold i_lock if caller does not already hold reference */
static void
-get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
+get_layout_hdr(struct pnfs_layout_hdr *lo)
{
- assert_spin_locked(&lo->inode->i_lock);
- lo->refcount++;
+ atomic_inc(&lo->plh_refcount);
+ smp_mb__after_atomic_inc();
+}
+
+static void
+destroy_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+ dprintk("%s: freeing layout cache %p\n", __func__, lo);
+ BUG_ON(!list_empty(&lo->layouts));
+ NFS_I(lo->inode)->layout = NULL;
+ kfree(lo);
}
static void
put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
assert_spin_locked(&lo->inode->i_lock);
- BUG_ON(lo->refcount == 0);
-
- lo->refcount--;
- if (!lo->refcount) {
- dprintk("%s: freeing layout cache %p\n", __func__, lo);
- BUG_ON(!list_empty(&lo->layouts));
- NFS_I(lo->inode)->layout = NULL;
- kfree(lo);
- }
+ BUG_ON(atomic_read(&lo->plh_refcount) == 0);
+ if (atomic_dec_and_test(&lo->plh_refcount))
+ destroy_layout_hdr(lo);
}
void
put_layout_hdr(struct inode *inode)
{
- spin_lock(&inode->i_lock);
- put_layout_hdr_locked(NFS_I(inode)->layout);
- spin_unlock(&inode->i_lock);
+ struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
+
+ BUG_ON(atomic_read(&lo->plh_refcount) == 0);
+ if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+ destroy_layout_hdr(lo);
+ spin_unlock(&inode->i_lock);
+ }
}
static void
@@ -413,7 +421,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
pnfs_clear_lseg_list(lo, &tmp_list, &range);
WARN_ON(!list_empty(&nfsi->layout->segs));
WARN_ON(!list_empty(&nfsi->layout->layouts));
- WARN_ON(nfsi->layout->refcount != 1);
+ WARN_ON(atomic_read(&nfsi->layout->plh_refcount) != 1);
/* Matched by refcount set to 1 in alloc_init_layout_hdr */
put_layout_hdr_locked(lo);
@@ -657,7 +665,7 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
if (should_free_lseg(lseg, &arg))
mark_lseg_invalid(lseg, &tmp_list);
/* Reference matched in nfs4_layoutreturn_release */
- get_layout_hdr_locked(lo);
+ get_layout_hdr(lo);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
@@ -738,7 +746,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
__func__, lseg, lseg->range.iomode,
lseg->range.offset, lseg->range.length);
}
- get_layout_hdr_locked(lo);
+ get_layout_hdr(lo);
dprintk("%s:Return\n", __func__);
}
@@ -751,7 +759,7 @@ alloc_init_layout_hdr(struct inode *ino)
lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
if (!lo)
return NULL;
- lo->refcount = 1;
+ atomic_set(&lo->plh_refcount, 1);
INIT_LIST_HEAD(&lo->layouts);
INIT_LIST_HEAD(&lo->segs);
lo->inode = ino;
@@ -875,7 +883,7 @@ pnfs_update_layout(struct inode *ino,
if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
goto out_unlock;
- get_layout_hdr_locked(lo); /* Matched in pnfs_layoutget_release */
+ get_layout_hdr(lo); /* Matched in pnfs_layoutget_release */
spin_unlock(&ino->i_lock);
lseg = send_layoutget(lo, ctx, &arg);
@@ -1177,7 +1185,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
- get_layout_hdr_locked(NFS_I(inode)->layout);
+ get_layout_hdr(NFS_I(inode)->layout);
spin_unlock(&inode->i_lock);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 96af66f..e631487 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -92,7 +92,7 @@ struct pnfs_layoutdriver_type {
};
struct pnfs_layout_hdr {
- unsigned long refcount;
+ atomic_t plh_refcount;
struct list_head layouts; /* other client layouts */
struct list_head segs; /* layout segments list */
int roc_iomode;/* return on close iomode, 0=none */
--
1.7.2.1
Preparing for change in stateid code, and want to avoid name confusion.
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 18 +++++++++---------
fs/nfs/pnfs.h | 6 +++---
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 44f4f30..a2d8d3f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -81,10 +81,10 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
spin_lock(&nfsi->vfs_inode.i_lock);
if (has_layout(nfsi) &&
- !test_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state)) {
+ !test_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->plh_flags)) {
nfsi->layout->cred = get_rpccred(ctx->state->owner->so_cred);
__set_bit(NFS_LAYOUT_NEED_LCOMMIT,
- &nfsi->layout->state);
+ &nfsi->layout->plh_flags);
nfsi->change_attr++;
spin_unlock(&nfsi->vfs_inode.i_lock);
dprintk("%s: Set layoutcommit\n", __func__);
@@ -457,7 +457,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
bool overwrite = false;
assert_spin_locked(&lo->inode->i_lock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
+ if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags) ||
memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
overwrite = true;
else {
@@ -485,7 +485,7 @@ pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
{
dprintk("--> %s\n", __func__);
spin_lock(&lo->inode->i_lock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
+ if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags)) {
int seq;
do {
@@ -493,7 +493,7 @@ pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
memcpy(dst->data, open_state->stateid.data,
sizeof(open_state->stateid.data));
} while (read_seqretry(&open_state->seqlock, seq));
- set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+ set_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags);
} else
memcpy(dst->data, lo->stateid.data,
sizeof(lo->stateid.data));
@@ -541,7 +541,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
nfs4_proc_layoutget(lgp);
if (!lseg) {
/* remember that LAYOUTGET failed and suspend trying */
- set_bit(lo_fail_bit(range->iomode), &lo->state);
+ set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
}
return lseg;
}
@@ -872,7 +872,7 @@ pnfs_update_layout(struct inode *ino,
}
/* if LAYOUTGET already failed once we don't try again */
- if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
+ if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
goto out_unlock;
get_layout_hdr_locked(lo); /* Matched in pnfs_layoutget_release */
@@ -881,7 +881,7 @@ pnfs_update_layout(struct inode *ino,
lseg = send_layoutget(lo, ctx, &arg);
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
- nfsi->layout->state, lseg);
+ nfsi->layout->plh_flags, lseg);
return lseg;
out_unlock:
spin_unlock(&ino->i_lock);
@@ -1173,7 +1173,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
nfsi->layout->write_begin_pos = 0;
nfsi->layout->write_end_pos = 0;
nfsi->layout->cred = NULL;
- __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state);
+ __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->plh_flags);
pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
/* Reference for layoutcommit matched in pnfs_layoutcommit_release */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f0232f5..96af66f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -97,7 +97,7 @@ struct pnfs_layout_hdr {
struct list_head segs; /* layout segments list */
int roc_iomode;/* return on close iomode, 0=none */
nfs4_stateid stateid;
- unsigned long state;
+ unsigned long plh_flags;
struct rpc_cred *cred; /* layoutcommit credential */
/* DH: These vars keep track of the maximum write range
* so the values can be used for layoutcommit.
@@ -225,7 +225,7 @@ static inline int lo_fail_bit(u32 iomode)
static inline void pnfs_invalidate_layout_stateid(struct pnfs_layout_hdr *lo)
{
assert_spin_locked(&lo->inode->i_lock);
- clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+ clear_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags);
}
static inline void get_lseg(struct pnfs_layout_segment *lseg)
@@ -268,7 +268,7 @@ static inline bool
layoutcommit_needed(struct nfs_inode *nfsi)
{
return has_layout(nfsi) &&
- test_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state);
+ test_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->plh_flags);
}
#else /* CONFIG_NFS_V4_1 */
--
1.7.2.1
This prepares for future changes, where the layout state needs
to change atomically with several other variables. In particular,
it will need to know if lo->segs is empty. Moreover, the
layoutstateid is not really a read-mostly structure, as it is
written on each LAYOUTGET.
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/callback_proc.c | 8 +++---
fs/nfs/nfs4xdr.c | 2 +
fs/nfs/pnfs.c | 55 ++++++++++++++---------------------------------
fs/nfs/pnfs.h | 4 +--
4 files changed, 24 insertions(+), 45 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 84c5a1b..3e022a8 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -135,12 +135,11 @@ static bool
pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
const nfs4_stateid stateid)
{
- int seqlock;
bool res;
u32 oldseqid, newseqid;
- do {
- seqlock = read_seqbegin(&lo->seqlock);
+ spin_lock(&lo->inode->i_lock);
+ {
oldseqid = be32_to_cpu(lo->stateid.stateid.seqid);
newseqid = be32_to_cpu(stateid.stateid.seqid);
res = !memcmp(lo->stateid.stateid.other,
@@ -158,7 +157,8 @@ pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
if (res)
res = (newseqid == 1);
}
- } while (read_seqretry(&lo->seqlock, seqlock));
+ }
+ spin_unlock(&lo->inode->i_lock);
return res;
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6d9ef2b..b71a482 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1915,7 +1915,9 @@ encode_layoutreturn(struct xdr_stream *xdr,
p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
p = xdr_encode_hyper(p, args->range.offset);
p = xdr_encode_hyper(p, args->range.length);
+ spin_lock(&args->inode->i_lock);
pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
+ spin_unlock(&args->inode->i_lock);
p = xdr_encode_opaque_fixed(p, &stateid.data,
NFS4_STATEID_SIZE);
p = reserve_space(xdr, 4);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4e5c68b..01ecb95 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -456,7 +456,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
nfs4_stateid *old = &lo->stateid;
bool overwrite = false;
- write_seqlock(&lo->seqlock);
+ assert_spin_locked(&lo->inode->i_lock);
if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
overwrite = true;
@@ -470,54 +470,34 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
}
if (overwrite)
memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
- write_sequnlock(&lo->seqlock);
-}
-
-static void
-pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
- struct nfs4_state *state)
-{
- int seq;
-
- dprintk("--> %s\n", __func__);
- write_seqlock(&lo->seqlock);
- do {
- seq = read_seqbegin(&state->seqlock);
- memcpy(lo->stateid.data, state->stateid.data,
- sizeof(state->stateid.data));
- } while (read_seqretry(&state->seqlock, seq));
- set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
- write_sequnlock(&lo->seqlock);
- dprintk("<-- %s\n", __func__);
}
/* Layoutreturn may use an invalid stateid, just copy what is there */
void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
{
- int seq;
-
- do {
- seq = read_seqbegin(&lo->seqlock);
- memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
- } while (read_seqretry(&lo->seqlock, seq));
+ assert_spin_locked(&lo->inode->i_lock);
+ memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
}
void
pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
struct nfs4_state *open_state)
{
- int seq;
-
dprintk("--> %s\n", __func__);
- do {
- seq = read_seqbegin(&lo->seqlock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
- /* This will trigger retry of the read */
- pnfs_layout_from_open_stateid(lo, open_state);
- } else
- memcpy(dst->data, lo->stateid.data,
- sizeof(lo->stateid.data));
- } while (read_seqretry(&lo->seqlock, seq));
+ spin_lock(&lo->inode->i_lock);
+ if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
+ int seq;
+
+ do {
+ seq = read_seqbegin(&open_state->seqlock);
+ memcpy(dst->data, open_state->stateid.data,
+ sizeof(open_state->stateid.data));
+ } while (read_seqretry(&open_state->seqlock, seq));
+ set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
+ } else
+ memcpy(dst->data, lo->stateid.data,
+ sizeof(lo->stateid.data));
+ spin_unlock(&lo->inode->i_lock);
dprintk("<-- %s\n", __func__);
}
@@ -791,7 +771,6 @@ alloc_init_layout_hdr(struct inode *ino)
lo->refcount = 1;
INIT_LIST_HEAD(&lo->layouts);
INIT_LIST_HEAD(&lo->segs);
- seqlock_init(&lo->seqlock);
lo->inode = ino;
return lo;
}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 000acf0..de4eaa8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -96,7 +96,6 @@ struct pnfs_layout_hdr {
struct list_head layouts; /* other client layouts */
struct list_head segs; /* layout segments list */
int roc_iomode;/* return on close iomode, 0=none */
- seqlock_t seqlock; /* Protects the stateid */
nfs4_stateid stateid;
unsigned long state;
struct rpc_cred *cred; /* layoutcommit credential */
@@ -224,9 +223,8 @@ static inline int lo_fail_bit(u32 iomode)
static inline void pnfs_invalidate_layout_stateid(struct pnfs_layout_hdr *lo)
{
- write_seqlock(&lo->seqlock);
+ assert_spin_locked(&lo->inode->i_lock);
clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
- write_sequnlock(&lo->seqlock);
}
static inline void get_lseg(struct pnfs_layout_segment *lseg)
--
1.7.2.1
From: Andy Adamson <[email protected]>
Place the layoutcommit operation prior to the close operation in the close
compound so that the filehandle is still valid.
If the layoutcommit fails, a retry of the close compound, which retries with
rpc_restart_call_prepare and so calls pnfs_roc again, will not include the
layoutcommit operation, as the layoutcommit_needed test will be false having
been satisfied by the failed compound.
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4xdr.c | 5 +++++
fs/nfs/pnfs.c | 9 ++++++++-
include/linux/nfs_xdr.h | 2 ++
3 files changed, 15 insertions(+), 1 deletions(-)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1c8a3c4..bc97b33 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2135,6 +2135,8 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closea
encode_compound_hdr(&xdr, req, &hdr);
encode_sequence(&xdr, &args->seq_args, &hdr);
encode_putfh(&xdr, args->fh, &hdr);
+ if (args->op_bitmask & NFS4_HAS_LAYOUTCOMMIT) /* layoutcommit set */
+ encode_layoutcommit(&xdr, &args->lc_args, &hdr);
encode_close(&xdr, args, &hdr);
encode_getfattr(&xdr, args->bitmask, &hdr);
if (args->op_bitmask & NFS4_HAS_LAYOUTRETURN) /* layoutreturn set */
@@ -5704,6 +5706,9 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
status = decode_putfh(&xdr);
if (status)
goto out;
+ /* We pay no attention to the layoutcommit return */
+ if (res->op_bitmask & NFS4_HAS_LAYOUTCOMMIT)
+ decode_layoutcommit(&xdr);
status = decode_close(&xdr, res);
if (status != 0)
goto out;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 521ee9d..6724dc4 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -634,7 +634,6 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
* Return on close
*
* No LAYOUTRETURNS can be sent when BULK RECALL flag is set.
- * FIXME: add layoutcommit operation if layoutcommit_needed is true.
*/
bool
pnfs_roc(struct nfs4_closedata *data)
@@ -662,6 +661,14 @@ pnfs_roc(struct nfs4_closedata *data)
}
if (found == false)
goto out_nolayout;
+
+ /* Add layoutcommit operation if needed */
+ if (layoutcommit_needed(NFS_I(data->inode))) {
+ pnfs_layoutcommit_setup(data->inode, &data->arg.lc_args, false);
+ data->res.op_bitmask |= NFS4_HAS_LAYOUTCOMMIT;
+ data->arg.op_bitmask |= NFS4_HAS_LAYOUTCOMMIT;
+ }
+
/* Stop new and drop response to outstanding LAYOUTGETS */
lo->plh_block_lgets++;
lo->plh_outstanding++;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 851b09f..d4c4804 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -353,6 +353,7 @@ struct nfs_open_confirmres {
/* op_bitmask bits */
#define NFS4_HAS_LAYOUTRETURN 0x01
+#define NFS4_HAS_LAYOUTCOMMIT 0x02
struct nfs_closeargs {
struct nfs_fh * fh;
@@ -361,6 +362,7 @@ struct nfs_closeargs {
fmode_t fmode;
const u32 * bitmask;
u32 op_bitmask; /* which optional ops to encode */
+ struct nfs4_layoutcommit_op_args lc_args; /* optional */
struct nfs4_layoutreturn_args lr_args; /* optional */
struct nfs4_sequence_args seq_args;
};
--
1.7.2.1
Remove NFS_LAYOUT_STATEID_SET in favor of just checking list_empty(lo->segs).
LAYOUTGETs with openstateid are serialized. Waiting on the condition
(list_empty(lo->segs) && plh_outstanding>0) both drains outstanding RPCs once
the stateid is invalidated and allows only a single LAYOUTGET(openstateid)
through at a time.
Before sending a LAYOUTRETURN, plh_block_lgets is incremented. It is
decremented in the rpc_release function. While set, LAYOUTGETs are
paused in their rpc_prepare function, and any responses are
forgotten.
Callbacks are handled by blocking any matching LAYOUTGETS while processing and
initiating drain of IO. A notification system is set up so that when
all relevant IO is finished, the state manager thread is invoked, which
synchronously sends the final matching LAYOUTRETURN before unblocking
LAYOUTGETS.
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/callback.h | 4 +-
fs/nfs/callback_proc.c | 471 +++++++++++++++++++++++----------------------
fs/nfs/client.c | 3 +
fs/nfs/inode.c | 3 +-
fs/nfs/nfs4proc.c | 105 +++++++---
fs/nfs/nfs4state.c | 4 +
fs/nfs/nfs4xdr.c | 16 ++-
fs/nfs/pnfs.c | 181 +++++++++++++----
fs/nfs/pnfs.h | 41 +++-
include/linux/nfs_fs.h | 1 +
include/linux/nfs_fs_sb.h | 4 +
11 files changed, 518 insertions(+), 315 deletions(-)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 817b0f4..c1c7f3e 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -161,7 +161,8 @@ struct cb_layoutrecallargs {
extern unsigned nfs4_callback_layoutrecall(
struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps);
-
+extern bool matches_outstanding_recall(struct inode *ino,
+ struct pnfs_layout_range *range);
#endif /* CONFIG_NFS_V4_1 */
extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
@@ -171,6 +172,7 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
struct cb_process_state *cps);
#ifdef CONFIG_NFS_V4
+extern void nfs_client_return_layouts(struct nfs_client *clp);
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
extern void nfs_callback_down(int minorversion);
extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 1509c34..583446b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -132,270 +132,291 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
#if defined(CONFIG_NFS_V4_1)
static bool
-pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
- const nfs4_stateid stateid)
+_recall_matches_lget(struct pnfs_cb_lrecall_info *cb_info,
+ struct inode *ino, struct pnfs_layout_range *range)
{
- bool res;
- u32 oldseqid, newseqid;
-
- spin_lock(&lo->inode->i_lock);
- {
- oldseqid = be32_to_cpu(lo->stateid.stateid.seqid);
- newseqid = be32_to_cpu(stateid.stateid.seqid);
- res = !memcmp(lo->stateid.stateid.other,
- stateid.stateid.other,
- NFS4_STATEID_OTHER_SIZE);
- if (res) { /* comparing layout stateids */
- if (oldseqid == ~0)
- res = (newseqid == 1);
- else
- res = (newseqid == oldseqid + 1);
- } else { /* open stateid */
- res = !memcmp(lo->stateid.data,
- &zero_stateid,
- NFS4_STATEID_SIZE);
- if (res)
- res = (newseqid == 1);
- }
- }
- spin_unlock(&lo->inode->i_lock);
+ struct cb_layoutrecallargs *cb_args = &cb_info->pcl_args;
- return res;
+ switch (cb_args->cbl_recall_type) {
+ case RETURN_ALL:
+ return true;
+ case RETURN_FSID:
+ return !memcmp(&NFS_SERVER(ino)->fsid, &cb_args->cbl_fsid,
+ sizeof(struct nfs_fsid));
+ case RETURN_FILE:
+ if (ino != cb_info->pcl_ino)
+ return false;
+ return should_free_lseg(range, &cb_args->cbl_range);
+ default:
+ BUG();
+ }
}
-/*
- * Retrieve an inode based on layout recall parameters
- *
- * Note: caller must iput(inode) to dereference the inode.
- */
-static struct inode *
-nfs_layoutrecall_find_inode(struct nfs_client *clp,
- const struct cb_layoutrecallargs *args)
+bool
+matches_outstanding_recall(struct inode *ino, struct pnfs_layout_range *range)
{
- struct nfs_inode *nfsi;
- struct pnfs_layout_hdr *lo;
- struct nfs_server *server;
- struct inode *ino = NULL;
-
- dprintk("%s: Begin recall_type=%d clp %p\n",
- __func__, args->cbl_recall_type, clp);
-
- spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, layouts) {
- nfsi = NFS_I(lo->inode);
- if (!nfsi)
- continue;
-
- dprintk("%s: Searching inode=%lu\n",
- __func__, nfsi->vfs_inode.i_ino);
-
- if (args->cbl_recall_type == RETURN_FILE) {
- if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
- continue;
- } else if (args->cbl_recall_type == RETURN_FSID) {
- server = NFS_SERVER(&nfsi->vfs_inode);
- if (server->fsid.major != args->cbl_fsid.major ||
- server->fsid.minor != args->cbl_fsid.minor)
- continue;
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+ struct pnfs_cb_lrecall_info *cb_info;
+ bool rv = false;
+
+ assert_spin_locked(&clp->cl_lock);
+ list_for_each_entry(cb_info, &clp->cl_layoutrecalls, pcl_list) {
+ if (_recall_matches_lget(cb_info, ino, range)) {
+ rv = true;
+ break;
}
-
- /* Make sure client didn't clean up layout without
- * telling the server */
- if (!has_layout(nfsi))
- continue;
-
- ino = igrab(&nfsi->vfs_inode);
- dprintk("%s: Found inode=%p\n", __func__, ino);
- break;
}
- spin_unlock(&clp->cl_lock);
- return ino;
+ return rv;
}
-struct recall_layout_threadargs {
- struct inode *inode;
- struct nfs_client *clp;
- struct completion started;
- struct cb_layoutrecallargs *rl;
- int result;
-};
-
-static int pnfs_recall_layout(void *data)
+/* Send a synchronous LAYOUTRETURN. By the time this is called, we know
+ * all IO has been drained, any matching lsegs deleted, and that no
+ * overlapping LAYOUTGETs will be sent or processed for the duration
+ * of this call.
+ * Note that it is possible that when this is called, the stateid has
+ * been invalidated. But will not be cleared, so can still use.
+ */
+static int
+pnfs_send_layoutreturn(struct nfs_client *clp,
+ struct pnfs_cb_lrecall_info *cb_info)
{
- struct inode *inode, *ino;
- struct nfs_client *clp;
- struct cb_layoutrecallargs rl;
+ struct cb_layoutrecallargs *args = &cb_info->pcl_args;
struct nfs4_layoutreturn *lrp;
- struct recall_layout_threadargs *args =
- (struct recall_layout_threadargs *)data;
- int status = 0;
-
- daemonize("nfsv4-layoutreturn");
-
- dprintk("%s: recall_type=%d fsid 0x%llx-0x%llx start\n",
- __func__, args->rl->cbl_recall_type,
- args->rl->cbl_fsid.major, args->rl->cbl_fsid.minor);
-
- clp = args->clp;
- inode = args->inode;
- rl = *args->rl;
-
- /* support whole file layouts only */
- rl.cbl_range.offset = 0;
- rl.cbl_range.length = NFS4_MAX_UINT64;
-
- if (rl.cbl_recall_type == RETURN_FILE) {
- if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
- rl.cbl_stateid))
- status = pnfs_return_layout(inode, &rl.cbl_range,
- &rl.cbl_stateid, RETURN_FILE,
- false);
- else
- status = cpu_to_be32(NFS4ERR_DELAY);
- if (status)
- dprintk("%s RETURN_FILE error: %d\n", __func__, status);
- else
- status = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
- args->result = status;
- complete(&args->started);
- goto out;
- }
-
- status = cpu_to_be32(NFS4_OK);
- args->result = status;
- complete(&args->started);
- args = NULL;
-
- /* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
- while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
- /* FIXME: need to check status on pnfs_return_layout */
- pnfs_return_layout(ino, &rl.cbl_range, NULL, RETURN_FILE, false);
- iput(ino);
- }
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
- if (!lrp) {
- dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
- __func__);
- goto out;
- }
-
- /* send final layoutreturn */
+ if (!lrp)
+ return -ENOMEM;
lrp->args.reclaim = 0;
- lrp->args.layout_type = rl.cbl_layout_type;
- lrp->args.return_type = rl.cbl_recall_type;
+ lrp->args.layout_type = args->cbl_layout_type;
+ lrp->args.return_type = args->cbl_recall_type;
lrp->clp = clp;
- lrp->args.range = rl.cbl_range;
- lrp->args.inode = inode;
- nfs4_proc_layoutreturn(lrp, true);
-
-out:
- clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
- nfs_put_client(clp);
- module_put_and_exit(0);
- dprintk("%s: exit status %d\n", __func__, 0);
- return 0;
+ if (args->cbl_recall_type == RETURN_FILE) {
+ lrp->args.range = args->cbl_range;
+ lrp->args.inode = cb_info->pcl_ino;
+ } else {
+ lrp->args.range.iomode = IOMODE_ANY;
+ lrp->args.inode = NULL;
+ }
+ return nfs4_proc_layoutreturn(lrp, true);
}
-/*
- * Asynchronous layout recall!
+/* Called by state manager to finish CB_LAYOUTRECALLS initiated by
+ * nfs4_callback_layoutrecall().
*/
-static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
- struct cb_layoutrecallargs *rl)
+void nfs_client_return_layouts(struct nfs_client *clp)
{
- struct recall_layout_threadargs data = {
- .clp = clp,
- .inode = inode,
- .rl = rl,
- };
- struct task_struct *t;
- int status = -EAGAIN;
+ struct pnfs_cb_lrecall_info *cb_info;
- dprintk("%s: -->\n", __func__);
+ spin_lock(&clp->cl_lock);
+ while (true) {
+ if (list_empty(&clp->cl_layoutrecalls)) {
+ spin_unlock(&clp->cl_lock);
+ break;
+ }
+ cb_info = list_first_entry(&clp->cl_layoutrecalls,
+ struct pnfs_cb_lrecall_info,
+ pcl_list);
+ spin_unlock(&clp->cl_lock);
+ if (atomic_read(&cb_info->pcl_count) != 0)
+ break;
+ /* What do on error return? These layoutreturns are
+ * required by the protocol. So if do not get
+ * successful reply, probably have to do something
+ * more drastic.
+ */
+ pnfs_send_layoutreturn(clp, cb_info);
+ spin_lock(&clp->cl_lock);
+ /* Removing from the list unblocks LAYOUTGETs */
+ list_del(&cb_info->pcl_list);
+ clp->cl_cb_lrecall_count--;
+ rpc_wake_up(&clp->cl_rpcwaitq_recall);
+ kfree(cb_info);
+ }
+}
- /* FIXME: do not allow two concurrent layout recalls */
- if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
- return status;
-
- init_completion(&data.started);
- __module_get(THIS_MODULE);
- atomic_inc(&clp->cl_count);
-
- t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
- if (IS_ERR(t)) {
- printk(KERN_INFO "NFS: Layout recall callback thread failed "
- "for client (clientid %08x/%08x)\n",
- (unsigned)(clp->cl_clientid >> 32),
- (unsigned)(clp->cl_clientid));
- status = PTR_ERR(t);
- goto out_module_put;
+void notify_drained(struct pnfs_cb_lrecall_info *d)
+{
+ if (d && atomic_dec_and_test(&d->pcl_count)) {
+ set_bit(NFS4CLNT_LAYOUT_RECALL, &d->pcl_clp->cl_state);
+ nfs4_schedule_state_manager(d->pcl_clp);
}
- wait_for_completion(&data.started);
- return data.result;
-out_module_put:
- nfs_put_client(clp);
- clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
- module_put(THIS_MODULE);
- return status;
}
-static int pnfs_recall_all_layouts(struct nfs_client *clp)
+static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
{
- struct cb_layoutrecallargs rl;
- struct inode *inode;
- int status = 0;
-
- rl.cbl_recall_type = RETURN_ALL;
- rl.cbl_range.iomode = IOMODE_ANY;
- rl.cbl_range.offset = 0;
- rl.cbl_range.length = NFS4_MAX_UINT64;
-
- /* we need the inode to get the nfs_server struct */
- inode = nfs_layoutrecall_find_inode(clp, &rl);
- if (!inode)
- return status;
- status = pnfs_async_return_layout(clp, inode, &rl);
- iput(inode);
+ struct nfs_client *clp = cb_info->pcl_clp;
+ struct pnfs_layout_hdr *lo;
+ int rv = NFS4ERR_NOMATCHING_LAYOUT;
+ struct cb_layoutrecallargs *args = &cb_info->pcl_args;
+
+ if (args->cbl_recall_type == RETURN_FILE) {
+ LIST_HEAD(free_me_list);
+
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(lo, &clp->cl_layouts, layouts) {
+ if (nfs_compare_fh(&args->cbl_fh,
+ &NFS_I(lo->inode)->fh))
+ continue;
+ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+ rv = NFS4ERR_DELAY;
+ else {
+ /* FIXME I need to better understand igrab and
+ * does having a layout ref keep ino around?
+ * It should.
+ */
+ /* We need to hold the reference until any
+ * potential LAYOUTRETURN is finished.
+ */
+ get_layout_hdr(lo);
+ cb_info->pcl_ino = lo->inode;
+ rv = NFS4_OK;
+ }
+ break;
+ }
+ spin_unlock(&clp->cl_lock);
+
+ spin_lock(&lo->inode->i_lock);
+ if (rv == NFS4_OK) {
+ lo->plh_block_lgets++;
+ nfs4_asynch_forget_layouts(lo, &args->cbl_range,
+ cb_info, &free_me_list);
+ }
+ pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+ spin_unlock(&lo->inode->i_lock);
+ pnfs_free_lseg_list(&free_me_list);
+ } else {
+ struct pnfs_layout_hdr *tmp;
+ LIST_HEAD(recall_list);
+ LIST_HEAD(free_me_list);
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ spin_lock(&clp->cl_lock);
+ /* Per RFC 5661, 12.5.5.2.1.5, bulk recall must be serialized */
+ if (!list_is_singular(&clp->cl_layoutrecalls)) {
+ spin_unlock(&clp->cl_lock);
+ return NFS4ERR_DELAY;
+ }
+ list_for_each_entry(lo, &clp->cl_layouts, layouts) {
+ if ((args->cbl_recall_type == RETURN_FSID) &&
+ memcmp(&NFS_SERVER(lo->inode)->fsid,
+ &args->cbl_fsid, sizeof(struct nfs_fsid)))
+ continue;
+ get_layout_hdr(lo);
+ /* We could list_del(&lo->layouts) here */
+ BUG_ON(!list_empty(&lo->plh_bulk_recall));
+ list_add(&lo->plh_bulk_recall, &recall_list);
+ }
+ spin_unlock(&clp->cl_lock);
+ list_for_each_entry_safe(lo, tmp,
+ &recall_list, plh_bulk_recall) {
+ spin_lock(&lo->inode->i_lock);
+ set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ nfs4_asynch_forget_layouts(lo, &range, cb_info,
+ &free_me_list);
+ list_del_init(&lo->plh_bulk_recall);
+ spin_unlock(&lo->inode->i_lock);
+ put_layout_hdr(lo->inode);
+ rv = NFS4_OK;
+ }
+ pnfs_free_lseg_list(&free_me_list);
+ }
+ return rv;
+}
+
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ struct pnfs_cb_lrecall_info *new;
+ u32 res;
+
+ dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new) {
+ res = NFS4ERR_RESOURCE;
+ goto out;
+ }
+ memcpy(&new->pcl_args, args, sizeof(*args));
+ atomic_set(&new->pcl_count, 1);
+ new->pcl_clp = clp;
+ new->pcl_ino = NULL;
+ spin_lock(&clp->cl_lock);
+ if (clp->cl_cb_lrecall_count >= PNFS_MAX_CB_LRECALLS) {
+ kfree(new);
+ res = NFS4ERR_DELAY;
+ spin_unlock(&clp->cl_lock);
+ goto out;
+ }
+ clp->cl_cb_lrecall_count++;
+ /* Adding to the list will block conflicting LGET activity */
+ list_add_tail(&new->pcl_list, &clp->cl_layoutrecalls);
+ spin_unlock(&clp->cl_lock);
+ res = initiate_layout_draining(new);
+ if (res || atomic_dec_and_test(&new->pcl_count)) {
+ spin_lock(&clp->cl_lock);
+ list_del(&new->pcl_list);
+ clp->cl_cb_lrecall_count--;
+ rpc_wake_up(&clp->cl_rpcwaitq_recall);
+ spin_unlock(&clp->cl_lock);
+ if (res == NFS4_OK) {
+ if (args->cbl_recall_type == RETURN_FILE) {
+ struct pnfs_layout_hdr *lo;
+
+ lo = NFS_I(new->pcl_ino)->layout;
+ spin_lock(&lo->inode->i_lock);
+ lo->plh_block_lgets--;
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(lo->inode)->lo_rpcwaitq_stateid);
+ spin_unlock(&lo->inode->i_lock);
+ put_layout_hdr(new->pcl_ino);
+ }
+ res = NFS4ERR_NOMATCHING_LAYOUT;
+ }
+ kfree(new);
+ }
+out:
+ dprintk("%s returning %i\n", __func__, res);
+ return res;
- return status;
}
__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps)
{
struct nfs_client *clp;
- struct inode *inode = NULL;
- __be32 res;
- int status;
+ u32 res;
dprintk("%s: -->\n", __func__);
- res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
if (cps->session) /* set in cb_sequence */
clp = cps->session->clp;
- else
+ else {
+ res = NFS4ERR_OP_NOT_IN_SESSION;
goto out;
-
+ }
/* the callback must come from the MDS personality */
- res = cpu_to_be32(NFS4ERR_NOTSUPP);
- if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
+ res = NFS4ERR_INVAL;
goto out;
-
- res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
- /*
- * In the _ALL or _FSID case, we need the inode to get
- * the nfs_server struct.
- */
- inode = nfs_layoutrecall_find_inode(clp, args);
- if (!inode)
- goto out;
- status = pnfs_async_return_layout(clp, inode, args);
- if (status)
- res = cpu_to_be32(NFS4ERR_DELAY);
- iput(inode);
+ }
+ res = do_callback_layoutrecall(clp, args);
out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
- return res;
+ dprintk("%s: exit with status = %d\n", __func__, res);
+ return cpu_to_be32(res);
+}
+
+static void pnfs_recall_all_layouts(struct nfs_client *clp)
+{
+ struct cb_layoutrecallargs args;
+
+ /* Pretend we got a CB_LAYOUTRECALL(ALL) */
+ memset(&args, 0, sizeof(args));
+ args.cbl_recall_type = RETURN_ALL;
+ /* FIXME we ignore errors, what should we do? */
+ do_callback_layoutrecall(clp, &args);
}
int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
@@ -677,9 +698,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
flags |= FMODE_WRITE;
if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
&args->craa_type_mask))
- if (pnfs_recall_all_layouts(clp) == -EAGAIN)
- status = cpu_to_be32(NFS4ERR_DELAY);
-
+ pnfs_recall_all_layouts(clp);
if (flags)
nfs_expire_all_delegation_types(clp, flags);
out:
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3c8c841..dbf43e7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -158,6 +158,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
clp->cl_machine_cred = cred;
#if defined(CONFIG_NFS_V4_1)
INIT_LIST_HEAD(&clp->cl_layouts);
+ INIT_LIST_HEAD(&clp->cl_layoutrecalls);
+ rpc_init_wait_queue(&clp->cl_rpcwaitq_recall,
+ "NFS client CB_LAYOUTRECALLS");
#endif
nfs_fscache_get_client_cookie(clp);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 72f27cc..8727ade 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1459,7 +1459,8 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
nfsi->delegation = NULL;
nfsi->delegation_state = 0;
init_rwsem(&nfsi->rwsem);
- rpc_init_wait_queue(&nfsi->lo_rpcwaitq, "pNFS Layout");
+ rpc_init_wait_queue(&nfsi->lo_rpcwaitq, "pNFS Layoutreturn");
+ rpc_init_wait_queue(&nfsi->lo_rpcwaitq_stateid, "pNFS Layoutstateid");
nfsi->layout = NULL;
#endif
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be19e225..87b2b63 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5346,42 +5346,58 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
struct inode *ino = lgp->args.inode;
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_server *server = NFS_SERVER(ino);
- struct pnfs_layout_segment *lseg;
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
dprintk("--> %s\n", __func__);
+ spin_lock(&clp->cl_lock);
+ if (matches_outstanding_recall(ino, &lgp->args.range)) {
+ rpc_sleep_on(&clp->cl_rpcwaitq_recall, task, NULL);
+ spin_unlock(&clp->cl_lock);
+ return;
+ }
+ spin_unlock(&clp->cl_lock);
+ /* Note there is a race here, where a CB_LAYOUTRECALL can come in
+ * right now covering the LAYOUTGET we are about to send.
+ * However, that is not so catastrophic, and there seems
+ * to be no way to prevent it completely.
+ */
spin_lock(&ino->i_lock);
- lseg = pnfs_has_layout(nfsi->layout, &lgp->args.range);
- if (likely(!lseg)) {
+ if (pnfs_layoutgets_blocked(nfsi->layout, NULL)) {
+ rpc_sleep_on(&nfsi->lo_rpcwaitq_stateid, task, NULL);
spin_unlock(&ino->i_lock);
- dprintk("%s: no lseg found, proceeding\n", __func__);
- if (!nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
- &lgp->res.seq_res, 0, task))
- rpc_call_start(task);
return;
}
- if (!lseg->valid) {
+ /* This must be done after, but atomically with, the above check in
+ * order to properly serialize openstateid LAYOUTGETs.
+ */
+ nfsi->layout->plh_outstanding++;
+ spin_unlock(&ino->i_lock);
+
+ if (nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
+ &lgp->res.seq_res, 0, task)) {
+ spin_lock(&ino->i_lock);
+ nfsi->layout->plh_outstanding--;
spin_unlock(&ino->i_lock);
- dprintk("%s: invalid lseg found, waiting\n", __func__);
- rpc_sleep_on(&nfsi->lo_rpcwaitq, task, NULL);
return;
}
- get_lseg(lseg);
- *lgp->lsegpp = lseg;
- spin_unlock(&ino->i_lock);
- dprintk("%s: valid lseg found, no rpc required\n", __func__);
- rpc_exit(task, NFS4_OK);
+ rpc_call_start(task);
}
static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
- struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ struct inode *ino = lgp->args.inode;
dprintk("--> %s\n", __func__);
- if (!nfs4_sequence_done(task, &lgp->res.seq_res))
+ if (!nfs4_sequence_done(task, &lgp->res.seq_res)) {
+ /* layout code relies on fact that in this case
+ * code falls back to tk_action=call_start, but not
+ * back to rpc_prepare_task, to keep plh_outstanding
+ * correct.
+ */
return;
-
+ }
switch (task->tk_status) {
case 0:
break;
@@ -5390,7 +5406,11 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
task->tk_status = -NFS4ERR_DELAY;
/* Fall through */
default:
- if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, NFS_SERVER(ino),
+ NULL, NULL) == -EAGAIN) {
+ spin_lock(&ino->i_lock);
+ NFS_I(ino)->layout->plh_outstanding--;
+ spin_unlock(&ino->i_lock);
rpc_restart_call_prepare(task);
return;
}
@@ -5448,13 +5468,20 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
if (IS_ERR(task))
return PTR_ERR(task);
status = nfs4_wait_for_completion_rpc_task(task);
- if (status != 0)
- goto out;
- status = task->tk_status;
- if (status != 0)
- goto out;
- status = pnfs_layout_process(lgp);
-out:
+ if (status == 0)
+ status = task->tk_status;
+ if (status == 0)
+ status = pnfs_layout_process(lgp);
+ else {
+ struct inode *ino = lgp->args.inode;
+ struct pnfs_layout_hdr *lo = NFS_I(ino)->layout;
+
+ spin_lock(&ino->i_lock);
+ lo->plh_outstanding--;
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq_stateid);
+ spin_unlock(&ino->i_lock);
+ }
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
return status;
@@ -5598,9 +5625,9 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
spin_lock(&lo->inode->i_lock);
if (lrp->res.lrs_present)
- pnfs_set_layout_stateid(lo, &lrp->res.stateid);
+ pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
else
- pnfs_invalidate_layout_stateid(lo);
+ BUG_ON(!list_empty(&lo->segs));
spin_unlock(&lo->inode->i_lock);
}
dprintk("<-- %s\n", __func__);
@@ -5611,8 +5638,18 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
- if (lrp->args.return_type == RETURN_FILE)
- put_layout_hdr(lrp->args.inode);
+ if (lrp->args.return_type == RETURN_FILE) {
+ struct inode *ino = lrp->args.inode;
+ struct pnfs_layout_hdr *lo = NFS_I(ino)->layout;
+
+ spin_lock(&ino->i_lock);
+ lo->plh_block_lgets--;
+ lo->plh_outstanding--;
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq_stateid);
+ spin_unlock(&ino->i_lock);
+ put_layout_hdr(ino);
+ }
kfree(calldata);
dprintk("<-- %s\n", __func__);
}
@@ -5641,6 +5678,14 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
int status = 0;
dprintk("--> %s\n", __func__);
+ if (lrp->args.return_type == RETURN_FILE) {
+ struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+ /* FIXME we should test for BULK here */
+ spin_lock(&lo->inode->i_lock);
+ BUG_ON(lo->plh_block_lgets == 0);
+ lo->plh_outstanding++;
+ spin_unlock(&lo->inode->i_lock);
+ }
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 00632f6..ceb0d66 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1560,6 +1560,10 @@ static void nfs4_state_manager(struct nfs_client *clp)
nfs_client_return_marked_delegations(clp);
continue;
}
+ if (test_and_clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state)) {
+ nfs_client_return_layouts(clp);
+ continue;
+ }
/* Recall session slots */
if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)
&& nfs4_has_session(clp)) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 10a6f4a..5208ef7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1827,13 +1827,14 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
hdr->replen += decode_getdeviceinfo_maxsz;
}
-static void
+static int
encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr)
{
nfs4_stateid stateid;
__be32 *p;
+ int status;
p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_LAYOUTGET);
@@ -1843,8 +1844,11 @@ encode_layoutget(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->range.offset);
p = xdr_encode_hyper(p, args->range.length);
p = xdr_encode_hyper(p, args->minlength);
- pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
- args->ctx->state);
+ status = pnfs_choose_layoutget_stateid(&stateid,
+ NFS_I(args->inode)->layout,
+ args->ctx->state);
+ if (status)
+ return status;
p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(args->maxcount);
@@ -1857,6 +1861,7 @@ encode_layoutget(struct xdr_stream *xdr,
args->maxcount);
hdr->nops++;
hdr->replen += decode_layoutget_maxsz;
+ return 0;
}
static int
@@ -2781,12 +2786,15 @@ static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
+ int status;
xdr_init_encode(&xdr, &req->rq_snd_buf, p);
encode_compound_hdr(&xdr, req, &hdr);
encode_sequence(&xdr, &args->seq_args, &hdr);
encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
- encode_layoutget(&xdr, args, &hdr);
+ status = encode_layoutget(&xdr, args, &hdr);
+ if (status)
+ return status;
encode_nops(&hdr);
return 0;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ca8be8d..8d04cf2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
*/
/* Need to hold i_lock if caller does not already hold reference */
-static void
+void
get_layout_hdr(struct pnfs_layout_hdr *lo)
{
atomic_inc(&lo->plh_refcount);
@@ -278,24 +278,29 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
smp_mb();
lseg->valid = true;
lseg->layout = lo;
+ lseg->drain_notification = NULL;
}
static void
_put_lseg_common(struct pnfs_layout_segment *lseg)
{
+ struct inode *ino = lseg->layout->inode;
+
BUG_ON(lseg->valid == true);
list_del(&lseg->fi_list);
if (list_empty(&lseg->layout->segs)) {
struct nfs_client *clp;
- clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
+ clp = NFS_SERVER(ino)->nfs_client;
spin_lock(&clp->cl_lock);
/* List does not take a reference, so no need for put here */
list_del_init(&lseg->layout->layouts);
spin_unlock(&clp->cl_lock);
- pnfs_invalidate_layout_stateid(lseg->layout);
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->layout->plh_flags);
+ if (!pnfs_layoutgets_blocked(lseg->layout, NULL))
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq_stateid);
}
- rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
}
/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
@@ -325,9 +330,12 @@ put_lseg(struct pnfs_layout_segment *lseg)
atomic_read(&lseg->pls_refcount), lseg->valid);
ino = lseg->layout->inode;
if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
+ struct pnfs_cb_lrecall_info *drain_info = lseg->drain_notification;
+
_put_lseg_common(lseg);
spin_unlock(&ino->i_lock);
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ notify_drained(drain_info);
/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
put_layout_hdr(ino);
}
@@ -345,7 +353,7 @@ EXPORT_SYMBOL_GPL(put_lseg);
* READ READ true
* READ RW false
*/
-static int
+bool
should_free_lseg(struct pnfs_layout_range *lseg_range,
struct pnfs_layout_range *recall_range)
{
@@ -388,16 +396,19 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
dprintk("%s:Return\n", __func__);
}
-static void
+void
pnfs_free_lseg_list(struct list_head *free_me)
{
struct pnfs_layout_segment *lseg, *tmp;
struct inode *ino;
+ struct pnfs_cb_lrecall_info *drain_info;
list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
ino = lseg->layout->inode;
+ drain_info = lseg->drain_notification;
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ notify_drained(drain_info);
/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
put_layout_hdr(ino);
}
@@ -453,31 +464,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
}
}
-/* update lo->stateid with new if is more recent
- *
- * lo->stateid could be the open stateid, in which case we just use what given.
- */
+/* update lo->stateid with new if it is more recent */
void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
- const nfs4_stateid *new)
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+ bool update_barrier)
{
- nfs4_stateid *old = &lo->stateid;
- bool overwrite = false;
+ u32 oldseq, newseq;
assert_spin_locked(&lo->inode->i_lock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags) ||
- memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
- overwrite = true;
- else {
- u32 oldseq, newseq;
-
- oldseq = be32_to_cpu(old->stateid.seqid);
- newseq = be32_to_cpu(new->stateid.seqid);
- if ((int)(newseq - oldseq) > 0)
- overwrite = true;
+ oldseq = be32_to_cpu(lo->stateid.stateid.seqid);
+ newseq = be32_to_cpu(new->stateid.seqid);
+ if ((int)(newseq - oldseq) > 0) {
+ memcpy(&lo->stateid, &new->stateid, sizeof(new->stateid));
+ if (update_barrier)
+ lo->plh_barrier = be32_to_cpu(new->stateid.seqid);
+ else {
+ /* Because of wraparound, we want to keep the barrier
+ * "close" to the current seqids. It needs to be
+ * within 2**31 to count as "behind", so if it
+ * gets too near that limit, give us a little leeway
+ * and bring it to within 2**30.
+ * NOTE - and yes, this is all unsigned arithmetic.
+ */
+ if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+ lo->plh_barrier = newseq - (1 << 30);
+ }
}
- if (overwrite)
- memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
}
/* Layoutreturn may use an invalid stateid, just copy what is there */
@@ -487,13 +499,21 @@ void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
}
-void
-pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct nfs4_state *open_state)
+int
+pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state)
{
+ int status = 0;
+
dprintk("--> %s\n", __func__);
spin_lock(&lo->inode->i_lock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags)) {
+ if (lo->plh_block_lgets ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ /* We avoid -EAGAIN, as that has special meaning to
+ * some callers.
+ */
+ status = -NFS4ERR_LAYOUTTRYLATER;
+ } else if (list_empty(&lo->segs)) {
int seq;
do {
@@ -501,12 +521,11 @@ pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
memcpy(dst->data, open_state->stateid.data,
sizeof(open_state->stateid.data));
} while (read_seqretry(&open_state->seqlock, seq));
- set_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags);
} else
- memcpy(dst->data, lo->stateid.data,
- sizeof(lo->stateid.data));
+ memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
spin_unlock(&lo->inode->i_lock);
dprintk("<-- %s\n", __func__);
+ return status;
}
/*
@@ -573,6 +592,28 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
return out;
}
+void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
+ struct pnfs_layout_range *range,
+ struct pnfs_cb_lrecall_info *drain_info,
+ struct list_head *tmp_list)
+{
+ struct pnfs_layout_segment *lseg, *tmp;
+
+ assert_spin_locked(&lo->inode->i_lock);
+ list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
+ if (should_free_lseg(&lseg->range, range)) {
+ /* FIXME - need to change to something like a
+ * notification bitmap to remove the restriction
+ * of only being able to process a single
+ * CB_LAYOUTRECALL at a time.
+ */
+ BUG_ON(lseg->drain_notification);
+ lseg->drain_notification = drain_info;
+ atomic_inc(&drain_info->pcl_count);
+ mark_lseg_invalid(lseg, tmp_list);
+ }
+}
+
/* Return true if there is layout based io in progress in the given range.
* Assumes range has already been marked invalid, and layout marked to
* prevent any new lseg from being inserted.
@@ -661,6 +702,7 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
goto out;
}
+ lo->plh_block_lgets++;
list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
if (should_free_lseg(&lseg->range, &arg))
mark_lseg_invalid(lseg, &tmp_list);
@@ -717,14 +759,6 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin\n", __func__);
assert_spin_locked(&lo->inode->i_lock);
- if (list_empty(&lo->segs)) {
- struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
-
- spin_lock(&clp->cl_lock);
- BUG_ON(!list_empty(&lo->layouts));
- list_add_tail(&lo->layouts, &clp->cl_layouts);
- spin_unlock(&clp->cl_lock);
- }
list_for_each_entry(lp, &lo->segs, fi_list) {
if (cmp_layout(&lp->range, &lseg->range) > 0)
continue;
@@ -741,6 +775,9 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
}
if (!found) {
list_add_tail(&lseg->fi_list, &lo->segs);
+ if (list_is_singular(&lo->segs) &&
+ !pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(lo->inode)->lo_rpcwaitq_stateid);
dprintk("%s: inserted lseg %p "
"iomode %d offset %llu length %llu at tail\n",
__func__, lseg, lseg->range.iomode,
@@ -762,6 +799,7 @@ alloc_init_layout_hdr(struct inode *ino)
atomic_set(&lo->plh_refcount, 1);
INIT_LIST_HEAD(&lo->layouts);
INIT_LIST_HEAD(&lo->segs);
+ INIT_LIST_HEAD(&lo->plh_bulk_recall);
lo->inode = ino;
return lo;
}
@@ -849,6 +887,7 @@ pnfs_update_layout(struct inode *ino,
.length = NFS4_MAX_UINT64,
};
struct nfs_inode *nfsi = NFS_I(ino);
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
@@ -884,9 +923,28 @@ pnfs_update_layout(struct inode *ino,
goto out_unlock;
get_layout_hdr(lo); /* Matched in pnfs_layoutget_release */
+ if (list_empty(&lo->segs)) {
+ /* The lo must be on the clp list if there is any
+ * chance of a CB_LAYOUTRECALL(FILE) coming in.
+ */
+ spin_lock(&clp->cl_lock);
+ BUG_ON(!list_empty(&lo->layouts));
+ list_add_tail(&lo->layouts, &clp->cl_layouts);
+ spin_unlock(&clp->cl_lock);
+ }
spin_unlock(&ino->i_lock);
lseg = send_layoutget(lo, ctx, &arg);
+ if (!lseg) {
+ spin_lock(&ino->i_lock);
+ if (list_empty(&lo->segs)) {
+ spin_lock(&clp->cl_lock);
+ list_del_init(&lo->layouts);
+ spin_unlock(&clp->cl_lock);
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ }
+ spin_unlock(&ino->i_lock);
+ }
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
nfsi->layout->plh_flags, lseg);
@@ -896,6 +954,18 @@ out_unlock:
goto out;
}
+bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid)
+{
+ assert_spin_locked(&lo->inode->i_lock);
+ if ((stateid) &&
+ (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+ return true;
+ return lo->plh_block_lgets ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+ (list_empty(&lo->segs) && lo->plh_outstanding);
+}
+
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
@@ -903,6 +973,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
struct inode *ino = lo->inode;
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
int status = 0;
/* Inject layout blob into I/O device driver */
@@ -914,10 +985,25 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
status = PTR_ERR(lseg);
dprintk("%s: Could not allocate layout: error %d\n",
__func__, status);
+ spin_lock(&ino->i_lock);
goto out;
}
spin_lock(&ino->i_lock);
+ /* decrement needs to be done before call to pnfs_layoutgets_blocked */
+ lo->plh_outstanding--;
+ spin_lock(&clp->cl_lock);
+ if (matches_outstanding_recall(ino, &res->range)) {
+ spin_unlock(&clp->cl_lock);
+ dprintk("%s forget reply due to recall\n", __func__);
+ goto out_forget_reply;
+ }
+ spin_unlock(&clp->cl_lock);
+
+ if (pnfs_layoutgets_blocked(lo, &res->stateid)) {
+ dprintk("%s forget reply due to state\n", __func__);
+ goto out_forget_reply;
+ }
init_lseg(lo, lseg);
lseg->range = res->range;
get_lseg(lseg);
@@ -933,10 +1019,19 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
/* Done processing layoutget. Set the layout stateid */
- pnfs_set_layout_stateid(lo, &res->stateid);
- spin_unlock(&ino->i_lock);
+ pnfs_set_layout_stateid(lo, &res->stateid, false);
out:
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq_stateid);
+ spin_unlock(&ino->i_lock);
return status;
+
+out_forget_reply:
+ spin_unlock(&ino->i_lock);
+ lseg->layout = lo;
+ NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ spin_lock(&ino->i_lock);
+ goto out;
}
void
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e631487..810714a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -31,6 +31,7 @@
#define FS_NFS_PNFS_H
#include <linux/nfs_page.h>
+#include "callback.h" /* for cb_layoutrecallargs */
struct pnfs_layout_segment {
struct list_head fi_list;
@@ -38,6 +39,7 @@ struct pnfs_layout_segment {
atomic_t pls_refcount;
bool valid;
struct pnfs_layout_hdr *layout;
+ struct pnfs_cb_lrecall_info *drain_notification;
};
enum pnfs_try_status {
@@ -52,7 +54,7 @@ enum pnfs_try_status {
enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
- NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
+ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
NFS_LAYOUT_NEED_LCOMMIT, /* LAYOUTCOMMIT needed */
};
@@ -94,9 +96,13 @@ struct pnfs_layoutdriver_type {
struct pnfs_layout_hdr {
atomic_t plh_refcount;
struct list_head layouts; /* other client layouts */
+ struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
struct list_head segs; /* layout segments list */
int roc_iomode;/* return on close iomode, 0=none */
nfs4_stateid stateid;
+ unsigned long plh_outstanding; /* number of RPCs out */
+ unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
+ u32 plh_barrier; /* ignore lower seqids */
unsigned long plh_flags;
struct rpc_cred *cred; /* layoutcommit credential */
/* DH: These vars keep track of the maximum write range
@@ -117,6 +123,14 @@ struct pnfs_device {
unsigned int pglen;
};
+struct pnfs_cb_lrecall_info {
+ struct list_head pcl_list; /* hook into cl_layoutrecalls list */
+ atomic_t pcl_count;
+ struct nfs_client *pcl_clp;
+ struct inode *pcl_ino;
+ struct cb_layoutrecallargs pcl_args;
+};
+
/*
* Device ID RCU cache. A device ID is unique per client ID and layout type.
*/
@@ -175,7 +189,10 @@ extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
/* pnfs.c */
+void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
+bool should_free_lseg(struct pnfs_layout_range *lseg_range,
+ struct pnfs_layout_range *recall_range);
struct pnfs_layout_segment *
pnfs_has_layout(struct pnfs_layout_hdr *lo, struct pnfs_layout_range *range);
struct pnfs_layout_segment *
@@ -200,15 +217,25 @@ enum pnfs_try_status pnfs_try_to_commit(struct nfs_write_data *,
void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
struct nfs_open_context *, struct list_head *);
void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
+bool pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
void put_layout_hdr(struct inode *inode);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
- const nfs4_stateid *new);
+ const nfs4_stateid *new,
+ bool update_barrier);
void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
-void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct nfs4_state *open_state);
+int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
+ struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state);
+void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
+ struct pnfs_layout_range *range,
+ struct pnfs_cb_lrecall_info *drain_info,
+ struct list_head *tmp_list);
+/* FIXME - this should be in callback.h, but pnfs_cb_lrecall_info needs to be there too */
+extern void notify_drained(struct pnfs_cb_lrecall_info *d);
static inline bool
has_layout(struct nfs_inode *nfsi)
@@ -222,12 +249,6 @@ static inline int lo_fail_bit(u32 iomode)
NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}
-static inline void pnfs_invalidate_layout_stateid(struct pnfs_layout_hdr *lo)
-{
- assert_spin_locked(&lo->inode->i_lock);
- clear_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags);
-}
-
static inline void get_lseg(struct pnfs_layout_segment *lseg)
{
atomic_inc(&lseg->pls_refcount);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d8bfa42..061d81a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -191,6 +191,7 @@ struct nfs_inode {
/* pNFS layout information */
struct rpc_wait_queue lo_rpcwaitq;
+ struct rpc_wait_queue lo_rpcwaitq_stateid;
struct pnfs_layout_hdr *layout;
#endif /* CONFIG_NFS_V4*/
#ifdef CONFIG_NFS_FSCACHE
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3cae408..80dcc00 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -83,6 +83,10 @@ struct nfs_client {
u32 cl_exchange_flags;
struct nfs4_session *cl_session; /* sharred session */
struct list_head cl_layouts;
+ struct list_head cl_layoutrecalls;
+ unsigned long cl_cb_lrecall_count;
+#define PNFS_MAX_CB_LRECALLS (1)
+ struct rpc_wait_queue cl_rpcwaitq_recall;
struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
#endif /* CONFIG_NFS_V4_1 */
--
1.7.2.1
On 2010-11-04 17:22, Fred Isaman wrote:
> nfs4_proc_layoutreturn and its descendants were assuming that
> inode and lo were always available, but that is not true in the
> case of a bulk return.
>
> Signed-off-by: Fred Isaman <[email protected]>
Looks good. Thanks!
> ---
> fs/nfs/callback_proc.c | 1 +
> fs/nfs/nfs4proc.c | 37 ++++++++++++++++++-------------------
> fs/nfs/pnfs.c | 4 +++-
> include/linux/nfs_xdr.h | 1 +
> 4 files changed, 23 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 3e022a8..53a85648 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -284,6 +284,7 @@ static int pnfs_recall_layout(void *data)
> lrp->args.reclaim = 0;
> lrp->args.layout_type = rl.cbl_layout_type;
> lrp->args.return_type = rl.cbl_recall_type;
> + lrp->clp = clp;
> lrp->args.range = rl.cbl_seg;
> lrp->args.inode = inode;
> nfs4_proc_layoutreturn(lrp, true);
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 73bd44e..8d3965c 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -5557,23 +5557,23 @@ static void
> nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
> {
> struct nfs4_layoutreturn *lrp = calldata;
> - struct inode *ino = lrp->args.inode;
> - struct nfs_inode *nfsi = NFS_I(ino);
> - struct nfs_server *server = NFS_SERVER(ino);
>
> dprintk("--> %s\n", __func__);
> - if ((lrp->args.return_type == RETURN_FILE) &&
> - pnfs_return_layout_barrier(nfsi, &lrp->args.range)) {
> - dprintk("%s: waiting on barrier\n", __func__);
> - rpc_sleep_on(&nfsi->lo_rpcwaitq, task, NULL);
> - return;
> + if (lrp->args.return_type == RETURN_FILE) {
> + struct nfs_inode *nfsi = NFS_I(lrp->args.inode);
> +
> + if (pnfs_return_layout_barrier(nfsi, &lrp->args.range)) {
> + dprintk("%s: waiting on barrier\n", __func__);
> + rpc_sleep_on(&nfsi->lo_rpcwaitq, task, NULL);
> + return;
> + }
> }
> if (lrp->stateid) {
> /* Forget the layout, without sending the return */
> rpc_exit(task, 0);
> return;
> }
> - if (nfs4_setup_sequence(server, NULL, &lrp->args.seq_args,
> + if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args,
> &lrp->res.seq_res, 0, task))
> return;
> rpc_call_start(task);
> @@ -5582,8 +5582,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
> static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
> {
> struct nfs4_layoutreturn *lrp = calldata;
> - struct inode *ino = lrp->args.inode;
> - struct nfs_server *server = NFS_SERVER(ino);
> + struct nfs_server *server;
>
> dprintk("--> %s\n", __func__);
>
> @@ -5593,8 +5592,12 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
> if (RPC_ASSASSINATED(task))
> return;
>
> - if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN)
> - nfs_restart_rpc(task, server->nfs_client);
> + if (lrp->args.return_type == RETURN_FILE)
> + server = NFS_SERVER(lrp->args.inode);
> + else
> + server = NULL;
> + if (nfs4_async_handle_error(task, server, NULL, lrp->clp) == -EAGAIN)
> + nfs_restart_rpc(task, lrp->clp);
>
> dprintk("<-- %s\n", __func__);
> }
> @@ -5602,10 +5605,8 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
> static void nfs4_layoutreturn_release(void *calldata)
> {
> struct nfs4_layoutreturn *lrp = calldata;
> - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
>
> - dprintk("--> %s return_type %d lo %p\n", __func__,
> - lrp->args.return_type, lo);
> + dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
>
> pnfs_layoutreturn_release(lrp);
> kfree(calldata);
> @@ -5620,8 +5621,6 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = {
>
> int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
> {
> - struct inode *ino = lrp->args.inode;
> - struct nfs_server *server = NFS_SERVER(ino);
> struct rpc_task *task;
> struct rpc_message msg = {
> .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN],
> @@ -5629,7 +5628,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
> .rpc_resp = &lrp->res,
> };
> struct rpc_task_setup task_setup_data = {
> - .rpc_client = server->client,
> + .rpc_client = lrp->clp->cl_rpcclient,
> .rpc_message = &msg,
> .callback_ops = &nfs4_layoutreturn_call_ops,
> .callback_data = lrp,
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 01ecb95..34f6914 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -590,10 +590,11 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
> void
> pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
> {
> - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
> + struct pnfs_layout_hdr *lo;
>
> if (lrp->args.return_type != RETURN_FILE)
> return;
> + lo = NFS_I(lrp->args.inode)->layout;
> spin_lock(&lrp->args.inode->i_lock);
> if (!lrp->res.valid)
> ; /* forgetful model internal release */
> @@ -630,6 +631,7 @@ return_layout(struct inode *ino, struct pnfs_layout_range *range,
> lrp->args.range = *range;
> lrp->args.inode = ino;
> lrp->stateid = stateid;
> + lrp->clp = server->nfs_client;
>
> status = nfs4_proc_layoutreturn(lrp, wait);
> out:
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 1ff6cb0..0ee7cce 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -280,6 +280,7 @@ struct nfs4_layoutreturn {
> struct nfs4_layoutreturn_res res;
> struct rpc_cred *cred;
> const nfs4_stateid *stateid;
> + struct nfs_client *clp;
> int rpc_status;
> };
>
On 2010-11-04 17:22, Fred Isaman wrote:
> This prepares for future changes, where the layout state needs
> to change atomically with several other variables. In particular,
> it will need to know if lo->segs is empty. Moreover, the
> layoutstateid is not really a read-mostly structure, as it is
> written on each LAYOUTGET.
>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/callback_proc.c | 8 +++---
> fs/nfs/nfs4xdr.c | 2 +
> fs/nfs/pnfs.c | 55 ++++++++++++++---------------------------------
> fs/nfs/pnfs.h | 4 +--
> 4 files changed, 24 insertions(+), 45 deletions(-)
>
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 84c5a1b..3e022a8 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -135,12 +135,11 @@ static bool
> pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
> const nfs4_stateid stateid)
> {
> - int seqlock;
> bool res;
> u32 oldseqid, newseqid;
>
> - do {
> - seqlock = read_seqbegin(&lo->seqlock);
> + spin_lock(&lo->inode->i_lock);
> + {
> oldseqid = be32_to_cpu(lo->stateid.stateid.seqid);
> newseqid = be32_to_cpu(stateid.stateid.seqid);
> res = !memcmp(lo->stateid.stateid.other,
> @@ -158,7 +157,8 @@ pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
> if (res)
> res = (newseqid == 1);
> }
> - } while (read_seqretry(&lo->seqlock, seqlock));
> + }
> + spin_unlock(&lo->inode->i_lock);
>
> return res;
> }
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index 6d9ef2b..b71a482 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -1915,7 +1915,9 @@ encode_layoutreturn(struct xdr_stream *xdr,
> p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
> p = xdr_encode_hyper(p, args->range.offset);
> p = xdr_encode_hyper(p, args->range.length);
> + spin_lock(&args->inode->i_lock);
> pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
> + spin_unlock(&args->inode->i_lock);
> p = xdr_encode_opaque_fixed(p, &stateid.data,
> NFS4_STATEID_SIZE);
> p = reserve_space(xdr, 4);
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 4e5c68b..01ecb95 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -456,7 +456,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
> nfs4_stateid *old = &lo->stateid;
> bool overwrite = false;
>
> - write_seqlock(&lo->seqlock);
> + assert_spin_locked(&lo->inode->i_lock);
> if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
> memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
> overwrite = true;
> @@ -470,54 +470,34 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
> }
> if (overwrite)
> memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
> - write_sequnlock(&lo->seqlock);
> -}
> -
> -static void
> -pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
> - struct nfs4_state *state)
> -{
> - int seq;
> -
> - dprintk("--> %s\n", __func__);
> - write_seqlock(&lo->seqlock);
> - do {
> - seq = read_seqbegin(&state->seqlock);
> - memcpy(lo->stateid.data, state->stateid.data,
> - sizeof(state->stateid.data));
> - } while (read_seqretry(&state->seqlock, seq));
> - set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
> - write_sequnlock(&lo->seqlock);
> - dprintk("<-- %s\n", __func__);
> }
>
> /* Layoutreturn may use an invalid stateid, just copy what is there */
> void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
> {
> - int seq;
> -
> - do {
> - seq = read_seqbegin(&lo->seqlock);
> - memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
> - } while (read_seqretry(&lo->seqlock, seq));
> + assert_spin_locked(&lo->inode->i_lock);
> + memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
This function is just redundant now.
Let's just open code its two users.
Benny
> }
>
> void
> pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
> struct nfs4_state *open_state)
> {
> - int seq;
> -
> dprintk("--> %s\n", __func__);
> - do {
> - seq = read_seqbegin(&lo->seqlock);
> - if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
> - /* This will trigger retry of the read */
> - pnfs_layout_from_open_stateid(lo, open_state);
> - } else
> - memcpy(dst->data, lo->stateid.data,
> - sizeof(lo->stateid.data));
> - } while (read_seqretry(&lo->seqlock, seq));
> + spin_lock(&lo->inode->i_lock);
> + if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
> + int seq;
> +
> + do {
> + seq = read_seqbegin(&open_state->seqlock);
> + memcpy(dst->data, open_state->stateid.data,
> + sizeof(open_state->stateid.data));
> + } while (read_seqretry(&open_state->seqlock, seq));
> + set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
> + } else
> + memcpy(dst->data, lo->stateid.data,
> + sizeof(lo->stateid.data));
> + spin_unlock(&lo->inode->i_lock);
> dprintk("<-- %s\n", __func__);
> }
>
> @@ -791,7 +771,6 @@ alloc_init_layout_hdr(struct inode *ino)
> lo->refcount = 1;
> INIT_LIST_HEAD(&lo->layouts);
> INIT_LIST_HEAD(&lo->segs);
> - seqlock_init(&lo->seqlock);
> lo->inode = ino;
> return lo;
> }
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 000acf0..de4eaa8 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -96,7 +96,6 @@ struct pnfs_layout_hdr {
> struct list_head layouts; /* other client layouts */
> struct list_head segs; /* layout segments list */
> int roc_iomode;/* return on close iomode, 0=none */
> - seqlock_t seqlock; /* Protects the stateid */
> nfs4_stateid stateid;
> unsigned long state;
> struct rpc_cred *cred; /* layoutcommit credential */
> @@ -224,9 +223,8 @@ static inline int lo_fail_bit(u32 iomode)
>
> static inline void pnfs_invalidate_layout_stateid(struct pnfs_layout_hdr *lo)
> {
> - write_seqlock(&lo->seqlock);
> + assert_spin_locked(&lo->inode->i_lock);
> clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
> - write_sequnlock(&lo->seqlock);
> }
>
> static inline void get_lseg(struct pnfs_layout_segment *lseg)
On Thu, Nov 11, 2010 at 10:00 AM, Benny Halevy <[email protected]> wrote:
> On 2010-11-04 17:22, Fred Isaman wrote:
>> This prepares for future changes, where the layout state needs
>> to change atomically with several other variables. In particular,
>> it will need to know if lo->segs is empty. Moreover, the
>> layoutstateid is not really a read-mostly structure, as it is
>> written on each LAYOUTGET.
>>
>> Signed-off-by: Fred Isaman <[email protected]>
>> ---
>>  fs/nfs/callback_proc.c |    8 +++---
>>  fs/nfs/nfs4xdr.c       |    2 +
>>  fs/nfs/pnfs.c          |   55 ++++++++++++++---------------------------------
>>  fs/nfs/pnfs.h          |    4 +--
>>  4 files changed, 24 insertions(+), 45 deletions(-)
>>
>> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
>> index 84c5a1b..3e022a8 100644
>> --- a/fs/nfs/callback_proc.c
>> +++ b/fs/nfs/callback_proc.c
>> @@ -135,12 +135,11 @@ static bool
>> ?pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? const nfs4_stateid stateid)
>> ?{
>> - ? ? int seqlock;
>> ? ? ? bool res;
>> ? ? ? u32 oldseqid, newseqid;
>>
>> - ? ? do {
>> - ? ? ? ? ? ? seqlock = read_seqbegin(&lo->seqlock);
>> + ? ? spin_lock(&lo->inode->i_lock);
>> + ? ? {
>> ? ? ? ? ? ? ? oldseqid = be32_to_cpu(lo->stateid.stateid.seqid);
>> ? ? ? ? ? ? ? newseqid = be32_to_cpu(stateid.stateid.seqid);
>> ? ? ? ? ? ? ? res = !memcmp(lo->stateid.stateid.other,
>> @@ -158,7 +157,8 @@ pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
>> ? ? ? ? ? ? ? ? ? ? ? if (res)
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? res = (newseqid == 1);
>> ? ? ? ? ? ? ? }
>> - ? ? } while (read_seqretry(&lo->seqlock, seqlock));
>> + ? ? }
>> + ? ? spin_unlock(&lo->inode->i_lock);
>>
>> ? ? ? return res;
>> ?}
>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> index 6d9ef2b..b71a482 100644
>> --- a/fs/nfs/nfs4xdr.c
>> +++ b/fs/nfs/nfs4xdr.c
>> @@ -1915,7 +1915,9 @@ encode_layoutreturn(struct xdr_stream *xdr,
>> ? ? ? ? ? ? ? p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
>> ? ? ? ? ? ? ? p = xdr_encode_hyper(p, args->range.offset);
>> ? ? ? ? ? ? ? p = xdr_encode_hyper(p, args->range.length);
>> + ? ? ? ? ? ? spin_lock(&args->inode->i_lock);
>> ? ? ? ? ? ? ? pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
>> + ? ? ? ? ? ? spin_unlock(&args->inode->i_lock);
>> ? ? ? ? ? ? ? p = xdr_encode_opaque_fixed(p, &stateid.data,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? NFS4_STATEID_SIZE);
>> ? ? ? ? ? ? ? p = reserve_space(xdr, 4);
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 4e5c68b..01ecb95 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -456,7 +456,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
>> ? ? ? nfs4_stateid *old = &lo->stateid;
>> ? ? ? bool overwrite = false;
>>
>> - ? ? write_seqlock(&lo->seqlock);
>> + ? ? assert_spin_locked(&lo->inode->i_lock);
>> ? ? ? if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
>> ? ? ? ? ? memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
>> ? ? ? ? ? ? ? overwrite = true;
>> @@ -470,54 +470,34 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
>> ? ? ? }
>> ? ? ? if (overwrite)
>> ? ? ? ? ? ? ? memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
>> - ? ? write_sequnlock(&lo->seqlock);
>> -}
>> -
>> -static void
>> -pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? struct nfs4_state *state)
>> -{
>> - ? ? int seq;
>> -
>> - ? ? dprintk("--> %s\n", __func__);
>> - ? ? write_seqlock(&lo->seqlock);
>> - ? ? do {
>> - ? ? ? ? ? ? seq = read_seqbegin(&state->seqlock);
>> - ? ? ? ? ? ? memcpy(lo->stateid.data, state->stateid.data,
>> - ? ? ? ? ? ? ? ? ? ?sizeof(state->stateid.data));
>> - ? ? } while (read_seqretry(&state->seqlock, seq));
>> - ? ? set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
>> - ? ? write_sequnlock(&lo->seqlock);
>> - ? ? dprintk("<-- %s\n", __func__);
>> ?}
>>
>> ?/* Layoutreturn may use an invalid stateid, just copy what is there */
>> ?void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
>> ?{
>> - ? ? int seq;
>> -
>> - ? ? do {
>> - ? ? ? ? ? ? seq = read_seqbegin(&lo->seqlock);
>> - ? ? ? ? ? ? memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
>> - ? ? } while (read_seqretry(&lo->seqlock, seq));
>> + ? ? assert_spin_locked(&lo->inode->i_lock);
>> + ? ? memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
>
> This function is just redundant now.
> Let's just open code its two users.
>
> Benny
>
OK.
Fred
>> ?}
>>
>> ?void
>> ?pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>> ? ? ? ? ? ? ? ? ? ? ? struct nfs4_state *open_state)
>> ?{
>> - ? ? int seq;
>> -
>> ? ? ? dprintk("--> %s\n", __func__);
>> - ? ? do {
>> - ? ? ? ? ? ? seq = read_seqbegin(&lo->seqlock);
>> - ? ? ? ? ? ? if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
>> - ? ? ? ? ? ? ? ? ? ? /* This will trigger retry of the read */
>> - ? ? ? ? ? ? ? ? ? ? pnfs_layout_from_open_stateid(lo, open_state);
>> - ? ? ? ? ? ? } else
>> - ? ? ? ? ? ? ? ? ? ? memcpy(dst->data, lo->stateid.data,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ?sizeof(lo->stateid.data));
>> - ? ? } while (read_seqretry(&lo->seqlock, seq));
>> + ? ? spin_lock(&lo->inode->i_lock);
>> + ? ? if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
>> + ? ? ? ? ? ? int seq;
>> +
>> + ? ? ? ? ? ? do {
>> + ? ? ? ? ? ? ? ? ? ? seq = read_seqbegin(&open_state->seqlock);
>> + ? ? ? ? ? ? ? ? ? ? memcpy(dst->data, open_state->stateid.data,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ?sizeof(open_state->stateid.data));
>> + ? ? ? ? ? ? } while (read_seqretry(&open_state->seqlock, seq));
>> + ? ? ? ? ? ? set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
>> + ? ? } else
>> + ? ? ? ? ? ? memcpy(dst->data, lo->stateid.data,
>> + ? ? ? ? ? ? ? ? ? ?sizeof(lo->stateid.data));
>> + ? ? spin_unlock(&lo->inode->i_lock);
>> ? ? ? dprintk("<-- %s\n", __func__);
>> ?}
>>
>> @@ -791,7 +771,6 @@ alloc_init_layout_hdr(struct inode *ino)
>> ? ? ? lo->refcount = 1;
>> ? ? ? INIT_LIST_HEAD(&lo->layouts);
>> ? ? ? INIT_LIST_HEAD(&lo->segs);
>> - ? ? seqlock_init(&lo->seqlock);
>> ? ? ? lo->inode = ino;
>> ? ? ? return lo;
>> ?}
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 000acf0..de4eaa8 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -96,7 +96,6 @@ struct pnfs_layout_hdr {
>> ? ? ? struct list_head ? ? ? ?layouts; ? /* other client layouts */
>> ? ? ? struct list_head ? ? ? ?segs; ? ? ?/* layout segments list */
>> ? ? ? int ? ? ? ? ? ? ? ? ? ? roc_iomode;/* return on close iomode, 0=none */
>> - ? ? seqlock_t ? ? ? ? ? ? ? seqlock; ? /* Protects the stateid */
>> ? ? ? nfs4_stateid ? ? ? ? ? ?stateid;
>> ? ? ? unsigned long ? ? ? ? ? state;
>> ? ? ? struct rpc_cred ? ? ? ? *cred; ? ? /* layoutcommit credential */
>> @@ -224,9 +223,8 @@ static inline int lo_fail_bit(u32 iomode)
>>
>> ?static inline void pnfs_invalidate_layout_stateid(struct pnfs_layout_hdr *lo)
>> ?{
>> - ? ? write_seqlock(&lo->seqlock);
>> + ? ? assert_spin_locked(&lo->inode->i_lock);
>> ? ? ? clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
>> - ? ? write_sequnlock(&lo->seqlock);
>> ?}
>>
>> ?static inline void get_lseg(struct pnfs_layout_segment *lseg)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
On 2010-11-10 16:46, Fred Isaman wrote:
> On Wed, Nov 10, 2010 at 9:35 AM, Benny Halevy <[email protected]> wrote:
>> On 2010-11-04 17:22, Fred Isaman wrote:
>>> Instead, have mark_invalid function that marks lseg invalid and
>>> removes the reference that holds it in the list. Now when io is finished,
>>> the lseg will automatically be removed from the list. This is
>>> at the heart of many of the upcoming cb_layoutrecall changes.
>>>
>>> Signed-off-by: Fred Isaman <[email protected]>
>>> ---
>>> fs/nfs/nfs4xdr.c | 3 +-
>>> fs/nfs/pnfs.c | 145 ++++++++++++++++++++++++++++++++++-------------------
>>> fs/nfs/pnfs.h | 1 +
>>> 3 files changed, 95 insertions(+), 54 deletions(-)
>>>
>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>> index 238eeb2..6d9ef2b 100644
>>> --- a/fs/nfs/nfs4xdr.c
>>> +++ b/fs/nfs/nfs4xdr.c
>>> @@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
>>> p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
>>> p = xdr_encode_hyper(p, args->range.offset);
>>> p = xdr_encode_hyper(p, args->range.length);
>>> - pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
>>> - NULL);
>>> + pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
>>> p = xdr_encode_opaque_fixed(p, &stateid.data,
>>> NFS4_STATEID_SIZE);
>>> p = reserve_space(xdr, 4);
>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>> index 3bbe3be..4e5c68b 100644
>>> --- a/fs/nfs/pnfs.c
>>> +++ b/fs/nfs/pnfs.c
>>> @@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
>>> lseg->layout = lo;
>>> }
>>>
>>> +static void
>>> +_put_lseg_common(struct pnfs_layout_segment *lseg)
>>> +{
>>> + BUG_ON(lseg->valid == true);
>>> + list_del(&lseg->fi_list);
>>> + if (list_empty(&lseg->layout->segs)) {
>>> + struct nfs_client *clp;
>>> +
>>> + clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
>>> + spin_lock(&clp->cl_lock);
>>> + /* List does not take a reference, so no need for put here */
>>> + list_del_init(&lseg->layout->layouts);
>>> + spin_unlock(&clp->cl_lock);
>>> + pnfs_invalidate_layout_stateid(lseg->layout);
>>> + }
>>> + rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
>>> +}
>>> +
>>> +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
>>> + * could sleep, so must be called outside of the lock.
>>> + */
>>> +static void
>>> +put_lseg_locked(struct pnfs_layout_segment *lseg,
>>> + struct list_head *tmp_list)
>>> +{
>>> + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>> + atomic_read(&lseg->pls_refcount), lseg->valid);
>>> + if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>> + _put_lseg_common(lseg);
>>> + list_add(&lseg->fi_list, tmp_list);
>>> + }
>>> +}
>>> +
>>> void
>>> put_lseg(struct pnfs_layout_segment *lseg)
>>> {
>>> - bool do_wake_up;
>>> struct inode *ino;
>>>
>>> if (!lseg)
>>> @@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg)
>>>
>>> dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>> atomic_read(&lseg->pls_refcount), lseg->valid);
>>> - do_wake_up = !lseg->valid;
>>> ino = lseg->layout->inode;
>>> - if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>> + if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
>>> + _put_lseg_common(lseg);
>>> + spin_unlock(&ino->i_lock);
>>> NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>> /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>> put_layout_hdr(ino);
>>> }
>>> - if (do_wake_up)
>>> - rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
>>> }
>>> EXPORT_SYMBOL_GPL(put_lseg);
>>>
>>> @@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
>>> lseg->range.iomode == range->iomode);
>>> }
>>>
>>> -static bool
>>> -_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
>>> +static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
>>> + struct list_head *tmp_list)
>>> {
>>> - return atomic_read(&lseg->pls_refcount) == 1;
>>> + assert_spin_locked(&lseg->layout->inode->i_lock);
>>> + if (lseg->valid) {
>>> + lseg->valid = false;
>>> + /* Remove the reference keeping the lseg in the
>>> + * list. It will now be removed when all
>>> + * outstanding io is finished.
>>> + */
>>> + put_lseg_locked(lseg, tmp_list);
>>> + }
>>> }
>>>
>>> static void
>>> @@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
>>> __func__, lo, range->offset, range->length, range->iomode);
>>>
>>> assert_spin_locked(&lo->inode->i_lock);
>>> - list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
>>> - if (!should_free_lseg(lseg, range) ||
>>> - !_pnfs_can_return_lseg(lseg))
>>> - continue;
>>> - dprintk("%s: freeing lseg %p iomode %d "
>>> - "offset %llu length %llu\n", __func__,
>>> - lseg, lseg->range.iomode, lseg->range.offset,
>>> - lseg->range.length);
>>> - list_move(&lseg->fi_list, tmp_list);
>>> - }
>>> - if (list_empty(&lo->segs)) {
>>> - struct nfs_client *clp;
>>> -
>>> - clp = NFS_SERVER(lo->inode)->nfs_client;
>>> - spin_lock(&clp->cl_lock);
>>> - /* List does not take a reference, so no need for put here */
>>> - list_del_init(&lo->layouts);
>>> - spin_unlock(&clp->cl_lock);
>>> - pnfs_invalidate_layout_stateid(lo);
>>> - }
>>> -
>>> + list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
>>> + if (should_free_lseg(lseg, range)) {
>>> + dprintk("%s: freeing lseg %p iomode %d "
>>> + "offset %llu length %llu\n", __func__,
>>> + lseg, lseg->range.iomode, lseg->range.offset,
>>> + lseg->range.length);
>>> + mark_lseg_invalid(lseg, tmp_list);
>>> + }
>>> dprintk("%s:Return\n", __func__);
>>> }
>>>
>>> static void
>>> -pnfs_free_lseg_list(struct list_head *tmp_list)
>>> +pnfs_free_lseg_list(struct list_head *free_me)
>>> {
>>> - struct pnfs_layout_segment *lseg;
>>> + struct pnfs_layout_segment *lseg, *tmp;
>>> + struct inode *ino;
>>>
>>> - while (!list_empty(tmp_list)) {
>>> - lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
>>> - fi_list);
>>> - dprintk("%s calling put_lseg on %p\n", __func__, lseg);
>>> - list_del(&lseg->fi_list);
>>> - put_lseg(lseg);
>>> + list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
>>> + BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
>>> + ino = lseg->layout->inode;
>>> + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>> + /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>> + put_layout_hdr(ino);
>>> }
>>> + INIT_LIST_HEAD(free_me);
>>> }
>>>
>>> void
>>> @@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
>>> dprintk("<-- %s\n", __func__);
>>> }
>>>
>>> +/* Layoutreturn may use an invalid stateid, just copy what is there */
>>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
>>> +{
>>> + int seq;
>>> +
>>> + do {
>>> + seq = read_seqbegin(&lo->seqlock);
>>> + memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
>>> + } while (read_seqretry(&lo->seqlock, seq));
>>> +}
>>> +
>>> void
>>> pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>>> struct nfs4_state *open_state)
>>> @@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
>>> return out;
>>> }
>>>
>>> +/* Return true if there is layout based io in progress in the given range.
>>> + * Assumes range has already been marked invalid, and layout marked to
>>> + * prevent any new lseg from being inserted.
>>> + */
>>> bool
>>> pnfs_return_layout_barrier(struct nfs_inode *nfsi,
>>> struct pnfs_layout_range *range)
>>> {
>>> - struct pnfs_layout_segment *lseg;
>>> + struct pnfs_layout_segment *lseg, *tmp;
>>> bool ret = false;
>>>
>>> spin_lock(&nfsi->vfs_inode.i_lock);
>>> - list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
>>> - if (!should_free_lseg(lseg, range))
>>> - continue;
>>> - lseg->valid = false;
>>> - if (!_pnfs_can_return_lseg(lseg)) {
>>> - dprintk("%s: wait on lseg %p refcount %d\n",
>>> - __func__, lseg,
>>> - atomic_read(&lseg->pls_refcount));
>>> + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
>>
>> Why do you need the safe version here while the inode is locked?
>>
>
> We don't.
OK. I'll fix that then :)
>
>
>>> + if (should_free_lseg(lseg, range)) {
>>> ret = true;
>>
>> But this will always return "true" if there's any lseg to return,
>> not only if (!_pnfs_can_return_lseg(lseg)).
>>
>> What am I missing? :)
>>
>
> A return of "true" means the caller should wait. So if there is any
> lseg still left to return, we should return true. The refcounting has
> changed so that once the pending IO is finished, the lseg will
> automatically be removed from the list. I suspect that what you are
> missing is that...the refcount in the invalid case is one less than
> what it used to be.
Thanks. I see what you mean now.
What's missing is plh_block_lgets, which is introduced only
in [PATCH 13/18] "pnfs-submit: rewrite of layout state handling and cb_layoutrecall".
Otherwise, new lsegs can be inserted into the list in between.
Benny
>
> Fred
>
>> Benny
>>
>>> + break;
>>> }
>>> - }
>>> spin_unlock(&nfsi->vfs_inode.i_lock);
>>> dprintk("%s:Return %d\n", __func__, ret);
>>> return ret;
>>> @@ -574,12 +611,10 @@ void
>>> pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
>>> {
>>> struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
>>> - LIST_HEAD(tmp_list);
>>>
>>> if (lrp->args.return_type != RETURN_FILE)
>>> return;
>>> spin_lock(&lrp->args.inode->i_lock);
>>> - pnfs_clear_lseg_list(lo, &tmp_list, &lrp->args.range);
>>> if (!lrp->res.valid)
>>> ; /* forgetful model internal release */
>>> else if (!lrp->res.lrs_present)
>>> @@ -588,7 +623,6 @@ pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
>>> pnfs_set_layout_stateid(lo, &lrp->res.stateid);
>>> put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
>>> spin_unlock(&lrp->args.inode->i_lock);
>>> - pnfs_free_lseg_list(&tmp_list);
>>> }
>>>
>>> static int
>>> @@ -641,7 +675,11 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
>>> arg.offset = 0;
>>> arg.length = NFS4_MAX_UINT64;
>>>
>>> + /* probably should BUGON if type != RETURN_FILE */
>>> if (type == RETURN_FILE) {
>>> + LIST_HEAD(tmp_list);
>>> + struct pnfs_layout_segment *lseg, *tmp;
>>> +
>>> spin_lock(&ino->i_lock);
>>> lo = nfsi->layout;
>>> if (lo && !has_layout_to_return(lo, &arg))
>>> @@ -652,10 +690,13 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
>>> goto out;
>>> }
>>>
>>> + list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
>>> + if (should_free_lseg(lseg, &arg))
>>> + mark_lseg_invalid(lseg, &tmp_list);
>>> /* Reference matched in pnfs_layoutreturn_release */
>>> get_layout_hdr_locked(lo);
>>> -
>>> spin_unlock(&ino->i_lock);
>>> + pnfs_free_lseg_list(&tmp_list);
>>>
>>> if (layoutcommit_needed(nfsi)) {
>>> if (stateid && !wait) { /* callback */
>>> @@ -1171,7 +1212,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
>>> nfsi->layout->write_end_pos = 0;
>>> nfsi->layout->cred = NULL;
>>> __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state);
>>> - pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout, NULL);
>>> + pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
>>>
>>> /* Reference for layoutcommit matched in pnfs_layoutcommit_release */
>>> get_layout_hdr_locked(NFS_I(inode)->layout);
>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>> index 05dd5e0..000acf0 100644
>>> --- a/fs/nfs/pnfs.h
>>> +++ b/fs/nfs/pnfs.h
>>> @@ -206,6 +206,7 @@ void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr);
>>> void pnfs_destroy_layout(struct nfs_inode *);
>>> void pnfs_destroy_all_layouts(struct nfs_client *);
>>> void put_layout_hdr(struct inode *inode);
>>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
>>> void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>>> struct nfs4_state *open_state);
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>>