Return-Path: Received: from mail-iw0-f174.google.com ([209.85.214.174]:54609 "EHLO mail-iw0-f174.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754972Ab1FKM1Q (ORCPT ); Sat, 11 Jun 2011 08:27:16 -0400 Received: by iwn34 with SMTP id 34so2779541iwn.19 for ; Sat, 11 Jun 2011 05:27:16 -0700 (PDT) Message-ID: <4DF35F17.20501@tonian.com> Date: Sat, 11 Jun 2011 08:27:03 -0400 From: Benny Halevy To: Boaz Harrosh CC: "J. Bruce Fields" , NFS list , Andy Adamson , Trond Myklebust Subject: Re: [RFC] pnfsd: Return all ROC layouts On Close References: <4DE3B812.8020804@panasas.com> <4DE3D392.3020902@panasas.com> <4DF27702.4060900@tonian.com> <4DF2A20B.2030901@panasas.com> In-Reply-To: <4DF2A20B.2030901@panasas.com> Content-Type: text/plain; charset=ISO-8859-1 Sender: linux-nfs-owner@vger.kernel.org List-ID: MIME-Version: 1.0 On 2011-06-10 19:00, Boaz Harrosh wrote: > On 06/10/2011 12:56 PM, Benny Halevy wrote: >> On 2011-05-30 13:27, Benny Halevy wrote: >>> On 2011-05-30 18:30, Boaz Harrosh wrote: >>>> >>>> As talk to with Trond, when a client has sent an nfs4_close, the >>>> server can surly assume that the given client has already forgotten >>>> all it's ROC given layouts. And in fact the current Linux-client goes >>>> to grate length to make sure all ROC layouts are invalidated, before >>>> sending the close. If a new open+IO comes in and tries to get a new layout, >>>> it will wait for the nfs4_close to return before sending the new layout_get. >>>> >>>> So in light of above the server should simulate layout_return and remove >>>> any layouts marked ROC on nfs4_close. >>>> >>>> Current code is protocol correct, but a more fine-grained approach can be >>>> devised to also release layouts in open_downgrade, of all unused iomodes. >>>> But to do this I'll need the help of an expert. >>>> >>>> With this patch in, I'm finally able to run with the new Linux-Client together >>>> with a Linux-Server. Before, the layouts where only released and freed on umount >>>> and in a modest machine like the UML I use, few 10s of "git checkout linux" the >>>> server would oom. >>>> >>>> Benny please submit it to the pnfs tree, so we can run with this in Bakeathone. >>>> >>>> So please all help me to verify and refine this code. >>>> >>>> Signed-off-by: Boaz Harrosh >>>> --- >>>> fs/nfsd/nfs4pnfsd.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- >>>> fs/nfsd/nfs4state.c | 3 +++ >>>> fs/nfsd/pnfsd.h | 3 +++ >>>> 3 files changed, 48 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c >>>> index 9089e6b9..d8f744e 100644 >>>> --- a/fs/nfsd/nfs4pnfsd.c >>>> +++ b/fs/nfsd/nfs4pnfsd.c >>>> @@ -402,7 +402,8 @@ init_layout(struct nfs4_layout_state *ls, >>>> struct nfs4_client *clp, >>>> struct svc_fh *current_fh, >>>> struct nfsd4_layout_seg *seg, >>>> - stateid_t *stateid) >>>> + stateid_t *stateid, >>>> + bool roc) >>>> { >>>> dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__, >>>> ls, lp, clp, fp, fp->fi_inode); >>>> @@ -412,6 +413,7 @@ init_layout(struct nfs4_layout_state *ls, >>>> lp->lo_file = fp; >>>> get_layout_state(ls); >>>> lp->lo_state = ls; >>>> + lp->lo_roc = roc; >>>> memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg)); >>>> spin_lock(&layout_lock); >>>> update_layout_stateid(ls, stateid); >>>> @@ -914,7 +916,8 @@ nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp, >>>> goto out_freelayout; >>>> >>>> /* Can't merge, so let's initialize this new layout */ >>>> - init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg, &lgp->lg_sid); >>>> + init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg, &lgp->lg_sid, >>>> + res.lg_return_on_close); >>>> out_unlock: >>>> if (ls) >>>> put_layout_state(ls); >>>> @@ -1328,6 +1331,43 @@ nomatching_layout(struct nfs4_layoutrecall *clr) >>>> iput(inode); >>>> } >>>> >>>> +/* Return On Close: >>>> + * Look for all layouts of @fp that belong to @clp, if ROC is set, remove >>>> + * the layout and simulate a layout_return. Surly the client has forgotten >>>> + * these layouts or it would return them before the close. >>>> + */ >>>> +void pnfs_roc(struct nfs4_client *clp, struct nfs4_file *fp, >>>> + enum pnfs_iomode iomode) >>> >>> what's the iomode for? >>> >>> The nfs CLOSE operation is sent on the final close on the client side. >>> otherwise you'd get an OPEN_DOWNGRADE for which return on close is not >>> really defined. >> >> Ping... :) >> >> Benny >> > > Fine just drop the iomode param for now. But with out this patch I cannot > run Sure, no problem.... > > Boaz > >>> >>> Benny >>> >>>> +{ >>>> + struct nfs4_layout *lo, *nextlp; >>>> + >>>> + spin_lock(&layout_lock); >>>> + list_for_each_entry_safe (lo, nextlp, &fp->fi_layouts, lo_perfile) { >>>> + struct nfsd4_pnfs_layoutreturn lr; >>>> + bool empty; >>>> + >>>> + /* Check for a match */ >>>> + if (!lo->lo_roc || >>>> + (lo->lo_client != clp) || >>>> + ((iomode != IOMODE_ANY) && (lo->lo_seg.iomode != iomode)) >>>> + ) >>>> + continue; >>>> + >>>> + /* Return the layout */ >>>> + memset(&lr, 0, sizeof(lr)); >>>> + lr.args.lr_return_type = RETURN_FILE; >>>> + lr.args.lr_seg = lo->lo_seg; >>>> + dequeue_layout(lo); >>>> + destroy_layout(lo); /* do not access lp after this */ >>>> + >>>> + empty = list_empty(&fp->fi_layouts); >>>> + fs_layout_return(fp->fi_inode->i_sb, fp->fi_inode, &lr, >>>> + LR_FLAG_EXPIRE, >>>> + empty ? PNFS_LAST_LAYOUT_NO_RECALLS : NULL); >>>> + } >>>> + spin_unlock(&layout_lock); >>>> +} >>>> + >>>> void pnfs_expire_client(struct nfs4_client *clp) >>>> { >>>> for (;;) { >>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >>>> index 66f6f48..a116e41 100644 >>>> --- a/fs/nfsd/nfs4state.c >>>> +++ b/fs/nfsd/nfs4state.c >>>> @@ -3592,6 +3592,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >>>> /* release_stateid() calls nfsd_close() if needed */ >>>> release_open_stateid(stp); >>>> >>>> + /* RFC: what to do with IOMODE */ >>>> + pnfs_roc(stp->st_stateowner->so_client, stp->st_file, IOMODE_ANY); >>>> + >>>> /* place unused nfs4_stateowners on so_close_lru list to be >>>> * released by the laundromat service after the lease period >>>> * to enable us to handle CLOSE replay >>>> diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h >>>> index c11d93d..fcb16d2 100644 >>>> --- a/fs/nfsd/pnfsd.h >>>> +++ b/fs/nfsd/pnfsd.h >>>> @@ -59,6 +59,7 @@ struct nfs4_layout { >>>> struct nfs4_client *lo_client; >>>> struct nfs4_layout_state *lo_state; >>>> struct nfsd4_layout_seg lo_seg; >>>> + bool lo_roc; >>>> }; >>>> >>>> struct pnfs_inval_state { >>>> @@ -124,6 +125,8 @@ int nfs4_pnfs_cb_change_state(struct pnfs_get_state *); >>>> void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *); >>>> int put_layoutrecall(struct nfs4_layoutrecall *); >>>> void nomatching_layout(struct nfs4_layoutrecall *); >>>> +void pnfs_roc(struct nfs4_client *clp, struct nfs4_file *fp, >>>> + enum pnfs_iomode iomode); >>>> void *layoutrecall_done(struct nfs4_layoutrecall *); >>>> void nfsd4_cb_layout(struct nfs4_layoutrecall *); >>>> int nfsd_layout_recall_cb(struct super_block *, struct inode *, >>> > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html