Return-Path: Received: from daytona.panasas.com ([67.152.220.89]:15554 "EHLO daytona.panasas.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753781Ab1E3Pae (ORCPT ); Mon, 30 May 2011 11:30:34 -0400 Message-ID: <4DE3B812.8020804@panasas.com> Date: Mon, 30 May 2011 18:30:26 +0300 From: Boaz Harrosh To: Benny Halevy , "J. Bruce Fields" , NFS list , Andy Adamson CC: Trond Myklebust Subject: [RFC] pnfsd: Return all ROC layouts On Close Content-Type: text/plain; charset=UTF-8 Sender: linux-nfs-owner@vger.kernel.org List-ID: MIME-Version: 1.0 As talk to with Trond, when a client has sent an nfs4_close, the server can surly assume that the given client has already forgotten all it's ROC given layouts. And in fact the current Linux-client goes to grate length to make sure all ROC layouts are invalidated, before sending the close. If a new open+IO comes in and tries to get a new layout, it will wait for the nfs4_close to return before sending the new layout_get. So in light of above the server should simulate layout_return and remove any layouts marked ROC on nfs4_close. Current code is protocol correct, but a more fine-grained approach can be devised to also release layouts in open_downgrade, of all unused iomodes. But to do this I'll need the help of an expert. With this patch in, I'm finally able to run with the new Linux-Client together with a Linux-Server. Before, the layouts where only released and freed on umount and in a modest machine like the UML I use, few 10s of "git checkout linux" the server would oom. Benny please submit it to the pnfs tree, so we can run with this in Bakeathone. So please all help me to verify and refine this code. Signed-off-by: Boaz Harrosh --- fs/nfsd/nfs4pnfsd.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- fs/nfsd/nfs4state.c | 3 +++ fs/nfsd/pnfsd.h | 3 +++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4pnfsd.c b/fs/nfsd/nfs4pnfsd.c index 9089e6b9..d8f744e 100644 --- a/fs/nfsd/nfs4pnfsd.c +++ b/fs/nfsd/nfs4pnfsd.c @@ -402,7 +402,8 @@ init_layout(struct nfs4_layout_state *ls, struct nfs4_client *clp, struct svc_fh *current_fh, struct nfsd4_layout_seg *seg, - stateid_t *stateid) + stateid_t *stateid, + bool roc) { dprintk("pNFS %s: ls %p lp %p clp %p fp %p ino %p\n", __func__, ls, lp, clp, fp, fp->fi_inode); @@ -412,6 +413,7 @@ init_layout(struct nfs4_layout_state *ls, lp->lo_file = fp; get_layout_state(ls); lp->lo_state = ls; + lp->lo_roc = roc; memcpy(&lp->lo_seg, seg, sizeof(lp->lo_seg)); spin_lock(&layout_lock); update_layout_stateid(ls, stateid); @@ -914,7 +916,8 @@ nfs4_pnfs_get_layout(struct nfsd4_pnfs_layoutget *lgp, goto out_freelayout; /* Can't merge, so let's initialize this new layout */ - init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg, &lgp->lg_sid); + init_layout(ls, lp, fp, clp, lgp->lg_fhp, &res.lg_seg, &lgp->lg_sid, + res.lg_return_on_close); out_unlock: if (ls) put_layout_state(ls); @@ -1328,6 +1331,43 @@ nomatching_layout(struct nfs4_layoutrecall *clr) iput(inode); } +/* Return On Close: + * Look for all layouts of @fp that belong to @clp, if ROC is set, remove + * the layout and simulate a layout_return. Surly the client has forgotten + * these layouts or it would return them before the close. + */ +void pnfs_roc(struct nfs4_client *clp, struct nfs4_file *fp, + enum pnfs_iomode iomode) +{ + struct nfs4_layout *lo, *nextlp; + + spin_lock(&layout_lock); + list_for_each_entry_safe (lo, nextlp, &fp->fi_layouts, lo_perfile) { + struct nfsd4_pnfs_layoutreturn lr; + bool empty; + + /* Check for a match */ + if (!lo->lo_roc || + (lo->lo_client != clp) || + ((iomode != IOMODE_ANY) && (lo->lo_seg.iomode != iomode)) + ) + continue; + + /* Return the layout */ + memset(&lr, 0, sizeof(lr)); + lr.args.lr_return_type = RETURN_FILE; + lr.args.lr_seg = lo->lo_seg; + dequeue_layout(lo); + destroy_layout(lo); /* do not access lp after this */ + + empty = list_empty(&fp->fi_layouts); + fs_layout_return(fp->fi_inode->i_sb, fp->fi_inode, &lr, + LR_FLAG_EXPIRE, + empty ? PNFS_LAST_LAYOUT_NO_RECALLS : NULL); + } + spin_unlock(&layout_lock); +} + void pnfs_expire_client(struct nfs4_client *clp) { for (;;) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 66f6f48..a116e41 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3592,6 +3592,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* release_stateid() calls nfsd_close() if needed */ release_open_stateid(stp); + /* RFC: what to do with IOMODE */ + pnfs_roc(stp->st_stateowner->so_client, stp->st_file, IOMODE_ANY); + /* place unused nfs4_stateowners on so_close_lru list to be * released by the laundromat service after the lease period * to enable us to handle CLOSE replay diff --git a/fs/nfsd/pnfsd.h b/fs/nfsd/pnfsd.h index c11d93d..fcb16d2 100644 --- a/fs/nfsd/pnfsd.h +++ b/fs/nfsd/pnfsd.h @@ -59,6 +59,7 @@ struct nfs4_layout { struct nfs4_client *lo_client; struct nfs4_layout_state *lo_state; struct nfsd4_layout_seg lo_seg; + bool lo_roc; }; struct pnfs_inval_state { @@ -124,6 +125,8 @@ int nfs4_pnfs_cb_change_state(struct pnfs_get_state *); void nfs4_ds_get_verifier(stateid_t *, struct super_block *, u32 *); int put_layoutrecall(struct nfs4_layoutrecall *); void nomatching_layout(struct nfs4_layoutrecall *); +void pnfs_roc(struct nfs4_client *clp, struct nfs4_file *fp, + enum pnfs_iomode iomode); void *layoutrecall_done(struct nfs4_layoutrecall *); void nfsd4_cb_layout(struct nfs4_layoutrecall *); int nfsd_layout_recall_cb(struct super_block *, struct inode *, -- 1.7.2.3