Return-Path: Received: from mx2.netapp.com ([216.240.18.37]:10349 "EHLO mx2.netapp.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751741Ab1BPTzT convert rfc822-to-8bit (ORCPT ); Wed, 16 Feb 2011 14:55:19 -0500 Subject: Re: [PATCH pNFS wave 3 Version 2 10/18] NFSv4.1: shift pnfs_update_layout locations Content-Type: text/plain; charset=us-ascii From: Fred Isaman In-Reply-To: <4D5C28B3.5010206@panasas.com> Date: Wed, 16 Feb 2011 14:55:00 -0500 Cc: andros@netapp.com, trond.myklebust@netapp.com, linux-nfs@vger.kernel.org, Andy Adamon , Dean Hildebrand , Fred Isaman , Boaz Harrosh , Oleg Drokin Message-Id: <3CAD00F0-0B5E-4A70-B3AA-265E75100DD6@netapp.com> References: <1297759143-2045-1-git-send-email-andros@netapp.com> <1297759143-2045-11-git-send-email-andros@netapp.com> <4D5C28B3.5010206@panasas.com> To: Benny Halevy Sender: linux-nfs-owner@vger.kernel.org List-ID: MIME-Version: 1.0 On Feb 16, 2011, at 2:42 PM, Benny Halevy wrote: > On 2011-02-15 03:38, andros@netapp.com wrote: >> From: Fred Isaman >> >> Move the pnfs_update_layout call location to nfs_pageio_do_add_request(). >> Grab the lseg sent in the doio function to nfs_read_rpcsetup and attach >> it to each nfs_read_data so it can be sent to the layout driver. >> >> Signed-off-by: Andy Adamon >> Signed-off-by: Andy Adamon >> Signed-off-by: Dean Hildebrand >> Signed-off-by: Fred Isaman >> Signed-off-by: Fred Isaman >> Signed-off-by: Benny Halevy >> Signed-off-by: Boaz Harrosh >> Signed-off-by: Oleg Drokin >> Signed-off-by: Tao Guo >> --- >> fs/nfs/file.c | 4 ---- >> fs/nfs/pagelist.c | 6 ++++-- >> fs/nfs/pnfs.c | 27 ++++++++++++++++----------- >> fs/nfs/pnfs.h | 1 + >> fs/nfs/read.c | 36 ++++++++++++++++++++++++------------ >> fs/nfs/write.c | 4 ++-- >> include/linux/nfs_page.h | 4 ++-- >> include/linux/nfs_xdr.h | 1 + >> 8 files changed, 50 insertions(+), 33 deletions(-) >> >> diff --git a/fs/nfs/file.c b/fs/nfs/file.c >> index 7bf029e..d85a534 100644 >> --- a/fs/nfs/file.c >> +++ b/fs/nfs/file.c >> @@ -387,10 +387,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, >> file->f_path.dentry->d_name.name, >> mapping->host->i_ino, len, (long long) pos); >> >> - pnfs_update_layout(mapping->host, >> - nfs_file_open_context(file), >> - IOMODE_RW); >> - >> start: >> /* >> * Prevent starvation issues if someone is doing a consistency >> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c >> index 9b9a65c..b49cb4b 100644 >> --- a/fs/nfs/pagelist.c >> +++ b/fs/nfs/pagelist.c >> @@ -20,6 +20,7 @@ >> #include >> >> #include "internal.h" >> +#include "pnfs.h" >> >> static struct kmem_cache *nfs_page_cachep; >> >> @@ -213,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req) >> */ >> void nfs_pageio_init(struct nfs_pageio_descriptor *desc, >> struct inode *inode, >> - int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), >> + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *), >> size_t bsize, >> int io_flags) >> { >> @@ -315,7 +316,8 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) >> nfs_page_array_len(desc->pg_base, >> desc->pg_count), >> desc->pg_count, >> - desc->pg_ioflags); >> + desc->pg_ioflags, >> + desc->pg_lseg); >> if (error < 0) >> desc->pg_error = error; >> else >> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c >> index d12f463..a09e3a0 100644 >> --- a/fs/nfs/pnfs.c >> +++ b/fs/nfs/pnfs.c >> @@ -245,7 +245,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) >> rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); >> } >> >> -static void >> +void >> put_lseg(struct pnfs_layout_segment *lseg) >> { >> struct inode *inode; >> @@ -784,7 +784,6 @@ pnfs_update_layout(struct inode *ino, >> out: >> dprintk("%s end, state 0x%lx lseg %p\n", __func__, >> nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); >> - put_lseg(lseg); /* STUB - callers currently ignore return value */ >> return lseg; >> out_unlock: >> spin_unlock(&ino->i_lock); >> @@ -858,23 +857,29 @@ out_forget_reply: >> goto out; >> } >> >> -static void >> -pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio) >> +static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, >> + struct nfs_page *prev, >> + struct nfs_page *req) >> { >> - struct pnfs_layoutdriver_type *ld; >> - >> - ld = NFS_SERVER(inode)->pnfs_curr_ld; >> - pgio->pg_test = (ld ? ld->pg_test : NULL); >> + if (pgio->pg_count == prev->wb_bytes) { >> + /* This is first coelesce call for a series of nfs_pages */ >> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, >> + prev->wb_context, >> + IOMODE_READ); >> + } >> + return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); >> } >> >> /* >> * rsize is already set by caller to MDS rsize. >> */ >> void >> -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, >> - struct inode *inode) >> +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) >> { >> - pnfs_set_pg_test(inode, pgio); >> + struct pnfs_layoutdriver_type *ld; >> + >> + ld = NFS_SERVER(inode)->pnfs_curr_ld; >> + pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; >> } >> >> /* >> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h >> index db52d96..5107d14 100644 >> --- a/fs/nfs/pnfs.h >> +++ b/fs/nfs/pnfs.h >> @@ -151,6 +151,7 @@ extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); >> >> /* pnfs.c */ >> void get_layout_hdr(struct pnfs_layout_hdr *lo); >> +void put_lseg(struct pnfs_layout_segment *lseg); >> struct pnfs_layout_segment * >> pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, >> enum pnfs_iomode access_type); >> diff --git a/fs/nfs/read.c b/fs/nfs/read.c >> index 2a27659..7896e3d 100644 >> --- a/fs/nfs/read.c >> +++ b/fs/nfs/read.c >> @@ -20,17 +20,17 @@ >> #include >> >> #include >> +#include "pnfs.h" >> >> #include "nfs4_fs.h" >> #include "internal.h" >> #include "iostat.h" >> #include "fscache.h" >> -#include "pnfs.h" >> >> #define NFSDBG_FACILITY NFSDBG_PAGECACHE >> >> -static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); >> -static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); >> +static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); >> +static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); >> static const struct rpc_call_ops nfs_read_partial_ops; >> static const struct rpc_call_ops nfs_read_full_ops; >> >> @@ -69,6 +69,7 @@ void nfs_readdata_free(struct nfs_read_data *p) >> >> static void nfs_readdata_release(struct nfs_read_data *rdata) >> { >> + put_lseg(rdata->lseg); >> put_nfs_open_context(rdata->args.context); >> nfs_readdata_free(rdata); >> } >> @@ -117,11 +118,11 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, >> LIST_HEAD(one_request); >> struct nfs_page *new; >> unsigned int len; >> + struct pnfs_layout_segment *lseg; >> >> len = nfs_page_length(page); >> if (len == 0) >> return nfs_return_empty_page(page); >> - pnfs_update_layout(inode, ctx, IOMODE_READ); >> new = nfs_create_request(ctx, inode, page, 0, len); >> if (IS_ERR(new)) { >> unlock_page(page); >> @@ -131,10 +132,12 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, >> zero_user_segment(page, len, PAGE_CACHE_SIZE); >> >> nfs_list_add_request(new, &one_request); >> + lseg = pnfs_update_layout(inode, ctx, IOMODE_READ); >> if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) >> - nfs_pagein_multi(inode, &one_request, 1, len, 0); >> + nfs_pagein_multi(inode, &one_request, 1, len, 0, lseg); >> else >> - nfs_pagein_one(inode, &one_request, 1, len, 0); >> + nfs_pagein_one(inode, &one_request, 1, len, 0, lseg); >> + put_lseg(lseg); >> return 0; >> } >> >> @@ -160,7 +163,8 @@ static void nfs_readpage_release(struct nfs_page *req) >> */ >> static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, >> const struct rpc_call_ops *call_ops, >> - unsigned int count, unsigned int offset) >> + unsigned int count, unsigned int offset, >> + struct pnfs_layout_segment *lseg) >> { >> struct inode *inode = req->wb_context->path.dentry->d_inode; >> int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; >> @@ -183,6 +187,7 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, >> data->req = req; >> data->inode = inode; >> data->cred = msg.rpc_cred; >> + data->lseg = get_lseg(lseg); >> >> data->args.fh = NFS_FH(inode); >> data->args.offset = req_offset(req) + offset; >> @@ -240,7 +245,7 @@ nfs_async_read_error(struct list_head *head) >> * won't see the new data until our attribute cache is updated. This is more >> * or less conventional NFS client behavior. >> */ >> -static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) >> +static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg) >> { >> struct nfs_page *req = nfs_list_entry(head->next); >> struct page *page = req->wb_page; >> @@ -266,6 +271,8 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne >> } while(nbytes != 0); >> atomic_set(&req->wb_complete, requests); >> >> + /* We know lseg==NULL */ >> + lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ); >> ClearPageError(page); >> offset = 0; >> nbytes = count; >> @@ -280,12 +287,13 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne >> if (nbytes < rsize) >> rsize = nbytes; >> ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, >> - rsize, offset); >> + rsize, offset, lseg); >> if (ret == 0) >> ret = ret2; >> offset += rsize; >> nbytes -= rsize; >> } while (nbytes != 0); >> + put_lseg(lseg); >> >> return ret; >> >> @@ -300,7 +308,7 @@ out_bad: >> return -ENOMEM; >> } >> >> -static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) >> +static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg) >> { >> struct nfs_page *req; >> struct page **pages; >> @@ -320,9 +328,14 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned >> *pages++ = req->wb_page; >> } >> req = nfs_list_entry(data->pages.next); >> + if ((!lseg) && list_is_singular(&data->pages)) >> + lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ); >> >> - return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); >> + ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0, lseg); >> + put_lseg(lseg); > > Shouldn't that be done only if pnfs_update_layout was called here? > Otherwise, the caller, nfs_readpage_async puts the lseg it passes down. > You are right there is a problem. But it needs to be fixed by removing the put_lseg from nfs_readpage_async. >> + return ret; >> out_bad: >> + put_lseg(lseg); > > I'd unify the common exit path by doing nfs_async_read_error on the error path > and then goto out for the common code. > OK. Fred > Benny > >> nfs_async_read_error(head); >> return ret; >> } >> @@ -625,7 +638,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, >> if (ret == 0) >> goto read_complete; /* all pages were read */ >> >> - pnfs_update_layout(inode, desc.ctx, IOMODE_READ); >> pnfs_pageio_init_read(&pgio, inode); >> if (rsize < PAGE_CACHE_SIZE) >> nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); >> diff --git a/fs/nfs/write.c b/fs/nfs/write.c >> index 6e90cdf..aca0268 100644 >> --- a/fs/nfs/write.c >> +++ b/fs/nfs/write.c >> @@ -880,7 +880,7 @@ static void nfs_redirty_request(struct nfs_page *req) >> * Generate multiple small requests to write out a single >> * contiguous dirty area on one page. >> */ >> -static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) >> +static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg) >> { >> struct nfs_page *req = nfs_list_entry(head->next); >> struct page *page = req->wb_page; >> @@ -947,7 +947,7 @@ out_bad: >> * This is the case if nfs_updatepage detects a conflicting request >> * that has been written but not committed. >> */ >> -static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) >> +static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg) >> { >> struct nfs_page *req; >> struct page **pages; >> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h >> index 4eaf27a..ba88ff4 100644 >> --- a/include/linux/nfs_page.h >> +++ b/include/linux/nfs_page.h >> @@ -59,7 +59,7 @@ struct nfs_pageio_descriptor { >> unsigned int pg_base; >> >> struct inode *pg_inode; >> - int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int); >> + int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); >> int pg_ioflags; >> int pg_error; >> struct pnfs_layout_segment *pg_lseg; >> @@ -81,7 +81,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, >> pgoff_t idx_start, unsigned int npages, int tag); >> extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, >> struct inode *inode, >> - int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), >> + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *), >> size_t bsize, >> int how); >> extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, >> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h >> index d159fe7..560923e 100644 >> --- a/include/linux/nfs_xdr.h >> +++ b/include/linux/nfs_xdr.h >> @@ -1017,6 +1017,7 @@ struct nfs_read_data { >> struct nfs_readargs args; >> struct nfs_readres res; >> unsigned long timestamp; /* For lease renewal */ >> + struct pnfs_layout_segment *lseg; >> struct page *page_array[NFS_PAGEVEC_SIZE]; >> }; >>