The following cleanups apply on top of the nfs-for-next branch, and are
designed to allow the objects and blocks code to use different read/write
block sizes than the write-through-MDS.
It also pushes a lot of the pNFS code back down into pnfs.c where it
belongs...
Cheers
Trond
Trond Myklebust (8):
NFS: Clean up nfs_read_rpcsetup and nfs_write_rpcsetup
NFS: Clean up: split out the RPC transmission from
nfs_pagein_multi/one
NFS: Cache rpc_ops in struct nfs_pageio_descriptor
NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write
request
NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is
needed
NFS: Move the pnfs read code into pnfs.c
NFS: Move the pnfs write code into pnfs.c
NFS: Clean up - simplify the switch to read/write-through-MDS
fs/nfs/internal.h | 12 +++--
fs/nfs/nfs4filelayout.c | 9 ++--
fs/nfs/objlayout/objio_osd.c | 4 +-
fs/nfs/pagelist.c | 57 +++++++++++++++++++-
fs/nfs/pnfs.c | 118 ++++++++++++++++++++++++++++++++++++++--
fs/nfs/pnfs.h | 20 +------
fs/nfs/read.c | 121 ++++++++++++++++++++++--------------------
fs/nfs/write.c | 119 +++++++++++++++++++++++------------------
include/linux/nfs_page.h | 8 +--
include/linux/nfs_xdr.h | 2 +
10 files changed, 317 insertions(+), 153 deletions(-)
--
1.7.6
Instead of looking up the rsize and wsize, the routines that generate the
RPC requests should really be using the pg_bsize, since that is what we
use when deciding whether or not to coalesce write requests...
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/read.c | 2 +-
fs/nfs/write.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b2e9aad..47f92c1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -303,7 +303,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
- size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
+ size_t rsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8fa7def..80bd74f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -946,7 +946,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_write_data *data;
- size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
+ size_t wsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
--
1.7.6
On 2011-07-12 22:29, Trond Myklebust wrote:
> If an attempt to do pNFS fails, and we have to fall back to writing through
> the MDS, then we may want to re-coalesce the requests that we already have
> since the block size for the MDS read/writes may be different to that of
> the DS read/writes.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/pagelist.c | 57 +++++++++++++++++++++++++++++++++++++++++++--
> include/linux/nfs_page.h | 3 +-
> 2 files changed, 56 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index d421e19..7139dbf 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -240,6 +240,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> desc->pg_bsize = bsize;
> desc->pg_base = 0;
> desc->pg_moreio = 0;
> + desc->pg_recoalesce = 0;
> desc->pg_inode = inode;
> desc->pg_ops = pg_ops;
> desc->pg_ioflags = io_flags;
> @@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
> * Returns true if the request 'req' was successfully coalesced into the
> * existing list of pages 'desc'.
> */
> -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> +static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> struct nfs_page *req)
> {
> while (!nfs_pageio_do_add_request(desc, req)) {
> @@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> if (desc->pg_error < 0)
> return 0;
> desc->pg_moreio = 0;
> + if (desc->pg_recoalesce)
> + return 0;
> }
> return 1;
> }
>
> +static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
> +{
> + LIST_HEAD(head);
> +
> + do {
> + list_splice_init(&desc->pg_list, &head);
> + desc->pg_bytes_written -= desc->pg_count;
> + desc->pg_count = 0;
> + desc->pg_base = 0;
> + desc->pg_recoalesce = 0;
> +
> + while (!list_empty(&head)) {
> + struct nfs_page *req;
> +
> + req = list_first_entry(&head, struct nfs_page, wb_list);
> + nfs_list_remove_request(req);
> + if (__nfs_pageio_add_request(desc, req))
> + continue;
> + if (desc->pg_error < 0)
> + return 0;
> + break;
> + }
> + } while (desc->pg_recoalesce);
> + return 1;
> +}
> +
> +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> + struct nfs_page *req)
> +{
> + int ret;
> +
> + do {
> + ret = __nfs_pageio_add_request(desc, req);
> + if (ret)
> + break;
> + if (desc->pg_error < 0)
> + break;
> + ret = nfs_do_recoalesce(desc);
> + } while (ret);
> + return ret;
> +}
> +
> /**
> * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
> * @desc: pointer to io descriptor
> */
> void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
> {
> - nfs_pageio_doio(desc);
> + for (;;) {
> + nfs_pageio_doio(desc);
> + if (!desc->pg_recoalesce)
> + break;
> + if (!nfs_do_recoalesce(desc))
> + break;
> + }
nit: how about the following?
do
nfs_pageio_doio(desc);
while (desc->pg_recoalesce && nfs_do_recoalesce(desc));
Benny
> }
>
> /**
> @@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
> if (!list_empty(&desc->pg_list)) {
> struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
> if (index != prev->wb_index + 1)
> - nfs_pageio_doio(desc);
> + nfs_pageio_complete(desc);
> }
> }
>
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index db3194f..7241b2a 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -68,7 +68,8 @@ struct nfs_pageio_descriptor {
> size_t pg_count;
> size_t pg_bsize;
> unsigned int pg_base;
> - char pg_moreio;
> + unsigned char pg_moreio : 1,
> + pg_recoalesce : 1;
>
> struct inode *pg_inode;
> const struct nfs_pageio_ops *pg_ops;
...and ensure that we recoalesce to take into account differences in
block sizes when falling back to read through the MDS.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 4 +++
fs/nfs/nfs4filelayout.c | 2 +-
fs/nfs/objlayout/objio_osd.c | 2 +-
fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++-
fs/nfs/pnfs.h | 10 +------
fs/nfs/read.c | 46 ++++++++++++++-------------------
include/linux/nfs_page.h | 1 -
7 files changed, 82 insertions(+), 40 deletions(-)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31e8b50..795b3e0 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -295,10 +295,14 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
struct nfs_pageio_descriptor;
extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode);
+extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index af9bf9e..fc556d6 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -735,7 +735,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
.pg_init = filelayout_pg_init_read,
.pg_test = filelayout_pg_test,
- .pg_doio = nfs_generic_pg_readpages,
+ .pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops filelayout_pg_write_ops = {
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 70272d5..add6289 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
static const struct nfs_pageio_ops objio_pg_read_ops = {
.pg_init = pnfs_generic_pg_init_read,
.pg_test = objio_pg_test,
- .pg_doio = nfs_generic_pg_readpages,
+ .pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops objio_pg_write_ops = {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5b3cc3f..9eca5a8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -28,6 +28,7 @@
*/
#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
@@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+ struct nfs_read_data *data)
+{
+ list_splice_tail_init(&data->pages, &desc->pg_list);
+ if (data->req && list_empty(&data->req->wb_list))
+ nfs_list_add_request(data->req, &desc->pg_list);
+ nfs_pageio_reset_read_mds(desc);
+ desc->pg_recoalesce = 1;
+ nfs_readdata_release(data);
+}
+
/*
* Call the appropriate parallel I/O subsystem read function.
*/
-enum pnfs_try_status
+static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
- const struct rpc_call_ops *call_ops)
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg)
{
struct inode *inode = rdata->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;
rdata->mds_ops = call_ops;
+ rdata->lseg = get_lseg(lseg);
dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1243,6 +1258,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
return trypnfs;
}
+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ struct nfs_read_data *data;
+ const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+ desc->pg_lseg = NULL;
+ while (!list_empty(head)) {
+ enum pnfs_try_status trypnfs;
+
+ data = list_entry(head->next, struct nfs_read_data, list);
+ list_del_init(&data->list);
+
+ trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_read_through_mds(desc, data);
+ }
+ put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+ int ret;
+
+ ret = nfs_generic_pagein(desc, &head);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
+ }
+ pnfs_do_multiple_reads(desc, &head);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
/*
* Currently there is only one (whole file) write lseg.
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a59736e..c40ffa5 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -157,9 +157,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
const struct rpc_call_ops *, int);
-enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
- const struct rpc_call_ops *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
@@ -330,13 +329,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
}
static inline enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops)
-{
- return PNFS_NOT_ATTEMPTED;
-}
-
-static inline enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops, int how)
{
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 47f92c1..3745eed 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -67,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
mempool_free(p, nfs_rdata_mempool);
}
-static void nfs_readdata_release(struct nfs_read_data *rdata)
+void nfs_readdata_release(struct nfs_read_data *rdata)
{
put_lseg(rdata->lseg);
put_nfs_open_context(rdata->args.context);
@@ -120,6 +120,12 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);
+void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
+{
+ pgio->pg_ops = &nfs_pageio_read_ops;
+ pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
+}
+
static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
{
@@ -235,26 +241,16 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
}
static int nfs_do_read(struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops,
- struct pnfs_layout_segment *lseg)
+ const struct rpc_call_ops *call_ops)
{
struct inode *inode = data->args.context->path.dentry->d_inode;
- if (lseg) {
- data->lseg = get_lseg(lseg);
- if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
- return 0;
- put_lseg(data->lseg);
- data->lseg = NULL;
- }
-
return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}
static int
nfs_do_multiple_reads(struct list_head *head,
- const struct rpc_call_ops *call_ops,
- struct pnfs_layout_segment *lseg)
+ const struct rpc_call_ops *call_ops)
{
struct nfs_read_data *data;
int ret = 0;
@@ -265,7 +261,7 @@ nfs_do_multiple_reads(struct list_head *head,
data = list_entry(head->next, struct nfs_read_data, list);
list_del_init(&data->list);
- ret2 = nfs_do_read(data, call_ops, lseg);
+ ret2 = nfs_do_read(data, call_ops);
if (ret == 0)
ret = ret2;
}
@@ -372,25 +368,23 @@ out:
return ret;
}
-int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_pagein_multi(desc, head);
+ return nfs_pagein_one(desc, head);
+}
+
+static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
LIST_HEAD(head);
int ret;
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- ret = nfs_pagein_multi(desc, &head);
- else
- ret = nfs_pagein_one(desc, &head);
-
+ ret = nfs_generic_pagein(desc, &head);
if (ret == 0)
- ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops,
- desc->pg_lseg);
- put_lseg(desc->pg_lseg);
- desc->pg_lseg = NULL;
+ ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
-
static const struct nfs_pageio_ops nfs_pageio_read_ops = {
.pg_test = nfs_generic_pg_test,
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 7241b2a..0a48f84 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -108,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req);
extern int nfs_set_page_tag_locked(struct nfs_page *req);
extern void nfs_clear_page_tag_locked(struct nfs_page *req);
-extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
--
1.7.6
...and do the same for nfs_flush_multi/one.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/read.c | 92 ++++++++++++++++++++++++++++++-------------------------
fs/nfs/write.c | 93 +++++++++++++++++++++++++++++++------------------------
2 files changed, 102 insertions(+), 83 deletions(-)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 248a554..d30d6de 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,8 +30,6 @@
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
@@ -253,6 +251,27 @@ static int nfs_do_read(struct nfs_read_data *data,
return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}
+static int
+nfs_do_multiple_reads(struct list_head *head,
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg)
+{
+ struct nfs_read_data *data;
+ int ret = 0;
+
+ while (!list_empty(head)) {
+ int ret2;
+
+ data = list_entry(head->next, struct nfs_read_data, list);
+ list_del_init(&data->list);
+
+ ret2 = nfs_do_read(data, call_ops, lseg);
+ if (ret == 0)
+ ret = ret2;
+ }
+ return ret;
+}
+
static void
nfs_async_read_error(struct list_head *head)
{
@@ -279,7 +298,7 @@ nfs_async_read_error(struct list_head *head)
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
@@ -288,11 +307,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
- LIST_HEAD(list);
nfs_list_remove_request(req);
+ offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);
@@ -300,57 +318,33 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
data = nfs_readdata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->list, &list);
+ data->pagevec[0] = page;
+ nfs_read_rpcsetup(req, data, len, offset);
+ list_add(&data->list, res);
requests++;
nbytes -= len;
+ offset += len;
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
-
ClearPageError(page);
- offset = 0;
- nbytes = desc->pg_count;
- do {
- int ret2;
-
- data = list_entry(list.next, struct nfs_read_data, list);
- list_del_init(&data->list);
-
- data->pagevec[0] = page;
-
- if (nbytes < rsize)
- rsize = nbytes;
- nfs_read_rpcsetup(req, data, rsize, offset);
- ret2 = nfs_do_read(data, &nfs_read_partial_ops, lseg);
- if (ret == 0)
- ret = ret2;
- offset += rsize;
- nbytes -= rsize;
- } while (nbytes != 0);
- put_lseg(lseg);
- desc->pg_lseg = NULL;
-
return ret;
-
out_bad:
- while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_read_data, list);
+ while (!list_empty(res)) {
+ data = list_entry(res->next, struct nfs_read_data, list);
list_del(&data->list);
nfs_readdata_free(data);
}
SetPageError(page);
nfs_readpage_release(req);
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return -ENOMEM;
}
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
struct list_head *head = &desc->pg_list;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret = -ENOMEM;
data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
@@ -371,18 +365,32 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
req = nfs_list_entry(data->pages.next);
nfs_read_rpcsetup(req, data, desc->pg_count, 0);
- ret = nfs_do_read(data, &nfs_read_full_ops, lseg);
+ list_add(&data->list, res);
out:
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return ret;
}
int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_pagein_multi(desc);
- return nfs_pagein_one(desc);
+ LIST_HEAD(head);
+ int ret;
+
+ if (desc->pg_bsize < PAGE_CACHE_SIZE) {
+ ret = nfs_pagein_multi(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_reads(&head,
+ &nfs_read_partial_ops,
+ desc->pg_lseg);
+ } else {
+ ret = nfs_pagein_one(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_reads(&head,
+ &nfs_read_full_ops,
+ desc->pg_lseg);
+ }
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0aeb09b..89cc687 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -903,6 +903,27 @@ static int nfs_do_write(struct nfs_write_data *data,
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}
+static int nfs_do_multiple_writes(struct list_head *head,
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg,
+ int how)
+{
+ struct nfs_write_data *data;
+ int ret = 0;
+
+ while (!list_empty(head)) {
+ int ret2;
+
+ data = list_entry(head->next, struct nfs_write_data, list);
+ list_del_init(&data->list);
+
+ ret2 = nfs_do_write(data, call_ops, lseg, how);
+ if (ret == 0)
+ ret = ret2;
+ }
+ return ret;
+}
+
/* If a nfs_flush_* function fails, it should remove reqs from @head and
* call this on each, which will prepare them to be retried on next
* writeback using standard nfs.
@@ -920,7 +941,7 @@ static void nfs_redirty_request(struct nfs_page *req)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
@@ -929,8 +950,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
- LIST_HEAD(list);
nfs_list_remove_request(req);
@@ -940,6 +959,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+ offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes, wsize);
@@ -947,47 +967,23 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
data = nfs_writedata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->list, &list);
+ data->pagevec[0] = page;
+ nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
+ list_add(&data->list, res);
requests++;
nbytes -= len;
+ offset += len;
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
-
- ClearPageError(page);
- offset = 0;
- nbytes = desc->pg_count;
- do {
- int ret2;
-
- data = list_entry(list.next, struct nfs_write_data, list);
- list_del_init(&data->list);
-
- data->pagevec[0] = page;
-
- if (nbytes < wsize)
- wsize = nbytes;
- nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
- ret2 = nfs_do_write(data, &nfs_write_partial_ops, lseg,
- desc->pg_ioflags);
- if (ret == 0)
- ret = ret2;
- offset += wsize;
- nbytes -= wsize;
- } while (nbytes != 0);
-
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return ret;
out_bad:
- while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_write_data, list);
+ while (!list_empty(res)) {
+ data = list_entry(res->next, struct nfs_write_data, list);
list_del(&data->list);
nfs_writedata_free(data);
}
nfs_redirty_request(req);
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return -ENOMEM;
}
@@ -999,13 +995,12 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret;
data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
@@ -1035,18 +1030,34 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
/* Set up the argument struct */
nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
- ret = nfs_do_write(data, &nfs_write_full_ops, lseg, desc->pg_ioflags);
+ list_add(&data->list, res);
out:
- put_lseg(lseg); /* Cleans any gotten in ->pg_test */
- desc->pg_lseg = NULL;
return ret;
}
int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(desc);
- return nfs_flush_one(desc);
+ LIST_HEAD(head);
+ int ret;
+
+ if (desc->pg_bsize < PAGE_CACHE_SIZE) {
+ ret = nfs_flush_multi(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_writes(&head,
+ &nfs_write_partial_ops,
+ desc->pg_lseg,
+ desc->pg_ioflags);
+ } else {
+ ret = nfs_flush_one(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_writes(&head,
+ &nfs_write_full_ops,
+ desc->pg_lseg,
+ desc->pg_ioflags);
+ }
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages);
--
1.7.6
Split them up into two parts: one which sets up the struct nfs_read/write_data,
the other which sets up the actual RPC call or pNFS call.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/read.c | 42 +++++++++++++++++++++++++-----------------
fs/nfs/write.c | 41 ++++++++++++++++++++++++++---------------
include/linux/nfs_xdr.h | 2 ++
3 files changed, 53 insertions(+), 32 deletions(-)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c394662..248a554 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -213,17 +213,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
/*
* Set up the NFS read request struct
*/
-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops,
- unsigned int count, unsigned int offset,
- struct pnfs_layout_segment *lseg)
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ unsigned int count, unsigned int offset)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;
data->req = req;
data->inode = inode;
data->cred = req->wb_context->cred;
- data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -237,10 +234,21 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.count = count;
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
+}
- if (data->lseg &&
- (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
- return 0;
+static int nfs_do_read(struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg)
+{
+ struct inode *inode = data->args.context->path.dentry->d_inode;
+
+ if (lseg) {
+ data->lseg = get_lseg(lseg);
+ if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
+ return 0;
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ }
return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}
@@ -292,7 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
data = nfs_readdata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->pages, &list);
+ list_add(&data->list, &list);
requests++;
nbytes -= len;
} while(nbytes != 0);
@@ -304,15 +312,15 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
do {
int ret2;
- data = list_entry(list.next, struct nfs_read_data, pages);
- list_del_init(&data->pages);
+ data = list_entry(list.next, struct nfs_read_data, list);
+ list_del_init(&data->list);
data->pagevec[0] = page;
if (nbytes < rsize)
rsize = nbytes;
- ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
- rsize, offset, lseg);
+ nfs_read_rpcsetup(req, data, rsize, offset);
+ ret2 = nfs_do_read(data, &nfs_read_partial_ops, lseg);
if (ret == 0)
ret = ret2;
offset += rsize;
@@ -325,8 +333,8 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
out_bad:
while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_read_data, pages);
- list_del(&data->pages);
+ data = list_entry(list.next, struct nfs_read_data, list);
+ list_del(&data->list);
nfs_readdata_free(data);
}
SetPageError(page);
@@ -362,8 +370,8 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
}
req = nfs_list_entry(data->pages.next);
- ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
- 0, lseg);
+ nfs_read_rpcsetup(req, data, desc->pg_count, 0);
+ ret = nfs_do_read(data, &nfs_read_full_ops, lseg);
out:
put_lseg(lseg);
desc->pg_lseg = NULL;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1af4d82..0aeb09b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
/*
* Set up the argument/result storage required for the RPC call.
*/
-static int nfs_write_rpcsetup(struct nfs_page *req,
+static void nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
- struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;
@@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->req = req;
data->inode = inode = req->wb_context->path.dentry->d_inode;
data->cred = req->wb_context->cred;
- data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -886,10 +883,22 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->res.count = count;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
+}
- if (data->lseg &&
- (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
- return 0;
+static int nfs_do_write(struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg,
+ int how)
+{
+ struct inode *inode = data->args.context->path.dentry->d_inode;
+
+ if (lseg != NULL) {
+ data->lseg = get_lseg(lseg);
+ if (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)
+ return 0;
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ }
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}
@@ -938,7 +947,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
data = nfs_writedata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->pages, &list);
+ list_add(&data->list, &list);
requests++;
nbytes -= len;
} while (nbytes != 0);
@@ -950,15 +959,16 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
do {
int ret2;
- data = list_entry(list.next, struct nfs_write_data, pages);
- list_del_init(&data->pages);
+ data = list_entry(list.next, struct nfs_write_data, list);
+ list_del_init(&data->list);
data->pagevec[0] = page;
if (nbytes < wsize)
wsize = nbytes;
- ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
- wsize, offset, lseg, desc->pg_ioflags);
+ nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
+ ret2 = nfs_do_write(data, &nfs_write_partial_ops, lseg,
+ desc->pg_ioflags);
if (ret == 0)
ret = ret2;
offset += wsize;
@@ -971,8 +981,8 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
out_bad:
while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_write_data, pages);
- list_del(&data->pages);
+ data = list_entry(list.next, struct nfs_write_data, list);
+ list_del(&data->list);
nfs_writedata_free(data);
}
nfs_redirty_request(req);
@@ -1024,7 +1034,8 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */
- ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
+ nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
+ ret = nfs_do_write(data, &nfs_write_full_ops, lseg, desc->pg_ioflags);
out:
put_lseg(lseg); /* Cleans any gotten in ->pg_test */
desc->pg_lseg = NULL;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 956d357..5b11595 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1126,6 +1126,7 @@ struct nfs_read_data {
struct rpc_cred *cred;
struct nfs_fattr fattr; /* fattr storage */
struct list_head pages; /* Coalesced read requests */
+ struct list_head list; /* lists of struct nfs_read_data */
struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
@@ -1149,6 +1150,7 @@ struct nfs_write_data {
struct nfs_fattr fattr;
struct nfs_writeverf verf;
struct list_head pages; /* Coalesced requests we wish to flush */
+ struct list_head list; /* lists of struct nfs_write_data */
struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
--
1.7.6
Use nfs_pageio_reset_read_mds and nfs_pageio_reset_write_mds instead of
completely reinitialising the struct nfs_pageio_descriptor.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 4 ----
fs/nfs/nfs4filelayout.c | 5 ++---
fs/nfs/pnfs.c | 4 ++--
fs/nfs/read.c | 4 ++--
fs/nfs/write.c | 4 ++--
5 files changed, 8 insertions(+), 13 deletions(-)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 758db89..38b1ffc 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -299,16 +299,12 @@ extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
struct list_head *head);
struct nfs_pageio_descriptor;
-extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
struct list_head *head);
-extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_write_data *p);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index fbc5b42..f0b37e1 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -711,7 +711,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
GFP_KERNEL);
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_init_read_mds(pgio, pgio->pg_inode);
+ nfs_pageio_reset_read_mds(pgio);
}
void
@@ -728,8 +728,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
GFP_NOFS);
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_init_write_mds(pgio, pgio->pg_inode,
- pgio->pg_ioflags);
+ nfs_pageio_reset_write_mds(pgio);
}
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 93c7329..38e5508 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1075,7 +1075,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
GFP_KERNEL);
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_init_read_mds(pgio, pgio->pg_inode);
+ nfs_pageio_reset_read_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
@@ -1093,7 +1093,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
GFP_NOFS);
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags);
+ nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3745eed..1472933 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -112,19 +112,19 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
}
}
-void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
NFS_SERVER(inode)->rsize, 0);
}
-EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_ops = &nfs_pageio_read_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e18d842..525fb02 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1051,19 +1051,19 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
.pg_doio = nfs_generic_pg_writepages,
};
-void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
NFS_SERVER(inode)->wsize, ioflags);
}
-EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds);
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_ops = &nfs_pageio_write_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
--
1.7.6
> -----Original Message-----
> From: Benny Halevy [mailto:[email protected]]
> Sent: Wednesday, July 13, 2011 9:41 AM
> To: Myklebust, Trond
> Cc: [email protected]
> Subject: Re: [PATCH 6/8] NFS: Move the pnfs read code into pnfs.c
>
> On 2011-07-12 22:29, Trond Myklebust wrote:
> > ...and ensure that we recoalesce to take into account differences in
> > block sizes when falling back to read through the MDS.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> > fs/nfs/internal.h | 4 +++
> > fs/nfs/nfs4filelayout.c | 2 +-
> > fs/nfs/objlayout/objio_osd.c | 2 +-
> > fs/nfs/pnfs.c | 57
> ++++++++++++++++++++++++++++++++++++++++-
> > fs/nfs/pnfs.h | 10 +------
> > fs/nfs/read.c | 46
++++++++++++++------------------
> -
> > include/linux/nfs_page.h | 1 -
> > 7 files changed, 82 insertions(+), 40 deletions(-)
> >
> > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> > index 31e8b50..795b3e0 100644
> > --- a/fs/nfs/internal.h
> > +++ b/fs/nfs/internal.h
> > @@ -295,10 +295,14 @@ extern int nfs4_get_rootfh(struct nfs_server
> *server, struct nfs_fh *mntfh);
> > extern int nfs_initiate_read(struct nfs_read_data *data, struct
> rpc_clnt *clnt,
> > const struct rpc_call_ops *call_ops);
> > extern void nfs_read_prepare(struct rpc_task *task, void
*calldata);
> > +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
> > + struct list_head *head);
> >
> > struct nfs_pageio_descriptor;
> > extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor
> *pgio,
> > struct inode *inode);
> > +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor
> *pgio);
> > +extern void nfs_readdata_release(struct nfs_read_data *rdata);
> >
> > /* write.c */
> > extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor
> *pgio,
> > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> > index af9bf9e..fc556d6 100644
> > --- a/fs/nfs/nfs4filelayout.c
> > +++ b/fs/nfs/nfs4filelayout.c
> > @@ -735,7 +735,7 @@ filelayout_pg_init_write(struct
> nfs_pageio_descriptor *pgio,
> > static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> > .pg_init = filelayout_pg_init_read,
> > .pg_test = filelayout_pg_test,
> > - .pg_doio = nfs_generic_pg_readpages,
> > + .pg_doio = pnfs_generic_pg_readpages,
> > };
> >
> > static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> > diff --git a/fs/nfs/objlayout/objio_osd.c
> b/fs/nfs/objlayout/objio_osd.c
> > index 70272d5..add6289 100644
> > --- a/fs/nfs/objlayout/objio_osd.c
> > +++ b/fs/nfs/objlayout/objio_osd.c
> > @@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct
> nfs_pageio_descriptor *pgio,
> > static const struct nfs_pageio_ops objio_pg_read_ops = {
> > .pg_init = pnfs_generic_pg_init_read,
> > .pg_test = objio_pg_test,
> > - .pg_doio = nfs_generic_pg_readpages,
> > + .pg_doio = pnfs_generic_pg_readpages,
> > };
> >
> > static const struct nfs_pageio_ops objio_pg_write_ops = {
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index 5b3cc3f..9eca5a8 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -28,6 +28,7 @@
> > */
> >
> > #include <linux/nfs_fs.h>
> > +#include <linux/nfs_page.h>
> > #include "internal.h"
> > #include "pnfs.h"
> > #include "iostat.h"
> > @@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data
*data)
> > }
> > EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
> >
> > +static void
> > +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
> > + struct nfs_read_data *data)
> > +{
> > + list_splice_tail_init(&data->pages, &desc->pg_list);
> > + if (data->req && list_empty(&data->req->wb_list))
> > + nfs_list_add_request(data->req, &desc->pg_list);
> > + nfs_pageio_reset_read_mds(desc);
> > + desc->pg_recoalesce = 1;
> > + nfs_readdata_release(data);
>
> I'm confused...
> Isn't this function supposed to call the nfs read path?
>
No. The reason for doing the above is to avoid recursing back into the
read/write code when we're already deep down in the stack.
Instead, we put the requests back on the pg_list, set the pg_recoalesce
flag, and then allow nfs_do_recoalesce() (which runs much further back
up the stack) to do its job.
Cheers
Trond
On 2011-07-12 22:29, Trond Myklebust wrote:
> ...and ensure that we recoalesce to take into account differences in
> block sizes when falling back to read through the MDS.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/internal.h | 4 +++
> fs/nfs/nfs4filelayout.c | 2 +-
> fs/nfs/objlayout/objio_osd.c | 2 +-
> fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++-
> fs/nfs/pnfs.h | 10 +------
> fs/nfs/read.c | 46 ++++++++++++++-------------------
> include/linux/nfs_page.h | 1 -
> 7 files changed, 82 insertions(+), 40 deletions(-)
>
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index 31e8b50..795b3e0 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -295,10 +295,14 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
> extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
> const struct rpc_call_ops *call_ops);
> extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
> +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
> + struct list_head *head);
>
> struct nfs_pageio_descriptor;
> extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
> struct inode *inode);
> +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
> +extern void nfs_readdata_release(struct nfs_read_data *rdata);
>
> /* write.c */
> extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index af9bf9e..fc556d6 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -735,7 +735,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
> static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> .pg_init = filelayout_pg_init_read,
> .pg_test = filelayout_pg_test,
> - .pg_doio = nfs_generic_pg_readpages,
> + .pg_doio = pnfs_generic_pg_readpages,
> };
>
> static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> index 70272d5..add6289 100644
> --- a/fs/nfs/objlayout/objio_osd.c
> +++ b/fs/nfs/objlayout/objio_osd.c
> @@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
> static const struct nfs_pageio_ops objio_pg_read_ops = {
> .pg_init = pnfs_generic_pg_init_read,
> .pg_test = objio_pg_test,
> - .pg_doio = nfs_generic_pg_readpages,
> + .pg_doio = pnfs_generic_pg_readpages,
> };
>
> static const struct nfs_pageio_ops objio_pg_write_ops = {
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 5b3cc3f..9eca5a8 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -28,6 +28,7 @@
> */
>
> #include <linux/nfs_fs.h>
> +#include <linux/nfs_page.h>
> #include "internal.h"
> #include "pnfs.h"
> #include "iostat.h"
> @@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
> }
> EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
>
> +static void
> +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
> + struct nfs_read_data *data)
> +{
> + list_splice_tail_init(&data->pages, &desc->pg_list);
> + if (data->req && list_empty(&data->req->wb_list))
> + nfs_list_add_request(data->req, &desc->pg_list);
> + nfs_pageio_reset_read_mds(desc);
> + desc->pg_recoalesce = 1;
> + nfs_readdata_release(data);
I'm confused...
Isn't this function supposed to call the nfs read path?
Benny
> +}
> +
> /*
> * Call the appropriate parallel I/O subsystem read function.
> */
> -enum pnfs_try_status
> +static enum pnfs_try_status
> pnfs_try_to_read_data(struct nfs_read_data *rdata,
> - const struct rpc_call_ops *call_ops)
> + const struct rpc_call_ops *call_ops,
> + struct pnfs_layout_segment *lseg)
> {
> struct inode *inode = rdata->inode;
> struct nfs_server *nfss = NFS_SERVER(inode);
> enum pnfs_try_status trypnfs;
>
> rdata->mds_ops = call_ops;
> + rdata->lseg = get_lseg(lseg);
>
> dprintk("%s: Reading ino:%lu %u@%llu\n",
> __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
> @@ -1243,6 +1258,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
> return trypnfs;
> }
>
> +static void
> +pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
> +{
> + struct nfs_read_data *data;
> + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
> + struct pnfs_layout_segment *lseg = desc->pg_lseg;
> +
> + desc->pg_lseg = NULL;
> + while (!list_empty(head)) {
> + enum pnfs_try_status trypnfs;
> +
> + data = list_entry(head->next, struct nfs_read_data, list);
> + list_del_init(&data->list);
> +
> + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
> + if (trypnfs == PNFS_NOT_ATTEMPTED)
> + pnfs_read_through_mds(desc, data);
> + }
> + put_lseg(lseg);
> +}
> +
> +int
> +pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> +{
> + LIST_HEAD(head);
> + int ret;
> +
> + ret = nfs_generic_pagein(desc, &head);
> + if (ret != 0) {
> + put_lseg(desc->pg_lseg);
> + desc->pg_lseg = NULL;
> + return ret;
> + }
> + pnfs_do_multiple_reads(desc, &head);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
> +
> /*
> * Currently there is only one (whole file) write lseg.
> */
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index a59736e..c40ffa5 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -157,9 +157,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
> void unset_pnfs_layoutdriver(struct nfs_server *);
> enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
> const struct rpc_call_ops *, int);
> -enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
> - const struct rpc_call_ops *);
> void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
> +int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
> void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
> bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
> int pnfs_layout_process(struct nfs4_layoutget *lgp);
> @@ -330,13 +329,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
> }
>
> static inline enum pnfs_try_status
> -pnfs_try_to_read_data(struct nfs_read_data *data,
> - const struct rpc_call_ops *call_ops)
> -{
> - return PNFS_NOT_ATTEMPTED;
> -}
> -
> -static inline enum pnfs_try_status
> pnfs_try_to_write_data(struct nfs_write_data *data,
> const struct rpc_call_ops *call_ops, int how)
> {
> diff --git a/fs/nfs/read.c b/fs/nfs/read.c
> index 47f92c1..3745eed 100644
> --- a/fs/nfs/read.c
> +++ b/fs/nfs/read.c
> @@ -67,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
> mempool_free(p, nfs_rdata_mempool);
> }
>
> -static void nfs_readdata_release(struct nfs_read_data *rdata)
> +void nfs_readdata_release(struct nfs_read_data *rdata)
> {
> put_lseg(rdata->lseg);
> put_nfs_open_context(rdata->args.context);
> @@ -120,6 +120,12 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
> }
> EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);
>
> +void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
> +{
> + pgio->pg_ops = &nfs_pageio_read_ops;
> + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
> +}
> +
> static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
> struct inode *inode)
> {
> @@ -235,26 +241,16 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
> }
>
> static int nfs_do_read(struct nfs_read_data *data,
> - const struct rpc_call_ops *call_ops,
> - struct pnfs_layout_segment *lseg)
> + const struct rpc_call_ops *call_ops)
> {
> struct inode *inode = data->args.context->path.dentry->d_inode;
>
> - if (lseg) {
> - data->lseg = get_lseg(lseg);
> - if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
> - return 0;
> - put_lseg(data->lseg);
> - data->lseg = NULL;
> - }
> -
> return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
> }
>
> static int
> nfs_do_multiple_reads(struct list_head *head,
> - const struct rpc_call_ops *call_ops,
> - struct pnfs_layout_segment *lseg)
> + const struct rpc_call_ops *call_ops)
> {
> struct nfs_read_data *data;
> int ret = 0;
> @@ -265,7 +261,7 @@ nfs_do_multiple_reads(struct list_head *head,
> data = list_entry(head->next, struct nfs_read_data, list);
> list_del_init(&data->list);
>
> - ret2 = nfs_do_read(data, call_ops, lseg);
> + ret2 = nfs_do_read(data, call_ops);
> if (ret == 0)
> ret = ret2;
> }
> @@ -372,25 +368,23 @@ out:
> return ret;
> }
>
> -int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
> +{
> + if (desc->pg_bsize < PAGE_CACHE_SIZE)
> + return nfs_pagein_multi(desc, head);
> + return nfs_pagein_one(desc, head);
> +}
> +
> +static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> {
> LIST_HEAD(head);
> int ret;
>
> - if (desc->pg_bsize < PAGE_CACHE_SIZE)
> - ret = nfs_pagein_multi(desc, &head);
> - else
> - ret = nfs_pagein_one(desc, &head);
> -
> + ret = nfs_generic_pagein(desc, &head);
> if (ret == 0)
> - ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops,
> - desc->pg_lseg);
> - put_lseg(desc->pg_lseg);
> - desc->pg_lseg = NULL;
> + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
> return ret;
> }
> -EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
> -
>
> static const struct nfs_pageio_ops nfs_pageio_read_ops = {
> .pg_test = nfs_generic_pg_test,
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 7241b2a..0a48f84 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -108,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req);
> extern int nfs_set_page_tag_locked(struct nfs_page *req);
> extern void nfs_clear_page_tag_locked(struct nfs_page *req);
>
> -extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
> extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
>
>
...and ensure that we recoalesce to take into account differences in
block sizes when falling back to write through the MDS.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 4 +++
fs/nfs/nfs4filelayout.c | 2 +-
fs/nfs/objlayout/objio_osd.c | 2 +-
fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++-
fs/nfs/pnfs.h | 10 +------
fs/nfs/write.c | 39 +++++++++++++---------------
include/linux/nfs_page.h | 3 --
7 files changed, 80 insertions(+), 37 deletions(-)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 795b3e0..758db89 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -305,8 +305,12 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* write.c */
+extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags);
+extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_write_data *p);
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index fc556d6..fbc5b42 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -741,7 +741,7 @@ static const struct nfs_pageio_ops filelayout_pg_read_ops = {
static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_init = filelayout_pg_init_write,
.pg_test = filelayout_pg_test,
- .pg_doio = nfs_generic_pg_writepages,
+ .pg_doio = pnfs_generic_pg_writepages,
};
static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index add6289..7d49bb1 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1013,7 +1013,7 @@ static const struct nfs_pageio_ops objio_pg_read_ops = {
static const struct nfs_pageio_ops objio_pg_write_ops = {
.pg_init = pnfs_generic_pg_init_write,
.pg_test = objio_pg_test,
- .pg_doio = nfs_generic_pg_writepages,
+ .pg_doio = pnfs_generic_pg_writepages,
};
static struct pnfs_layoutdriver_type objlayout_type = {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 9eca5a8..93c7329 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1170,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data)
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
-enum pnfs_try_status
+static void
+pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
+ struct nfs_write_data *data)
+{
+ list_splice_tail_init(&data->pages, &desc->pg_list);
+ if (data->req && list_empty(&data->req->wb_list))
+ nfs_list_add_request(data->req, &desc->pg_list);
+ nfs_pageio_reset_write_mds(desc);
+ desc->pg_recoalesce = 1;
+ nfs_writedata_release(data);
+}
+
+static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
- const struct rpc_call_ops *call_ops, int how)
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg,
+ int how)
{
struct inode *inode = wdata->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
wdata->mds_ops = call_ops;
+ wdata->lseg = get_lseg(lseg);
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1194,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
return trypnfs;
}
+static void
+pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+{
+ struct nfs_write_data *data;
+ const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+ desc->pg_lseg = NULL;
+ while (!list_empty(head)) {
+ enum pnfs_try_status trypnfs;
+
+ data = list_entry(head->next, struct nfs_write_data, list);
+ list_del_init(&data->list);
+
+ trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_write_through_mds(desc, data);
+ }
+ put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+ int ret;
+
+ ret = nfs_generic_flush(desc, &head);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
+ }
+ pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
+
/*
* Called by non rpc-based layout drivers
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c40ffa5..078670d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -155,11 +155,10 @@ bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int)
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
-enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
- const struct rpc_call_ops *, int);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -328,13 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
{
}
-static inline enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops, int how)
-{
- return PNFS_NOT_ATTEMPTED;
-}
-
static inline int pnfs_return_layout(struct inode *ino)
{
return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80bd74f..e18d842 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
mempool_free(p, nfs_wdata_mempool);
}
-static void nfs_writedata_release(struct nfs_write_data *wdata)
+void nfs_writedata_release(struct nfs_write_data *wdata)
{
put_lseg(wdata->lseg);
put_nfs_open_context(wdata->args.context);
@@ -887,25 +887,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
static int nfs_do_write(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
- struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = data->args.context->path.dentry->d_inode;
- if (lseg != NULL) {
- data->lseg = get_lseg(lseg);
- if (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)
- return 0;
- put_lseg(data->lseg);
- data->lseg = NULL;
- }
-
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}
static int nfs_do_multiple_writes(struct list_head *head,
const struct rpc_call_ops *call_ops,
- struct pnfs_layout_segment *lseg,
int how)
{
struct nfs_write_data *data;
@@ -917,7 +907,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
data = list_entry(head->next, struct nfs_write_data, list);
list_del_init(&data->list);
- ret2 = nfs_do_write(data, call_ops, lseg, how);
+ ret2 = nfs_do_write(data, call_ops, how);
if (ret == 0)
ret = ret2;
}
@@ -1037,23 +1027,24 @@ out:
return ret;
}
-int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_flush_multi(desc, head);
+ return nfs_flush_one(desc, head);
+}
+
+static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
LIST_HEAD(head);
int ret;
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- ret = nfs_flush_multi(desc, &head);
- else
- ret = nfs_flush_one(desc, &head);
+ ret = nfs_generic_flush(desc, &head);
if (ret == 0)
ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
- desc->pg_lseg, desc->pg_ioflags);
- put_lseg(desc->pg_lseg);
- desc->pg_lseg = NULL;
+ desc->pg_ioflags);
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages);
static const struct nfs_pageio_ops nfs_pageio_write_ops = {
.pg_test = nfs_generic_pg_test,
@@ -1068,6 +1059,12 @@ void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds);
+void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
+{
+ pgio->pg_ops = &nfs_pageio_write_ops;
+ pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
+}
+
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 0a48f84..e2791a2 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -108,9 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req);
extern int nfs_set_page_tag_locked(struct nfs_page *req);
extern void nfs_clear_page_tag_locked(struct nfs_page *req);
-extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
-
-
/*
* Lock the page of an asynchronous request without getting a new reference
*/
--
1.7.6
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/read.c | 19 ++++++++-----------
fs/nfs/write.c | 20 +++++++-------------
include/linux/nfs_page.h | 1 +
3 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index d30d6de..b2e9aad 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -327,6 +327,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
ClearPageError(page);
+ desc->pg_rpc_callops = &nfs_read_partial_ops;
return ret;
out_bad:
while (!list_empty(res)) {
@@ -366,6 +367,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
nfs_read_rpcsetup(req, data, desc->pg_count, 0);
list_add(&data->list, res);
+ desc->pg_rpc_callops = &nfs_read_full_ops;
out:
return ret;
}
@@ -375,19 +377,14 @@ int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
LIST_HEAD(head);
int ret;
- if (desc->pg_bsize < PAGE_CACHE_SIZE) {
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
ret = nfs_pagein_multi(desc, &head);
- if (ret == 0)
- ret = nfs_do_multiple_reads(&head,
- &nfs_read_partial_ops,
- desc->pg_lseg);
- } else {
+ else
ret = nfs_pagein_one(desc, &head);
- if (ret == 0)
- ret = nfs_do_multiple_reads(&head,
- &nfs_read_full_ops,
- desc->pg_lseg);
- }
+
+ if (ret == 0)
+ ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops,
+ desc->pg_lseg);
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 89cc687..8fa7def 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -975,6 +975,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
offset += len;
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
+ desc->pg_rpc_callops = &nfs_write_partial_ops;
return ret;
out_bad:
@@ -1031,6 +1032,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
/* Set up the argument struct */
nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
list_add(&data->list, res);
+ desc->pg_rpc_callops = &nfs_write_full_ops;
out:
return ret;
}
@@ -1040,21 +1042,13 @@ int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
LIST_HEAD(head);
int ret;
- if (desc->pg_bsize < PAGE_CACHE_SIZE) {
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
ret = nfs_flush_multi(desc, &head);
- if (ret == 0)
- ret = nfs_do_multiple_writes(&head,
- &nfs_write_partial_ops,
- desc->pg_lseg,
- desc->pg_ioflags);
- } else {
+ else
ret = nfs_flush_one(desc, &head);
- if (ret == 0)
- ret = nfs_do_multiple_writes(&head,
- &nfs_write_full_ops,
- desc->pg_lseg,
- desc->pg_ioflags);
- }
+ if (ret == 0)
+ ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
+ desc->pg_lseg, desc->pg_ioflags);
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 9ac2dd1..db3194f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -74,6 +74,7 @@ struct nfs_pageio_descriptor {
const struct nfs_pageio_ops *pg_ops;
int pg_ioflags;
int pg_error;
+ const struct rpc_call_ops *pg_rpc_callops;
struct pnfs_layout_segment *pg_lseg;
};
--
1.7.6
If an attempt to do pNFS fails, and we have to fall back to writing through
the MDS, then we may want to re-coalesce the requests that we already have
since the block size for the MDS read/writes may be different to that of
the DS read/writes.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pagelist.c | 57 +++++++++++++++++++++++++++++++++++++++++++--
include/linux/nfs_page.h | 3 +-
2 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d421e19..7139dbf 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -240,6 +240,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_bsize = bsize;
desc->pg_base = 0;
desc->pg_moreio = 0;
+ desc->pg_recoalesce = 0;
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
desc->pg_ioflags = io_flags;
@@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
* Returns true if the request 'req' was successfully coalesced into the
* existing list of pages 'desc'.
*/
-int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
while (!nfs_pageio_do_add_request(desc, req)) {
@@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_error < 0)
return 0;
desc->pg_moreio = 0;
+ if (desc->pg_recoalesce)
+ return 0;
}
return 1;
}
+static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+
+ do {
+ list_splice_init(&desc->pg_list, &head);
+ desc->pg_bytes_written -= desc->pg_count;
+ desc->pg_count = 0;
+ desc->pg_base = 0;
+ desc->pg_recoalesce = 0;
+
+ while (!list_empty(&head)) {
+ struct nfs_page *req;
+
+ req = list_first_entry(&head, struct nfs_page, wb_list);
+ nfs_list_remove_request(req);
+ if (__nfs_pageio_add_request(desc, req))
+ continue;
+ if (desc->pg_error < 0)
+ return 0;
+ break;
+ }
+ } while (desc->pg_recoalesce);
+ return 1;
+}
+
+int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *req)
+{
+ int ret;
+
+ do {
+ ret = __nfs_pageio_add_request(desc, req);
+ if (ret)
+ break;
+ if (desc->pg_error < 0)
+ break;
+ ret = nfs_do_recoalesce(desc);
+ } while (ret);
+ return ret;
+}
+
/**
* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
* @desc: pointer to io descriptor
*/
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
- nfs_pageio_doio(desc);
+ for (;;) {
+ nfs_pageio_doio(desc);
+ if (!desc->pg_recoalesce)
+ break;
+ if (!nfs_do_recoalesce(desc))
+ break;
+ }
}
/**
@@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
if (!list_empty(&desc->pg_list)) {
struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
if (index != prev->wb_index + 1)
- nfs_pageio_doio(desc);
+ nfs_pageio_complete(desc);
}
}
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index db3194f..7241b2a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -68,7 +68,8 @@ struct nfs_pageio_descriptor {
size_t pg_count;
size_t pg_bsize;
unsigned int pg_base;
- char pg_moreio;
+ unsigned char pg_moreio : 1,
+ pg_recoalesce : 1;
struct inode *pg_inode;
const struct nfs_pageio_ops *pg_ops;
--
1.7.6