2011-07-12 19:30:05

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 0/8] pNFS read/write cleanup

The following cleanups apply on top of the nfs-for-next branch, and are
designed to allow the objects and blocks code to use different read/write
block sizes than the write-through-MDS.

It also pushes a lot of the pNFS code back down into pnfs.c where it
belongs...

Cheers
Trond

Trond Myklebust (8):
NFS: Clean up nfs_read_rpcsetup and nfs_write_rpcsetup
NFS: Clean up: split out the RPC transmission from
nfs_pagein_multi/one
NFS: Cache rpc_ops in struct nfs_pageio_descriptor
NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write
request
NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is
needed
NFS: Move the pnfs read code into pnfs.c
NFS: Move the pnfs write code into pnfs.c
NFS: Clean up - simplify the switch to read/write-through-MDS

fs/nfs/internal.h | 12 +++--
fs/nfs/nfs4filelayout.c | 9 ++--
fs/nfs/objlayout/objio_osd.c | 4 +-
fs/nfs/pagelist.c | 57 +++++++++++++++++++-
fs/nfs/pnfs.c | 118 ++++++++++++++++++++++++++++++++++++++--
fs/nfs/pnfs.h | 20 +------
fs/nfs/read.c | 121 ++++++++++++++++++++++--------------------
fs/nfs/write.c | 119 +++++++++++++++++++++++------------------
include/linux/nfs_page.h | 8 +--
include/linux/nfs_xdr.h | 2 +
10 files changed, 317 insertions(+), 153 deletions(-)

--
1.7.6



2011-07-12 19:30:08

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 4/8] NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write request

Instead of looking up the rsize and wsize, the routines that generate the
RPC requests should really be using the pg_bsize, since that is what we
use when deciding whether or not to coalesce write requests...

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/read.c | 2 +-
fs/nfs/write.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b2e9aad..47f92c1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -303,7 +303,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
- size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
+ size_t rsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8fa7def..80bd74f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -946,7 +946,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_write_data *data;
- size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
+ size_t wsize = desc->pg_bsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
--
1.7.6


2011-07-13 13:22:08

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 5/8] NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is needed

On 2011-07-12 22:29, Trond Myklebust wrote:
> If an attempt to do pNFS fails, and we have to fall back to writing through
> the MDS, then we may want to re-coalesce the requests that we already have
> since the block size for the MDS read/writes may be different to that of
> the DS read/writes.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/pagelist.c | 57 +++++++++++++++++++++++++++++++++++++++++++--
> include/linux/nfs_page.h | 3 +-
> 2 files changed, 56 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index d421e19..7139dbf 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -240,6 +240,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> desc->pg_bsize = bsize;
> desc->pg_base = 0;
> desc->pg_moreio = 0;
> + desc->pg_recoalesce = 0;
> desc->pg_inode = inode;
> desc->pg_ops = pg_ops;
> desc->pg_ioflags = io_flags;
> @@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
> * Returns true if the request 'req' was successfully coalesced into the
> * existing list of pages 'desc'.
> */
> -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> +static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> struct nfs_page *req)
> {
> while (!nfs_pageio_do_add_request(desc, req)) {
> @@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> if (desc->pg_error < 0)
> return 0;
> desc->pg_moreio = 0;
> + if (desc->pg_recoalesce)
> + return 0;
> }
> return 1;
> }
>
> +static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
> +{
> + LIST_HEAD(head);
> +
> + do {
> + list_splice_init(&desc->pg_list, &head);
> + desc->pg_bytes_written -= desc->pg_count;
> + desc->pg_count = 0;
> + desc->pg_base = 0;
> + desc->pg_recoalesce = 0;
> +
> + while (!list_empty(&head)) {
> + struct nfs_page *req;
> +
> + req = list_first_entry(&head, struct nfs_page, wb_list);
> + nfs_list_remove_request(req);
> + if (__nfs_pageio_add_request(desc, req))
> + continue;
> + if (desc->pg_error < 0)
> + return 0;
> + break;
> + }
> + } while (desc->pg_recoalesce);
> + return 1;
> +}
> +
> +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> + struct nfs_page *req)
> +{
> + int ret;
> +
> + do {
> + ret = __nfs_pageio_add_request(desc, req);
> + if (ret)
> + break;
> + if (desc->pg_error < 0)
> + break;
> + ret = nfs_do_recoalesce(desc);
> + } while (ret);
> + return ret;
> +}
> +
> /**
> * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
> * @desc: pointer to io descriptor
> */
> void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
> {
> - nfs_pageio_doio(desc);
> + for (;;) {
> + nfs_pageio_doio(desc);
> + if (!desc->pg_recoalesce)
> + break;
> + if (!nfs_do_recoalesce(desc))
> + break;
> + }

nit: how about the following?

do
nfs_pageio_doio(desc);
while (desc->pg_recoalesce && nfs_do_recoalesce(desc));

Benny

> }
>
> /**
> @@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
> if (!list_empty(&desc->pg_list)) {
> struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
> if (index != prev->wb_index + 1)
> - nfs_pageio_doio(desc);
> + nfs_pageio_complete(desc);
> }
> }
>
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index db3194f..7241b2a 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -68,7 +68,8 @@ struct nfs_pageio_descriptor {
> size_t pg_count;
> size_t pg_bsize;
> unsigned int pg_base;
> - char pg_moreio;
> + unsigned char pg_moreio : 1,
> + pg_recoalesce : 1;
>
> struct inode *pg_inode;
> const struct nfs_pageio_ops *pg_ops;

2011-07-12 19:30:10

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 6/8] NFS: Move the pnfs read code into pnfs.c

...and ensure that we recoalesce to take into account differences in
block sizes when falling back to read through the MDS.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 4 +++
fs/nfs/nfs4filelayout.c | 2 +-
fs/nfs/objlayout/objio_osd.c | 2 +-
fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++-
fs/nfs/pnfs.h | 10 +------
fs/nfs/read.c | 46 ++++++++++++++-------------------
include/linux/nfs_page.h | 1 -
7 files changed, 82 insertions(+), 40 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31e8b50..795b3e0 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -295,10 +295,14 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);

struct nfs_pageio_descriptor;
extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode);
+extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_readdata_release(struct nfs_read_data *rdata);

/* write.c */
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index af9bf9e..fc556d6 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -735,7 +735,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
.pg_init = filelayout_pg_init_read,
.pg_test = filelayout_pg_test,
- .pg_doio = nfs_generic_pg_readpages,
+ .pg_doio = pnfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops filelayout_pg_write_ops = {
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 70272d5..add6289 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
static const struct nfs_pageio_ops objio_pg_read_ops = {
.pg_init = pnfs_generic_pg_init_read,
.pg_test = objio_pg_test,
- .pg_doio = nfs_generic_pg_readpages,
+ .pg_doio = pnfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops objio_pg_write_ops = {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5b3cc3f..9eca5a8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -28,6 +28,7 @@
*/

#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
@@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);

+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+ struct nfs_read_data *data)
+{
+ list_splice_tail_init(&data->pages, &desc->pg_list);
+ if (data->req && list_empty(&data->req->wb_list))
+ nfs_list_add_request(data->req, &desc->pg_list);
+ nfs_pageio_reset_read_mds(desc);
+ desc->pg_recoalesce = 1;
+ nfs_readdata_release(data);
+}
+
/*
* Call the appropriate parallel I/O subsystem read function.
*/
-enum pnfs_try_status
+static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
- const struct rpc_call_ops *call_ops)
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg)
{
struct inode *inode = rdata->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;

rdata->mds_ops = call_ops;
+ rdata->lseg = get_lseg(lseg);

dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1243,6 +1258,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
return trypnfs;
}

+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ struct nfs_read_data *data;
+ const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+ desc->pg_lseg = NULL;
+ while (!list_empty(head)) {
+ enum pnfs_try_status trypnfs;
+
+ data = list_entry(head->next, struct nfs_read_data, list);
+ list_del_init(&data->list);
+
+ trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_read_through_mds(desc, data);
+ }
+ put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+ int ret;
+
+ ret = nfs_generic_pagein(desc, &head);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
+ }
+ pnfs_do_multiple_reads(desc, &head);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
/*
* Currently there is only one (whole file) write lseg.
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a59736e..c40ffa5 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -157,9 +157,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
const struct rpc_call_ops *, int);
-enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
- const struct rpc_call_ops *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
@@ -330,13 +329,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
}

static inline enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops)
-{
- return PNFS_NOT_ATTEMPTED;
-}
-
-static inline enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops, int how)
{
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 47f92c1..3745eed 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -67,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
mempool_free(p, nfs_rdata_mempool);
}

-static void nfs_readdata_release(struct nfs_read_data *rdata)
+void nfs_readdata_release(struct nfs_read_data *rdata)
{
put_lseg(rdata->lseg);
put_nfs_open_context(rdata->args.context);
@@ -120,6 +120,12 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);

+void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
+{
+ pgio->pg_ops = &nfs_pageio_read_ops;
+ pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
+}
+
static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
{
@@ -235,26 +241,16 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
}

static int nfs_do_read(struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops,
- struct pnfs_layout_segment *lseg)
+ const struct rpc_call_ops *call_ops)
{
struct inode *inode = data->args.context->path.dentry->d_inode;

- if (lseg) {
- data->lseg = get_lseg(lseg);
- if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
- return 0;
- put_lseg(data->lseg);
- data->lseg = NULL;
- }
-
return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}

static int
nfs_do_multiple_reads(struct list_head *head,
- const struct rpc_call_ops *call_ops,
- struct pnfs_layout_segment *lseg)
+ const struct rpc_call_ops *call_ops)
{
struct nfs_read_data *data;
int ret = 0;
@@ -265,7 +261,7 @@ nfs_do_multiple_reads(struct list_head *head,
data = list_entry(head->next, struct nfs_read_data, list);
list_del_init(&data->list);

- ret2 = nfs_do_read(data, call_ops, lseg);
+ ret2 = nfs_do_read(data, call_ops);
if (ret == 0)
ret = ret2;
}
@@ -372,25 +368,23 @@ out:
return ret;
}

-int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_pagein_multi(desc, head);
+ return nfs_pagein_one(desc, head);
+}
+
+static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
LIST_HEAD(head);
int ret;

- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- ret = nfs_pagein_multi(desc, &head);
- else
- ret = nfs_pagein_one(desc, &head);
-
+ ret = nfs_generic_pagein(desc, &head);
if (ret == 0)
- ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops,
- desc->pg_lseg);
- put_lseg(desc->pg_lseg);
- desc->pg_lseg = NULL;
+ ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
-

static const struct nfs_pageio_ops nfs_pageio_read_ops = {
.pg_test = nfs_generic_pg_test,
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 7241b2a..0a48f84 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -108,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req);
extern int nfs_set_page_tag_locked(struct nfs_page *req);
extern void nfs_clear_page_tag_locked(struct nfs_page *req);

-extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);


--
1.7.6


2011-07-12 19:30:07

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 2/8] NFS: Clean up: split out the RPC transmission from nfs_pagein_multi/one

...and do the same for nfs_flush_multi/one.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/read.c | 92 ++++++++++++++++++++++++++++++-------------------------
fs/nfs/write.c | 93 +++++++++++++++++++++++++++++++------------------------
2 files changed, 102 insertions(+), 83 deletions(-)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 248a554..d30d6de 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,8 +30,6 @@

#define NFSDBG_FACILITY NFSDBG_PAGECACHE

-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
@@ -253,6 +251,27 @@ static int nfs_do_read(struct nfs_read_data *data,
return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}

+static int
+nfs_do_multiple_reads(struct list_head *head,
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg)
+{
+ struct nfs_read_data *data;
+ int ret = 0;
+
+ while (!list_empty(head)) {
+ int ret2;
+
+ data = list_entry(head->next, struct nfs_read_data, list);
+ list_del_init(&data->list);
+
+ ret2 = nfs_do_read(data, call_ops, lseg);
+ if (ret == 0)
+ ret = ret2;
+ }
+ return ret;
+}
+
static void
nfs_async_read_error(struct list_head *head)
{
@@ -279,7 +298,7 @@ nfs_async_read_error(struct list_head *head)
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
@@ -288,11 +307,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
- LIST_HEAD(list);

nfs_list_remove_request(req);

+ offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);
@@ -300,57 +318,33 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
data = nfs_readdata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->list, &list);
+ data->pagevec[0] = page;
+ nfs_read_rpcsetup(req, data, len, offset);
+ list_add(&data->list, res);
requests++;
nbytes -= len;
+ offset += len;
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
-
ClearPageError(page);
- offset = 0;
- nbytes = desc->pg_count;
- do {
- int ret2;
-
- data = list_entry(list.next, struct nfs_read_data, list);
- list_del_init(&data->list);
-
- data->pagevec[0] = page;
-
- if (nbytes < rsize)
- rsize = nbytes;
- nfs_read_rpcsetup(req, data, rsize, offset);
- ret2 = nfs_do_read(data, &nfs_read_partial_ops, lseg);
- if (ret == 0)
- ret = ret2;
- offset += rsize;
- nbytes -= rsize;
- } while (nbytes != 0);
- put_lseg(lseg);
- desc->pg_lseg = NULL;
-
return ret;
-
out_bad:
- while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_read_data, list);
+ while (!list_empty(res)) {
+ data = list_entry(res->next, struct nfs_read_data, list);
list_del(&data->list);
nfs_readdata_free(data);
}
SetPageError(page);
nfs_readpage_release(req);
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return -ENOMEM;
}

-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
struct list_head *head = &desc->pg_list;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret = -ENOMEM;

data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
@@ -371,18 +365,32 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
req = nfs_list_entry(data->pages.next);

nfs_read_rpcsetup(req, data, desc->pg_count, 0);
- ret = nfs_do_read(data, &nfs_read_full_ops, lseg);
+ list_add(&data->list, res);
out:
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return ret;
}

int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_pagein_multi(desc);
- return nfs_pagein_one(desc);
+ LIST_HEAD(head);
+ int ret;
+
+ if (desc->pg_bsize < PAGE_CACHE_SIZE) {
+ ret = nfs_pagein_multi(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_reads(&head,
+ &nfs_read_partial_ops,
+ desc->pg_lseg);
+ } else {
+ ret = nfs_pagein_one(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_reads(&head,
+ &nfs_read_full_ops,
+ desc->pg_lseg);
+ }
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0aeb09b..89cc687 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -903,6 +903,27 @@ static int nfs_do_write(struct nfs_write_data *data,
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}

+static int nfs_do_multiple_writes(struct list_head *head,
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg,
+ int how)
+{
+ struct nfs_write_data *data;
+ int ret = 0;
+
+ while (!list_empty(head)) {
+ int ret2;
+
+ data = list_entry(head->next, struct nfs_write_data, list);
+ list_del_init(&data->list);
+
+ ret2 = nfs_do_write(data, call_ops, lseg, how);
+ if (ret == 0)
+ ret = ret2;
+ }
+ return ret;
+}
+
/* If a nfs_flush_* function fails, it should remove reqs from @head and
* call this on each, which will prepare them to be retried on next
* writeback using standard nfs.
@@ -920,7 +941,7 @@ static void nfs_redirty_request(struct nfs_page *req)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
@@ -929,8 +950,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
- LIST_HEAD(list);

nfs_list_remove_request(req);

@@ -940,6 +959,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
desc->pg_ioflags &= ~FLUSH_COND_STABLE;


+ offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes, wsize);
@@ -947,47 +967,23 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
data = nfs_writedata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->list, &list);
+ data->pagevec[0] = page;
+ nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
+ list_add(&data->list, res);
requests++;
nbytes -= len;
+ offset += len;
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
-
- ClearPageError(page);
- offset = 0;
- nbytes = desc->pg_count;
- do {
- int ret2;
-
- data = list_entry(list.next, struct nfs_write_data, list);
- list_del_init(&data->list);
-
- data->pagevec[0] = page;
-
- if (nbytes < wsize)
- wsize = nbytes;
- nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
- ret2 = nfs_do_write(data, &nfs_write_partial_ops, lseg,
- desc->pg_ioflags);
- if (ret == 0)
- ret = ret2;
- offset += wsize;
- nbytes -= wsize;
- } while (nbytes != 0);
-
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return ret;

out_bad:
- while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_write_data, list);
+ while (!list_empty(res)) {
+ data = list_entry(res->next, struct nfs_write_data, list);
list_del(&data->list);
nfs_writedata_free(data);
}
nfs_redirty_request(req);
- put_lseg(lseg);
- desc->pg_lseg = NULL;
return -ENOMEM;
}

@@ -999,13 +995,12 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
- struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret;

data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
@@ -1035,18 +1030,34 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)

/* Set up the argument struct */
nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
- ret = nfs_do_write(data, &nfs_write_full_ops, lseg, desc->pg_ioflags);
+ list_add(&data->list, res);
out:
- put_lseg(lseg); /* Cleans any gotten in ->pg_test */
- desc->pg_lseg = NULL;
return ret;
}

int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(desc);
- return nfs_flush_one(desc);
+ LIST_HEAD(head);
+ int ret;
+
+ if (desc->pg_bsize < PAGE_CACHE_SIZE) {
+ ret = nfs_flush_multi(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_writes(&head,
+ &nfs_write_partial_ops,
+ desc->pg_lseg,
+ desc->pg_ioflags);
+ } else {
+ ret = nfs_flush_one(desc, &head);
+ if (ret == 0)
+ ret = nfs_do_multiple_writes(&head,
+ &nfs_write_full_ops,
+ desc->pg_lseg,
+ desc->pg_ioflags);
+ }
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages);

--
1.7.6


2011-07-12 19:30:07

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 1/8] NFS: Clean up nfs_read_rpcsetup and nfs_write_rpcsetup

Split them up into two parts: one which sets up the struct nfs_read/write_data,
the other which sets up the actual RPC call or pNFS call.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/read.c | 42 +++++++++++++++++++++++++-----------------
fs/nfs/write.c | 41 ++++++++++++++++++++++++++---------------
include/linux/nfs_xdr.h | 2 ++
3 files changed, 53 insertions(+), 32 deletions(-)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c394662..248a554 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -213,17 +213,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
/*
* Set up the NFS read request struct
*/
-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops,
- unsigned int count, unsigned int offset,
- struct pnfs_layout_segment *lseg)
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+ unsigned int count, unsigned int offset)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;

data->req = req;
data->inode = inode;
data->cred = req->wb_context->cred;
- data->lseg = get_lseg(lseg);

data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -237,10 +234,21 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.count = count;
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
+}

- if (data->lseg &&
- (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
- return 0;
+static int nfs_do_read(struct nfs_read_data *data,
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg)
+{
+ struct inode *inode = data->args.context->path.dentry->d_inode;
+
+ if (lseg) {
+ data->lseg = get_lseg(lseg);
+ if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
+ return 0;
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ }

return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}
@@ -292,7 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
data = nfs_readdata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->pages, &list);
+ list_add(&data->list, &list);
requests++;
nbytes -= len;
} while(nbytes != 0);
@@ -304,15 +312,15 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
do {
int ret2;

- data = list_entry(list.next, struct nfs_read_data, pages);
- list_del_init(&data->pages);
+ data = list_entry(list.next, struct nfs_read_data, list);
+ list_del_init(&data->list);

data->pagevec[0] = page;

if (nbytes < rsize)
rsize = nbytes;
- ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
- rsize, offset, lseg);
+ nfs_read_rpcsetup(req, data, rsize, offset);
+ ret2 = nfs_do_read(data, &nfs_read_partial_ops, lseg);
if (ret == 0)
ret = ret2;
offset += rsize;
@@ -325,8 +333,8 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)

out_bad:
while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_read_data, pages);
- list_del(&data->pages);
+ data = list_entry(list.next, struct nfs_read_data, list);
+ list_del(&data->list);
nfs_readdata_free(data);
}
SetPageError(page);
@@ -362,8 +370,8 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
}
req = nfs_list_entry(data->pages.next);

- ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
- 0, lseg);
+ nfs_read_rpcsetup(req, data, desc->pg_count, 0);
+ ret = nfs_do_read(data, &nfs_read_full_ops, lseg);
out:
put_lseg(lseg);
desc->pg_lseg = NULL;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1af4d82..0aeb09b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
/*
* Set up the argument/result storage required for the RPC call.
*/
-static int nfs_write_rpcsetup(struct nfs_page *req,
+static void nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
- struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;
@@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->req = req;
data->inode = inode = req->wb_context->path.dentry->d_inode;
data->cred = req->wb_context->cred;
- data->lseg = get_lseg(lseg);

data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -886,10 +883,22 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->res.count = count;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
+}

- if (data->lseg &&
- (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
- return 0;
+static int nfs_do_write(struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg,
+ int how)
+{
+ struct inode *inode = data->args.context->path.dentry->d_inode;
+
+ if (lseg != NULL) {
+ data->lseg = get_lseg(lseg);
+ if (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)
+ return 0;
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ }

return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}
@@ -938,7 +947,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
data = nfs_writedata_alloc(1);
if (!data)
goto out_bad;
- list_add(&data->pages, &list);
+ list_add(&data->list, &list);
requests++;
nbytes -= len;
} while (nbytes != 0);
@@ -950,15 +959,16 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
do {
int ret2;

- data = list_entry(list.next, struct nfs_write_data, pages);
- list_del_init(&data->pages);
+ data = list_entry(list.next, struct nfs_write_data, list);
+ list_del_init(&data->list);

data->pagevec[0] = page;

if (nbytes < wsize)
wsize = nbytes;
- ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
- wsize, offset, lseg, desc->pg_ioflags);
+ nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
+ ret2 = nfs_do_write(data, &nfs_write_partial_ops, lseg,
+ desc->pg_ioflags);
if (ret == 0)
ret = ret2;
offset += wsize;
@@ -971,8 +981,8 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)

out_bad:
while (!list_empty(&list)) {
- data = list_entry(list.next, struct nfs_write_data, pages);
- list_del(&data->pages);
+ data = list_entry(list.next, struct nfs_write_data, list);
+ list_del(&data->list);
nfs_writedata_free(data);
}
nfs_redirty_request(req);
@@ -1024,7 +1034,8 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
desc->pg_ioflags &= ~FLUSH_COND_STABLE;

/* Set up the argument struct */
- ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
+ nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
+ ret = nfs_do_write(data, &nfs_write_full_ops, lseg, desc->pg_ioflags);
out:
put_lseg(lseg); /* Cleans any gotten in ->pg_test */
desc->pg_lseg = NULL;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 956d357..5b11595 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1126,6 +1126,7 @@ struct nfs_read_data {
struct rpc_cred *cred;
struct nfs_fattr fattr; /* fattr storage */
struct list_head pages; /* Coalesced read requests */
+ struct list_head list; /* lists of struct nfs_read_data */
struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
@@ -1149,6 +1150,7 @@ struct nfs_write_data {
struct nfs_fattr fattr;
struct nfs_writeverf verf;
struct list_head pages; /* Coalesced requests we wish to flush */
+ struct list_head list; /* lists of struct nfs_write_data */
struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
--
1.7.6


2011-07-12 19:30:12

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 8/8] NFS: Clean up - simplify the switch to read/write-through-MDS

Use nfs_pageio_reset_read_mds and nfs_pageio_reset_write_mds instead of
completely reinitialising the struct nfs_pageio_descriptor.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 4 ----
fs/nfs/nfs4filelayout.c | 5 ++---
fs/nfs/pnfs.c | 4 ++--
fs/nfs/read.c | 4 ++--
fs/nfs/write.c | 4 ++--
5 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 758db89..38b1ffc 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -299,16 +299,12 @@ extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
struct list_head *head);

struct nfs_pageio_descriptor;
-extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);

/* write.c */
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
struct list_head *head);
-extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_write_data *p);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index fbc5b42..f0b37e1 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -711,7 +711,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
GFP_KERNEL);
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_init_read_mds(pgio, pgio->pg_inode);
+ nfs_pageio_reset_read_mds(pgio);
}

void
@@ -728,8 +728,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
GFP_NOFS);
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_init_write_mds(pgio, pgio->pg_inode,
- pgio->pg_ioflags);
+ nfs_pageio_reset_write_mds(pgio);
}

static const struct nfs_pageio_ops filelayout_pg_read_ops = {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 93c7329..38e5508 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1075,7 +1075,7 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
GFP_KERNEL);
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_init_read_mds(pgio, pgio->pg_inode);
+ nfs_pageio_reset_read_mds(pgio);

}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
@@ -1093,7 +1093,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
GFP_NOFS);
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags);
+ nfs_pageio_reset_write_mds(pgio);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3745eed..1472933 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -112,19 +112,19 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
}
}

-void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
NFS_SERVER(inode)->rsize, 0);
}
-EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);

void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_ops = &nfs_pageio_read_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e18d842..525fb02 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1051,19 +1051,19 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
.pg_doio = nfs_generic_pg_writepages,
};

-void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
NFS_SERVER(inode)->wsize, ioflags);
}
-EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds);

void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
pgio->pg_ops = &nfs_pageio_write_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);

static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
--
1.7.6


2011-07-13 14:08:41

by Myklebust, Trond

[permalink] [raw]
Subject: RE: [PATCH 6/8] NFS: Move the pnfs read code into pnfs.c

> -----Original Message-----
> From: Benny Halevy [mailto:[email protected]]
> Sent: Wednesday, July 13, 2011 9:41 AM
> To: Myklebust, Trond
> Cc: [email protected]
> Subject: Re: [PATCH 6/8] NFS: Move the pnfs read code into pnfs.c
>
> On 2011-07-12 22:29, Trond Myklebust wrote:
> > ...and ensure that we recoalese to take into account differences in
> > block sizes when falling back to read through the MDS.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> > fs/nfs/internal.h | 4 +++
> > fs/nfs/nfs4filelayout.c | 2 +-
> > fs/nfs/objlayout/objio_osd.c | 2 +-
> > fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++-
> > fs/nfs/pnfs.h | 10 +------
> > fs/nfs/read.c | 46 ++++++++++++++-------------------
> > include/linux/nfs_page.h | 1 -
> > 7 files changed, 82 insertions(+), 40 deletions(-)
> >
> > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> > index 31e8b50..795b3e0 100644
> > --- a/fs/nfs/internal.h
> > +++ b/fs/nfs/internal.h
> > @@ -295,10 +295,14 @@ extern int nfs4_get_rootfh(struct nfs_server
> *server, struct nfs_fh *mntfh);
> > extern int nfs_initiate_read(struct nfs_read_data *data, struct
> rpc_clnt *clnt,
> > const struct rpc_call_ops *call_ops);
> > extern void nfs_read_prepare(struct rpc_task *task, void
*calldata);
> > +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
> > + struct list_head *head);
> >
> > struct nfs_pageio_descriptor;
> > extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor
> *pgio,
> > struct inode *inode);
> > +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor
> *pgio);
> > +extern void nfs_readdata_release(struct nfs_read_data *rdata);
> >
> > /* write.c */
> > extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor
> *pgio,
> > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> > index af9bf9e..fc556d6 100644
> > --- a/fs/nfs/nfs4filelayout.c
> > +++ b/fs/nfs/nfs4filelayout.c
> > @@ -735,7 +735,7 @@ filelayout_pg_init_write(struct
> nfs_pageio_descriptor *pgio,
> > static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> > .pg_init = filelayout_pg_init_read,
> > .pg_test = filelayout_pg_test,
> > - .pg_doio = nfs_generic_pg_readpages,
> > + .pg_doio = pnfs_generic_pg_readpages,
> > };
> >
> > static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> > diff --git a/fs/nfs/objlayout/objio_osd.c
> b/fs/nfs/objlayout/objio_osd.c
> > index 70272d5..add6289 100644
> > --- a/fs/nfs/objlayout/objio_osd.c
> > +++ b/fs/nfs/objlayout/objio_osd.c
> > @@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct
> nfs_pageio_descriptor *pgio,
> > static const struct nfs_pageio_ops objio_pg_read_ops = {
> > .pg_init = pnfs_generic_pg_init_read,
> > .pg_test = objio_pg_test,
> > - .pg_doio = nfs_generic_pg_readpages,
> > + .pg_doio = pnfs_generic_pg_readpages,
> > };
> >
> > static const struct nfs_pageio_ops objio_pg_write_ops = {
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index 5b3cc3f..9eca5a8 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -28,6 +28,7 @@
> > */
> >
> > #include <linux/nfs_fs.h>
> > +#include <linux/nfs_page.h>
> > #include "internal.h"
> > #include "pnfs.h"
> > #include "iostat.h"
> > @@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data
*data)
> > }
> > EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
> >
> > +static void
> > +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
> > + struct nfs_read_data *data)
> > +{
> > + list_splice_tail_init(&data->pages, &desc->pg_list);
> > + if (data->req && list_empty(&data->req->wb_list))
> > + nfs_list_add_request(data->req, &desc->pg_list);
> > + nfs_pageio_reset_read_mds(desc);
> > + desc->pg_recoalesce = 1;
> > + nfs_readdata_release(data);
>
> I'm confused...
> Isn't this function supposed to call the nfs read path?
>

No. The reason for doing the above is to avoid recursing back into the
read/write code when we're already deep down in the stack.

Instead, we put the requests back on the pg_list, set the pg_recoalesce
flag, and then allow nfs_do_recoalesce() (which runs much further back
up the stack) to do its job.

Cheers
Trond



2011-07-13 13:40:59

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 6/8] NFS: Move the pnfs read code into pnfs.c

On 2011-07-12 22:29, Trond Myklebust wrote:
> ...and ensure that we recoalese to take into account differences in
> block sizes when falling back to read through the MDS.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/internal.h | 4 +++
> fs/nfs/nfs4filelayout.c | 2 +-
> fs/nfs/objlayout/objio_osd.c | 2 +-
> fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++-
> fs/nfs/pnfs.h | 10 +------
> fs/nfs/read.c | 46 ++++++++++++++-------------------
> include/linux/nfs_page.h | 1 -
> 7 files changed, 82 insertions(+), 40 deletions(-)
>
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index 31e8b50..795b3e0 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -295,10 +295,14 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
> extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
> const struct rpc_call_ops *call_ops);
> extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
> +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
> + struct list_head *head);
>
> struct nfs_pageio_descriptor;
> extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
> struct inode *inode);
> +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
> +extern void nfs_readdata_release(struct nfs_read_data *rdata);
>
> /* write.c */
> extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index af9bf9e..fc556d6 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -735,7 +735,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
> static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> .pg_init = filelayout_pg_init_read,
> .pg_test = filelayout_pg_test,
> - .pg_doio = nfs_generic_pg_readpages,
> + .pg_doio = pnfs_generic_pg_readpages,
> };
>
> static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> index 70272d5..add6289 100644
> --- a/fs/nfs/objlayout/objio_osd.c
> +++ b/fs/nfs/objlayout/objio_osd.c
> @@ -1007,7 +1007,7 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
> static const struct nfs_pageio_ops objio_pg_read_ops = {
> .pg_init = pnfs_generic_pg_init_read,
> .pg_test = objio_pg_test,
> - .pg_doio = nfs_generic_pg_readpages,
> + .pg_doio = pnfs_generic_pg_readpages,
> };
>
> static const struct nfs_pageio_ops objio_pg_write_ops = {
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 5b3cc3f..9eca5a8 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -28,6 +28,7 @@
> */
>
> #include <linux/nfs_fs.h>
> +#include <linux/nfs_page.h>
> #include "internal.h"
> #include "pnfs.h"
> #include "iostat.h"
> @@ -1216,18 +1217,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
> }
> EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
>
> +static void
> +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
> + struct nfs_read_data *data)
> +{
> + list_splice_tail_init(&data->pages, &desc->pg_list);
> + if (data->req && list_empty(&data->req->wb_list))
> + nfs_list_add_request(data->req, &desc->pg_list);
> + nfs_pageio_reset_read_mds(desc);
> + desc->pg_recoalesce = 1;
> + nfs_readdata_release(data);

I'm confused...
Isn't this function supposed to call the nfs read path?

Benny

> +}
> +
> /*
> * Call the appropriate parallel I/O subsystem read function.
> */
> -enum pnfs_try_status
> +static enum pnfs_try_status
> pnfs_try_to_read_data(struct nfs_read_data *rdata,
> - const struct rpc_call_ops *call_ops)
> + const struct rpc_call_ops *call_ops,
> + struct pnfs_layout_segment *lseg)
> {
> struct inode *inode = rdata->inode;
> struct nfs_server *nfss = NFS_SERVER(inode);
> enum pnfs_try_status trypnfs;
>
> rdata->mds_ops = call_ops;
> + rdata->lseg = get_lseg(lseg);
>
> dprintk("%s: Reading ino:%lu %u@%llu\n",
> __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
> @@ -1243,6 +1258,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
> return trypnfs;
> }
>
> +static void
> +pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
> +{
> + struct nfs_read_data *data;
> + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
> + struct pnfs_layout_segment *lseg = desc->pg_lseg;
> +
> + desc->pg_lseg = NULL;
> + while (!list_empty(head)) {
> + enum pnfs_try_status trypnfs;
> +
> + data = list_entry(head->next, struct nfs_read_data, list);
> + list_del_init(&data->list);
> +
> + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
> + if (trypnfs == PNFS_NOT_ATTEMPTED)
> + pnfs_read_through_mds(desc, data);
> + }
> + put_lseg(lseg);
> +}
> +
> +int
> +pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> +{
> + LIST_HEAD(head);
> + int ret;
> +
> + ret = nfs_generic_pagein(desc, &head);
> + if (ret != 0) {
> + put_lseg(desc->pg_lseg);
> + desc->pg_lseg = NULL;
> + return ret;
> + }
> + pnfs_do_multiple_reads(desc, &head);
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
> +
> /*
> * Currently there is only one (whole file) write lseg.
> */
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index a59736e..c40ffa5 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -157,9 +157,8 @@ void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
> void unset_pnfs_layoutdriver(struct nfs_server *);
> enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
> const struct rpc_call_ops *, int);
> -enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
> - const struct rpc_call_ops *);
> void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
> +int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
> void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
> bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
> int pnfs_layout_process(struct nfs4_layoutget *lgp);
> @@ -330,13 +329,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
> }
>
> static inline enum pnfs_try_status
> -pnfs_try_to_read_data(struct nfs_read_data *data,
> - const struct rpc_call_ops *call_ops)
> -{
> - return PNFS_NOT_ATTEMPTED;
> -}
> -
> -static inline enum pnfs_try_status
> pnfs_try_to_write_data(struct nfs_write_data *data,
> const struct rpc_call_ops *call_ops, int how)
> {
> diff --git a/fs/nfs/read.c b/fs/nfs/read.c
> index 47f92c1..3745eed 100644
> --- a/fs/nfs/read.c
> +++ b/fs/nfs/read.c
> @@ -67,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
> mempool_free(p, nfs_rdata_mempool);
> }
>
> -static void nfs_readdata_release(struct nfs_read_data *rdata)
> +void nfs_readdata_release(struct nfs_read_data *rdata)
> {
> put_lseg(rdata->lseg);
> put_nfs_open_context(rdata->args.context);
> @@ -120,6 +120,12 @@ void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
> }
> EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);
>
> +void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
> +{
> + pgio->pg_ops = &nfs_pageio_read_ops;
> + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
> +}
> +
> static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
> struct inode *inode)
> {
> @@ -235,26 +241,16 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
> }
>
> static int nfs_do_read(struct nfs_read_data *data,
> - const struct rpc_call_ops *call_ops,
> - struct pnfs_layout_segment *lseg)
> + const struct rpc_call_ops *call_ops)
> {
> struct inode *inode = data->args.context->path.dentry->d_inode;
>
> - if (lseg) {
> - data->lseg = get_lseg(lseg);
> - if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
> - return 0;
> - put_lseg(data->lseg);
> - data->lseg = NULL;
> - }
> -
> return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
> }
>
> static int
> nfs_do_multiple_reads(struct list_head *head,
> - const struct rpc_call_ops *call_ops,
> - struct pnfs_layout_segment *lseg)
> + const struct rpc_call_ops *call_ops)
> {
> struct nfs_read_data *data;
> int ret = 0;
> @@ -265,7 +261,7 @@ nfs_do_multiple_reads(struct list_head *head,
> data = list_entry(head->next, struct nfs_read_data, list);
> list_del_init(&data->list);
>
> - ret2 = nfs_do_read(data, call_ops, lseg);
> + ret2 = nfs_do_read(data, call_ops);
> if (ret == 0)
> ret = ret2;
> }
> @@ -372,25 +368,23 @@ out:
> return ret;
> }
>
> -int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
> +{
> + if (desc->pg_bsize < PAGE_CACHE_SIZE)
> + return nfs_pagein_multi(desc, head);
> + return nfs_pagein_one(desc, head);
> +}
> +
> +static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> {
> LIST_HEAD(head);
> int ret;
>
> - if (desc->pg_bsize < PAGE_CACHE_SIZE)
> - ret = nfs_pagein_multi(desc, &head);
> - else
> - ret = nfs_pagein_one(desc, &head);
> -
> + ret = nfs_generic_pagein(desc, &head);
> if (ret == 0)
> - ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops,
> - desc->pg_lseg);
> - put_lseg(desc->pg_lseg);
> - desc->pg_lseg = NULL;
> + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
> return ret;
> }
> -EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
> -
>
> static const struct nfs_pageio_ops nfs_pageio_read_ops = {
> .pg_test = nfs_generic_pg_test,
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 7241b2a..0a48f84 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -108,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req);
> extern int nfs_set_page_tag_locked(struct nfs_page *req);
> extern void nfs_clear_page_tag_locked(struct nfs_page *req);
>
> -extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
> extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
>
>

2011-07-12 19:30:11

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 7/8] NFS: Move the pnfs write code into pnfs.c

...and ensure that we recoalesce to take into account differences in
block sizes when falling back to write through the MDS.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 4 +++
fs/nfs/nfs4filelayout.c | 2 +-
fs/nfs/objlayout/objio_osd.c | 2 +-
fs/nfs/pnfs.c | 57 ++++++++++++++++++++++++++++++++++++++++-
fs/nfs/pnfs.h | 10 +------
fs/nfs/write.c | 39 +++++++++++++---------------
include/linux/nfs_page.h | 3 --
7 files changed, 80 insertions(+), 37 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 795b3e0..758db89 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -305,8 +305,12 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);

/* write.c */
+extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+ struct list_head *head);
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags);
+extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_write_data *p);
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index fc556d6..fbc5b42 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -741,7 +741,7 @@ static const struct nfs_pageio_ops filelayout_pg_read_ops = {
static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_init = filelayout_pg_init_write,
.pg_test = filelayout_pg_test,
- .pg_doio = nfs_generic_pg_writepages,
+ .pg_doio = pnfs_generic_pg_writepages,
};

static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index add6289..7d49bb1 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1013,7 +1013,7 @@ static const struct nfs_pageio_ops objio_pg_read_ops = {
static const struct nfs_pageio_ops objio_pg_write_ops = {
.pg_init = pnfs_generic_pg_init_write,
.pg_test = objio_pg_test,
- .pg_doio = nfs_generic_pg_writepages,
+ .pg_doio = pnfs_generic_pg_writepages,
};

static struct pnfs_layoutdriver_type objlayout_type = {
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 9eca5a8..93c7329 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1170,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data)
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);

-enum pnfs_try_status
+static void
+pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
+ struct nfs_write_data *data)
+{
+ list_splice_tail_init(&data->pages, &desc->pg_list);
+ if (data->req && list_empty(&data->req->wb_list))
+ nfs_list_add_request(data->req, &desc->pg_list);
+ nfs_pageio_reset_write_mds(desc);
+ desc->pg_recoalesce = 1;
+ nfs_writedata_release(data);
+}
+
+static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
- const struct rpc_call_ops *call_ops, int how)
+ const struct rpc_call_ops *call_ops,
+ struct pnfs_layout_segment *lseg,
+ int how)
{
struct inode *inode = wdata->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);

wdata->mds_ops = call_ops;
+ wdata->lseg = get_lseg(lseg);

dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1194,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
return trypnfs;
}

+static void
+pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+{
+ struct nfs_write_data *data;
+ const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+ desc->pg_lseg = NULL;
+ while (!list_empty(head)) {
+ enum pnfs_try_status trypnfs;
+
+ data = list_entry(head->next, struct nfs_write_data, list);
+ list_del_init(&data->list);
+
+ trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_write_through_mds(desc, data);
+ }
+ put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+ int ret;
+
+ ret = nfs_generic_flush(desc, &head);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ return ret;
+ }
+ pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
+
/*
* Called by non rpc-based layout drivers
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c40ffa5..078670d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -155,11 +155,10 @@ bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int)

void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
-enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
- const struct rpc_call_ops *, int);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -328,13 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
{
}

-static inline enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops, int how)
-{
- return PNFS_NOT_ATTEMPTED;
-}
-
static inline int pnfs_return_layout(struct inode *ino)
{
return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80bd74f..e18d842 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
mempool_free(p, nfs_wdata_mempool);
}

-static void nfs_writedata_release(struct nfs_write_data *wdata)
+void nfs_writedata_release(struct nfs_write_data *wdata)
{
put_lseg(wdata->lseg);
put_nfs_open_context(wdata->args.context);
@@ -887,25 +887,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req,

static int nfs_do_write(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
- struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = data->args.context->path.dentry->d_inode;

- if (lseg != NULL) {
- data->lseg = get_lseg(lseg);
- if (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)
- return 0;
- put_lseg(data->lseg);
- data->lseg = NULL;
- }
-
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}

static int nfs_do_multiple_writes(struct list_head *head,
const struct rpc_call_ops *call_ops,
- struct pnfs_layout_segment *lseg,
int how)
{
struct nfs_write_data *data;
@@ -917,7 +907,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
data = list_entry(head->next, struct nfs_write_data, list);
list_del_init(&data->list);

- ret2 = nfs_do_write(data, call_ops, lseg, how);
+ ret2 = nfs_do_write(data, call_ops, how);
if (ret == 0)
ret = ret2;
}
@@ -1037,23 +1027,24 @@ out:
return ret;
}

-int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_flush_multi(desc, head);
+ return nfs_flush_one(desc, head);
+}
+
+static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
LIST_HEAD(head);
int ret;

- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- ret = nfs_flush_multi(desc, &head);
- else
- ret = nfs_flush_one(desc, &head);
+ ret = nfs_generic_flush(desc, &head);
if (ret == 0)
ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
- desc->pg_lseg, desc->pg_ioflags);
- put_lseg(desc->pg_lseg);
- desc->pg_lseg = NULL;
+ desc->pg_ioflags);
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages);

static const struct nfs_pageio_ops nfs_pageio_write_ops = {
.pg_test = nfs_generic_pg_test,
@@ -1068,6 +1059,12 @@ void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write_mds);

+void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
+{
+ pgio->pg_ops = &nfs_pageio_write_ops;
+ pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
+}
+
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 0a48f84..e2791a2 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -108,9 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req);
extern int nfs_set_page_tag_locked(struct nfs_page *req);
extern void nfs_clear_page_tag_locked(struct nfs_page *req);

-extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
-
-
/*
* Lock the page of an asynchronous request without getting a new reference
*/
--
1.7.6


2011-07-12 19:30:08

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 3/8] NFS: Cache rpc_ops in struct nfs_pageio_descriptor

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/read.c | 19 ++++++++-----------
fs/nfs/write.c | 20 +++++++-------------
include/linux/nfs_page.h | 1 +
3 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index d30d6de..b2e9aad 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -327,6 +327,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
ClearPageError(page);
+ desc->pg_rpc_callops = &nfs_read_partial_ops;
return ret;
out_bad:
while (!list_empty(res)) {
@@ -366,6 +367,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *

nfs_read_rpcsetup(req, data, desc->pg_count, 0);
list_add(&data->list, res);
+ desc->pg_rpc_callops = &nfs_read_full_ops;
out:
return ret;
}
@@ -375,19 +377,14 @@ int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
LIST_HEAD(head);
int ret;

- if (desc->pg_bsize < PAGE_CACHE_SIZE) {
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
ret = nfs_pagein_multi(desc, &head);
- if (ret == 0)
- ret = nfs_do_multiple_reads(&head,
- &nfs_read_partial_ops,
- desc->pg_lseg);
- } else {
+ else
ret = nfs_pagein_one(desc, &head);
- if (ret == 0)
- ret = nfs_do_multiple_reads(&head,
- &nfs_read_full_ops,
- desc->pg_lseg);
- }
+
+ if (ret == 0)
+ ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops,
+ desc->pg_lseg);
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 89cc687..8fa7def 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -975,6 +975,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
offset += len;
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
+ desc->pg_rpc_callops = &nfs_write_partial_ops;
return ret;

out_bad:
@@ -1031,6 +1032,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
/* Set up the argument struct */
nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
list_add(&data->list, res);
+ desc->pg_rpc_callops = &nfs_write_full_ops;
out:
return ret;
}
@@ -1040,21 +1042,13 @@ int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
LIST_HEAD(head);
int ret;

- if (desc->pg_bsize < PAGE_CACHE_SIZE) {
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
ret = nfs_flush_multi(desc, &head);
- if (ret == 0)
- ret = nfs_do_multiple_writes(&head,
- &nfs_write_partial_ops,
- desc->pg_lseg,
- desc->pg_ioflags);
- } else {
+ else
ret = nfs_flush_one(desc, &head);
- if (ret == 0)
- ret = nfs_do_multiple_writes(&head,
- &nfs_write_full_ops,
- desc->pg_lseg,
- desc->pg_ioflags);
- }
+ if (ret == 0)
+ ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
+ desc->pg_lseg, desc->pg_ioflags);
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 9ac2dd1..db3194f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -74,6 +74,7 @@ struct nfs_pageio_descriptor {
const struct nfs_pageio_ops *pg_ops;
int pg_ioflags;
int pg_error;
+ const struct rpc_call_ops *pg_rpc_callops;
struct pnfs_layout_segment *pg_lseg;
};

--
1.7.6


2011-07-12 19:30:09

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 5/8] NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is needed

If an attempt to do pNFS fails, and we have to fall back to reading or writing
through the MDS, then we may want to re-coalesce the requests that we already
have, since the block size for the MDS read/writes may be different from that
of the DS read/writes.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pagelist.c | 57 +++++++++++++++++++++++++++++++++++++++++++--
include/linux/nfs_page.h | 3 +-
2 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d421e19..7139dbf 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -240,6 +240,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_bsize = bsize;
desc->pg_base = 0;
desc->pg_moreio = 0;
+ desc->pg_recoalesce = 0;
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
desc->pg_ioflags = io_flags;
@@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
* Returns true if the request 'req' was successfully coalesced into the
* existing list of pages 'desc'.
*/
-int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
while (!nfs_pageio_do_add_request(desc, req)) {
@@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_error < 0)
return 0;
desc->pg_moreio = 0;
+ if (desc->pg_recoalesce)
+ return 0;
}
return 1;
}

+static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
+{
+ LIST_HEAD(head);
+
+ do {
+ list_splice_init(&desc->pg_list, &head);
+ desc->pg_bytes_written -= desc->pg_count;
+ desc->pg_count = 0;
+ desc->pg_base = 0;
+ desc->pg_recoalesce = 0;
+
+ while (!list_empty(&head)) {
+ struct nfs_page *req;
+
+ req = list_first_entry(&head, struct nfs_page, wb_list);
+ nfs_list_remove_request(req);
+ if (__nfs_pageio_add_request(desc, req))
+ continue;
+ if (desc->pg_error < 0)
+ return 0;
+ break;
+ }
+ } while (desc->pg_recoalesce);
+ return 1;
+}
+
+int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *req)
+{
+ int ret;
+
+ do {
+ ret = __nfs_pageio_add_request(desc, req);
+ if (ret)
+ break;
+ if (desc->pg_error < 0)
+ break;
+ ret = nfs_do_recoalesce(desc);
+ } while (ret);
+ return ret;
+}
+
/**
* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
* @desc: pointer to io descriptor
*/
void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
{
- nfs_pageio_doio(desc);
+ for (;;) {
+ nfs_pageio_doio(desc);
+ if (!desc->pg_recoalesce)
+ break;
+ if (!nfs_do_recoalesce(desc))
+ break;
+ }
}

/**
@@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
if (!list_empty(&desc->pg_list)) {
struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
if (index != prev->wb_index + 1)
- nfs_pageio_doio(desc);
+ nfs_pageio_complete(desc);
}
}

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index db3194f..7241b2a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -68,7 +68,8 @@ struct nfs_pageio_descriptor {
size_t pg_count;
size_t pg_bsize;
unsigned int pg_base;
- char pg_moreio;
+ unsigned char pg_moreio : 1,
+ pg_recoalesce : 1;

struct inode *pg_inode;
const struct nfs_pageio_ops *pg_ops;
--
1.7.6