2011-06-06 22:32:54

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 1/3] NFSv4.1: Fix some issues with pnfs_generic_pg_test

1. If the intention is to coalesce requests 'prev' and 'req' then we
have to ensure at least that we have a layout starting at
req_offset(prev).

2. If we're only requesting a minimal layout of length desc->pg_count,
we need to test the length actually returned by the server before
we allow the coalescing to occur.

3. We need to deal correctly with (pgio->lseg == NULL)

4. Fixup the test guarding the pnfs_update_layout.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/objlayout/objio_osd.c | 3 +++
fs/nfs/pnfs.c | 12 +++++++-----
2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9cf208d..4c41a60 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1001,6 +1001,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
if (!pnfs_generic_pg_test(pgio, prev, req))
return false;

+ if (pgio->pg_lseg == NULL)
+ return true;
+
return pgio->pg_count + req->wb_bytes <=
OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8c1309d..12b53ef 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1059,19 +1059,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
gfp_flags = GFP_NOFS;
}

- if (pgio->pg_count == prev->wb_bytes) {
+ if (pgio->pg_lseg == NULL) {
+ if (pgio->pg_count != prev->wb_bytes)
+ return true;
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
- req_offset(req),
+ req_offset(prev),
pgio->pg_count,
access_type,
gfp_flags);
- return true;
+ if (pgio->pg_lseg == NULL)
+ return true;
}

- if (pgio->pg_lseg &&
- req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
+ if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
pgio->pg_lseg->pls_range.length))
return false;

--
1.7.5.2



2011-06-06 22:32:55

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

We need to ensure that the layouts are set up before we can decide to
coalesce requests. To do so, we want to further split up the struct
nfs_pageio_descriptor operations into an initialisation callback, a
coalescing test callback, and a 'do i/o' callback.

This patch cleans up the existing callback methods before adding the
'initialisation' callback.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4filelayout.c | 15 +++++++++++++--
fs/nfs/objlayout/objio_osd.c | 13 ++++++++++++-
fs/nfs/pagelist.c | 12 +++++-------
fs/nfs/pnfs.h | 29 ++++++++++++++++++++++-------
fs/nfs/read.c | 42 +++++++++++++++++++++++++++++++-----------
fs/nfs/write.c | 27 +++++++++++++++++++++++----
include/linux/nfs_page.h | 17 ++++++++++++++---
7 files changed, 120 insertions(+), 35 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 4269088..e9b9b82 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -654,7 +654,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
* return true : coalesce page
* return false : don't coalesce page
*/
-bool
+static bool
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
@@ -676,6 +676,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return (p_stripe == r_stripe);
}

+static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+ .pg_test = filelayout_pg_test,
+ .pg_doio = nfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+ .pg_test = filelayout_pg_test,
+ .pg_doio = nfs_generic_pg_writepages,
+};
+
static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
{
return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -873,7 +883,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.owner = THIS_MODULE,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
- .pg_test = filelayout_pg_test,
+ .pg_read_ops = &filelayout_pg_read_ops,
+ .pg_write_ops = &filelayout_pg_write_ops,
.mark_pnfs_commit = filelayout_mark_pnfs_commit,
.choose_commit_list = filelayout_choose_commit_list,
.commit_pagelist = filelayout_commit_pagelist,
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4c41a60..31088f3 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1008,6 +1008,16 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
}

+static const struct nfs_pageio_ops objio_pg_read_ops = {
+ .pg_test = objio_pg_test,
+ .pg_doio = nfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops objio_pg_write_ops = {
+ .pg_test = objio_pg_test,
+ .pg_doio = nfs_generic_pg_writepages,
+};
+
static struct pnfs_layoutdriver_type objlayout_type = {
.id = LAYOUT_OSD2_OBJECTS,
.name = "LAYOUT_OSD2_OBJECTS",
@@ -1021,7 +1031,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {

.read_pagelist = objlayout_read_pagelist,
.write_pagelist = objlayout_write_pagelist,
- .pg_test = objio_pg_test,
+ .pg_read_ops = &objio_pg_read_ops,
+ .pg_write_ops = &objio_pg_write_ops,

.free_deviceid_node = objio_free_deviceid_node,

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7913961..2b71ad0 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req)
TASK_UNINTERRUPTIBLE);
}

-static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
{
/*
* FIXME: ideally we should be able to coalesce all requests
@@ -229,7 +229,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
*/
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct nfs_pageio_descriptor *),
+ const struct nfs_pageio_ops *pg_ops,
size_t bsize,
int io_flags)
{
@@ -240,12 +240,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_base = 0;
desc->pg_moreio = 0;
desc->pg_inode = inode;
- desc->pg_doio = doio;
+ desc->pg_ops = pg_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
- desc->pg_test = nfs_generic_pg_test;
- pnfs_pageio_init(desc, inode);
}

/**
@@ -275,7 +273,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return false;
- return pgio->pg_test(pgio, prev, req);
+ return pgio->pg_ops->pg_test(pgio, prev, req);
}

/**
@@ -310,7 +308,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
if (!list_empty(&desc->pg_list)) {
- int error = desc->pg_doio(desc);
+ int error = desc->pg_ops->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
else
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 48d0a8e..8d063c0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
void (*free_lseg) (struct pnfs_layout_segment *lseg);

/* test for nfs page cache coalescing */
- bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+ const struct nfs_pageio_ops *pg_read_ops;
+ const struct nfs_pageio_ops *pg_write_ops;

/* Returns true if layoutdriver wants to divert this request to
* driver's commit routine.
@@ -292,13 +293,22 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}

-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
+static inline const struct nfs_pageio_ops *pnfs_get_read_ops(struct inode *inode)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;

- if (ld)
- pgio->pg_test = ld->pg_test;
+ if (!ld)
+ return NULL;
+ return ld->pg_read_ops;
+}
+
+static inline const struct nfs_pageio_ops *pnfs_get_write_ops(struct inode *inode)
+{
+ struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+ if (!ld)
+ return NULL;
+ return ld->pg_write_ops;
}

#else /* CONFIG_NFS_V4_1 */
@@ -384,9 +394,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}

-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
+static inline const struct nfs_pageio_ops *pnfs_get_read_ops(struct inode *inode)
{
+ return NULL;
+}
+
+static inline const struct nfs_pageio_ops *pnfs_get_write_ops(struct inode *inode)
+{
+ return NULL;
}

static inline void
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 20a7f95..b0f5c19 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -32,6 +32,7 @@

static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
+static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

@@ -113,6 +114,19 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
}
}

+static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode)
+{
+ size_t rsize = NFS_SERVER(inode)->rsize;
+ const struct nfs_pageio_ops *ops;
+
+ ops = pnfs_get_read_ops(inode);
+ if (ops == NULL)
+ ops = &nfs_pageio_read_ops;
+
+ nfs_pageio_init(pgio, inode, ops, rsize, 0);
+}
+
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
@@ -131,14 +145,11 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);

- nfs_pageio_init(&pgio, inode, NULL, 0, 0);
+ nfs_pageio_init_read(&pgio, inode);
nfs_list_add_request(new, &pgio.pg_list);
pgio.pg_count = len;

- if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
- nfs_pagein_multi(&pgio);
- else
- nfs_pagein_one(&pgio);
+ nfs_pageio_complete(&pgio);
return 0;
}

@@ -365,6 +376,20 @@ out:
return ret;
}

+int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_pagein_multi(desc);
+ return nfs_pagein_one(desc);
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
+
+
+static const struct nfs_pageio_ops nfs_pageio_read_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_readpages,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
@@ -635,8 +660,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
.pgio = &pgio,
};
struct inode *inode = mapping->host;
- struct nfs_server *server = NFS_SERVER(inode);
- size_t rsize = server->rsize;
unsigned long npages;
int ret = -ESTALE;

@@ -664,10 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */

- if (rsize < PAGE_CACHE_SIZE)
- nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
- else
- nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
+ nfs_pageio_init_read(&pgio, inode);

ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e268e3b..c379c86 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -977,6 +977,11 @@ out_bad:
return -ENOMEM;
}

+static const struct nfs_pageio_ops nfs_flush_multi_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_flush_multi,
+};
+
/*
* Create an RPC task for the given write request and kick it.
* The page must have been locked by the caller.
@@ -1031,15 +1036,29 @@ out:
return ret;
}

+int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_flush_multi(desc);
+ return nfs_flush_one(desc);
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages);
+
+static const struct nfs_pageio_ops nfs_pageio_write_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_writepages,
+};
+
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
size_t wsize = NFS_SERVER(inode)->wsize;
+ const struct nfs_pageio_ops *ops;
+ ops = pnfs_get_write_ops(inode);
+ if (ops == NULL)
+ ops = &nfs_pageio_write_ops;

- if (wsize < PAGE_CACHE_SIZE)
- nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
- else
- nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
+ nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, wsize, ioflags);
}

/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 3a34e80..b0e26c0 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -55,6 +55,12 @@ struct nfs_page {
struct nfs_writeverf wb_verf; /* Commit cookie */
};

+struct nfs_pageio_descriptor;
+struct nfs_pageio_ops {
+ bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+ int (*pg_doio)(struct nfs_pageio_descriptor *);
+};
+
struct nfs_pageio_descriptor {
struct list_head pg_list;
unsigned long pg_bytes_written;
@@ -64,11 +70,10 @@ struct nfs_pageio_descriptor {
char pg_moreio;

struct inode *pg_inode;
- int (*pg_doio)(struct nfs_pageio_descriptor *);
+ const struct nfs_pageio_ops *pg_ops;
int pg_ioflags;
int pg_error;
struct pnfs_layout_segment *pg_lseg;
- bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
};

#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -85,18 +90,24 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
pgoff_t idx_start, unsigned int npages, int tag);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct nfs_pageio_descriptor *desc),
+ const struct nfs_pageio_ops *pg_ops,
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
struct nfs_page *);
extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
+extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *prev,
+ struct nfs_page *req);
extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req);
extern int nfs_set_page_tag_locked(struct nfs_page *req);
extern void nfs_clear_page_tag_locked(struct nfs_page *req);

+extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
+extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
+

/*
* Lock the page of an asynchronous request without getting a new reference
--
1.7.5.2


2011-06-06 22:32:55

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 3/3] NFSv4.1: Add an initialisation callback for pNFS

Ensure that we always get a layout before setting up the i/o request.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4filelayout.c | 2 +
fs/nfs/objlayout/objio_osd.c | 2 +
fs/nfs/pagelist.c | 2 +
fs/nfs/pnfs.c | 57 ++++++++++++++++++++++-------------------
fs/nfs/pnfs.h | 14 +---------
fs/nfs/read.c | 16 +++---------
fs/nfs/write.c | 12 ++------
include/linux/nfs_page.h | 1 +
8 files changed, 47 insertions(+), 59 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e9b9b82..93c5bae1f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -677,11 +677,13 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
}

static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+ .pg_init = pnfs_generic_pg_init_read,
.pg_test = filelayout_pg_test,
.pg_doio = nfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+ .pg_init = pnfs_generic_pg_init_write,
.pg_test = filelayout_pg_test,
.pg_doio = nfs_generic_pg_writepages,
};
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 31088f3..84b5fb7 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1009,11 +1009,13 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
}

static const struct nfs_pageio_ops objio_pg_read_ops = {
+ .pg_init = pnfs_generic_pg_init_read,
.pg_test = objio_pg_test,
.pg_doio = nfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops objio_pg_write_ops = {
+ .pg_init = pnfs_generic_pg_init_write,
.pg_test = objio_pg_test,
.pg_doio = nfs_generic_pg_writepages,
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 2b71ad0..ed68d78 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -295,6 +295,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
return 0;
} else {
desc->pg_base = req->wb_pgbase;
+ if (desc->pg_ops->pg_init)
+ desc->pg_ops->pg_init(desc, req);
}
nfs_list_remove_request(req);
nfs_list_add_request(req, &desc->pg_list);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 12b53ef..dec4533 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -900,7 +900,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
* Layout segment is retreived from the server if not cached.
* The appropriate layout segment is referenced and returned to the caller.
*/
-struct pnfs_layout_segment *
+static struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
@@ -1043,35 +1043,40 @@ out_forget_reply:
goto out;
}

+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_READ,
+ GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
+
+void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_RW,
+ GFP_NOFS);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
bool
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- enum pnfs_iomode access_type;
- gfp_t gfp_flags;
-
- /* We assume that pg_ioflags == 0 iff we're reading a page */
- if (pgio->pg_ioflags == 0) {
- access_type = IOMODE_READ;
- gfp_flags = GFP_KERNEL;
- } else {
- access_type = IOMODE_RW;
- gfp_flags = GFP_NOFS;
- }
-
- if (pgio->pg_lseg == NULL) {
- if (pgio->pg_count != prev->wb_bytes)
- return true;
- /* This is first coelesce call for a series of nfs_pages */
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- prev->wb_context,
- req_offset(prev),
- pgio->pg_count,
- access_type,
- gfp_flags);
- if (pgio->pg_lseg == NULL)
- return true;
- }
+ if (pgio->pg_lseg == NULL)
+ return nfs_generic_pg_test(pgio, prev, req);

if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
pgio->pg_lseg->pls_range.length))
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 8d063c0..b1babde 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -149,16 +149,14 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
-struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
- loff_t pos, u64 count, enum pnfs_iomode access_type,
- gfp_t gfp_flags);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
const struct rpc_call_ops *, int);
enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
const struct rpc_call_ops *);
+void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
+void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -331,14 +329,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
{
}

-static inline struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
- loff_t pos, u64 count, enum pnfs_iomode access_type,
- gfp_t gfp_flags)
-{
- return NULL;
-}
-
static inline enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b0f5c19..5b65338 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -146,9 +146,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
zero_user_segment(page, len, PAGE_CACHE_SIZE);

nfs_pageio_init_read(&pgio, inode);
- nfs_list_add_request(new, &pgio.pg_list);
- pgio.pg_count = len;
-
+ nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
return 0;
}
@@ -280,7 +278,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
LIST_HEAD(list);

nfs_list_remove_request(req);
@@ -298,10 +296,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);

- BUG_ON(desc->pg_lseg != NULL);
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_READ, GFP_KERNEL);
ClearPageError(page);
offset = 0;
nbytes = desc->pg_count;
@@ -335,6 +329,8 @@ out_bad:
}
SetPageError(page);
nfs_readpage_release(req);
+ put_lseg(lseg);
+ desc->pg_lseg = NULL;
return -ENOMEM;
}

@@ -363,10 +359,6 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
- if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_READ, GFP_KERNEL);

ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
0, lseg);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c379c86..eb56829 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -914,7 +914,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
LIST_HEAD(list);

nfs_list_remove_request(req);
@@ -938,10 +938,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);

- BUG_ON(desc->pg_lseg);
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_RW, GFP_NOFS);
ClearPageError(page);
offset = 0;
nbytes = desc->pg_count;
@@ -974,6 +970,8 @@ out_bad:
nfs_writedata_free(data);
}
nfs_redirty_request(req);
+ put_lseg(lseg);
+ desc->pg_lseg = NULL;
return -ENOMEM;
}

@@ -1019,10 +1017,6 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
- if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_RW, GFP_NOFS);

if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index b0e26c0..2927ecd 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -57,6 +57,7 @@ struct nfs_page {

struct nfs_pageio_descriptor;
struct nfs_pageio_ops {
+ void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
int (*pg_doio)(struct nfs_pageio_descriptor *);
};
--
1.7.5.2


2011-06-08 02:24:21

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 1/3] NFSv4.1: Fix some issues with pnfs_generic_pg_test

On 2011-06-07 21:12, Trond Myklebust wrote:
> On Tue, 2011-06-07 at 20:53 -0400, Benny Halevy wrote:
>> On 2011-06-06 18:32, Trond Myklebust wrote:
>>> 1. If the intention is to coalesce requests 'prev' and 'req' then we
>>> have to ensure at least that we have a layout starting at
>>> req_offset(prev).
>>>
>>> 2. If we're only requesting a minimal layout of length desc->pg_count,
>>> we need to test the length actually returned by the server before
>>> we allow the coalescing to occur.
>>>
>>> 3. We need to deal correctly with (pgio->lseg == NULL)
>>>
>>> 4. Fixup the test guarding the pnfs_update_layout.
>>>
>>> Signed-off-by: Trond Myklebust <[email protected]>
>>> ---
>>> fs/nfs/objlayout/objio_osd.c | 3 +++
>>> fs/nfs/pnfs.c | 12 +++++++-----
>>> 2 files changed, 10 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
>>> index 9cf208d..4c41a60 100644
>>> --- a/fs/nfs/objlayout/objio_osd.c
>>> +++ b/fs/nfs/objlayout/objio_osd.c
>>> @@ -1001,6 +1001,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
>>> if (!pnfs_generic_pg_test(pgio, prev, req))
>>> return false;
>>>
>>> + if (pgio->pg_lseg == NULL)
>>> + return true;
>>> +
>>> return pgio->pg_count + req->wb_bytes <=
>>> OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
>>> }
>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>> index 8c1309d..12b53ef 100644
>>> --- a/fs/nfs/pnfs.c
>>> +++ b/fs/nfs/pnfs.c
>>> @@ -1059,19 +1059,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>>> gfp_flags = GFP_NOFS;
>>> }
>>>
>>> - if (pgio->pg_count == prev->wb_bytes) {
>>> + if (pgio->pg_lseg == NULL) {
>>> + if (pgio->pg_count != prev->wb_bytes)
>>> + return true;
>>> /* This is first coelesce call for a series of nfs_pages */
>>> pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>> prev->wb_context,
>>> - req_offset(req),
>>> + req_offset(prev),
>>> pgio->pg_count,
>>> access_type,
>>> gfp_flags);
>>> - return true;
>>> + if (pgio->pg_lseg == NULL)
>>> + return true;
>>> }
>>>
>>> - if (pgio->pg_lseg &&
>>> - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
>>> + if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
>>
>> One more issue to fix: the condition should be for ">=", not ">"
>
> Hmm.... Shouldn't it rather be:
>
> if (end_offset(req_offset(req), req->wb_bytes) > end_offset(pgio->pg_lseg->pls_range.offset,
> pgio->pg_lseg->pls_range.length))
> return false;
>
> Otherwise you don't know if the entire request fits in this layout
> segment...

True.
Though since we make sure the layout segments are page aligned
having the first offset covered is enough, this is the correct
way to express the condition.

Benny

>
> Cheers
> Trond

2011-06-09 16:31:24

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH 1/3] NFSv4.1: Fix some issues with pnfs_generic_pg_test

On Tue, 2011-06-07 at 22:24 -0400, Benny Halevy wrote:
> On 2011-06-07 21:12, Trond Myklebust wrote:
> > On Tue, 2011-06-07 at 20:53 -0400, Benny Halevy wrote:
> >> On 2011-06-06 18:32, Trond Myklebust wrote:
> >>> 1. If the intention is to coalesce requests 'prev' and 'req' then we
> >>> have to ensure at least that we have a layout starting at
> >>> req_offset(prev).
> >>>
> >>> 2. If we're only requesting a minimal layout of length desc->pg_count,
> >>> we need to test the length actually returned by the server before
> >>> we allow the coalescing to occur.
> >>>
> >>> 3. We need to deal correctly with (pgio->lseg == NULL)
> >>>
> >>> 4. Fixup the test guarding the pnfs_update_layout.
> >>>
> >>> Signed-off-by: Trond Myklebust <[email protected]>
> >>> ---
> >>> fs/nfs/objlayout/objio_osd.c | 3 +++
> >>> fs/nfs/pnfs.c | 12 +++++++-----
> >>> 2 files changed, 10 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> >>> index 9cf208d..4c41a60 100644
> >>> --- a/fs/nfs/objlayout/objio_osd.c
> >>> +++ b/fs/nfs/objlayout/objio_osd.c
> >>> @@ -1001,6 +1001,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
> >>> if (!pnfs_generic_pg_test(pgio, prev, req))
> >>> return false;
> >>>
> >>> + if (pgio->pg_lseg == NULL)
> >>> + return true;
> >>> +
> >>> return pgio->pg_count + req->wb_bytes <=
> >>> OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
> >>> }
> >>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> >>> index 8c1309d..12b53ef 100644
> >>> --- a/fs/nfs/pnfs.c
> >>> +++ b/fs/nfs/pnfs.c
> >>> @@ -1059,19 +1059,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> >>> gfp_flags = GFP_NOFS;
> >>> }
> >>>
> >>> - if (pgio->pg_count == prev->wb_bytes) {
> >>> + if (pgio->pg_lseg == NULL) {
> >>> + if (pgio->pg_count != prev->wb_bytes)
> >>> + return true;
> >>> /* This is first coelesce call for a series of nfs_pages */
> >>> pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> >>> prev->wb_context,
> >>> - req_offset(req),
> >>> + req_offset(prev),
> >>> pgio->pg_count,
> >>> access_type,
> >>> gfp_flags);
> >>> - return true;
> >>> + if (pgio->pg_lseg == NULL)
> >>> + return true;
> >>> }
> >>>
> >>> - if (pgio->pg_lseg &&
> >>> - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
> >>> + if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
> >>
> >> One more issue to fix: the condition should be for ">=", not ">"
> >
> > Hmm.... Shouldn't it rather be:
> >
> > if (end_offset(req_offset(req), req->wb_bytes) > end_offset(pgio->pg_lseg->pls_range.offset,
> > pgio->pg_lseg->pls_range.length))
> > return false;
> >
> > Otherwise you don't know if the entire request fits in this layout
> > segment...
>
> True.
> Though since we make sure the layout segments are page aligned
> having the first offset covered is enough, this is the correct
> way to express the condition.

Ugh... That definitely would require a comment, and probably deserves a
helper function of its own.

Also, the name 'end_offset()' is rather confusing. It really is the
offset of the first byte that lies _outside_ the actual layout segment.

--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com


2011-06-09 18:44:03

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 1/3] NFSv4.1: Fix some issues with pnfs_generic_pg_test

On 2011-06-09 12:31, Trond Myklebust wrote:
> On Tue, 2011-06-07 at 22:24 -0400, Benny Halevy wrote:
>> On 2011-06-07 21:12, Trond Myklebust wrote:
>>> On Tue, 2011-06-07 at 20:53 -0400, Benny Halevy wrote:
>>>> On 2011-06-06 18:32, Trond Myklebust wrote:
>>>>> 1. If the intention is to coalesce requests 'prev' and 'req' then we
>>>>> have to ensure at least that we have a layout starting at
>>>>> req_offset(prev).
>>>>>
>>>>> 2. If we're only requesting a minimal layout of length desc->pg_count,
>>>>> we need to test the length actually returned by the server before
>>>>> we allow the coalescing to occur.
>>>>>
>>>>> 3. We need to deal correctly with (pgio->lseg == NULL)
>>>>>
>>>>> 4. Fixup the test guarding the pnfs_update_layout.
>>>>>
>>>>> Signed-off-by: Trond Myklebust <[email protected]>
>>>>> ---
>>>>> fs/nfs/objlayout/objio_osd.c | 3 +++
>>>>> fs/nfs/pnfs.c | 12 +++++++-----
>>>>> 2 files changed, 10 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
>>>>> index 9cf208d..4c41a60 100644
>>>>> --- a/fs/nfs/objlayout/objio_osd.c
>>>>> +++ b/fs/nfs/objlayout/objio_osd.c
>>>>> @@ -1001,6 +1001,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
>>>>> if (!pnfs_generic_pg_test(pgio, prev, req))
>>>>> return false;
>>>>>
>>>>> + if (pgio->pg_lseg == NULL)
>>>>> + return true;
>>>>> +
>>>>> return pgio->pg_count + req->wb_bytes <=
>>>>> OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
>>>>> }
>>>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>>>> index 8c1309d..12b53ef 100644
>>>>> --- a/fs/nfs/pnfs.c
>>>>> +++ b/fs/nfs/pnfs.c
>>>>> @@ -1059,19 +1059,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
>>>>> gfp_flags = GFP_NOFS;
>>>>> }
>>>>>
>>>>> - if (pgio->pg_count == prev->wb_bytes) {
>>>>> + if (pgio->pg_lseg == NULL) {
>>>>> + if (pgio->pg_count != prev->wb_bytes)
>>>>> + return true;
>>>>> /* This is first coelesce call for a series of nfs_pages */
>>>>> pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>>>> prev->wb_context,
>>>>> - req_offset(req),
>>>>> + req_offset(prev),
>>>>> pgio->pg_count,
>>>>> access_type,
>>>>> gfp_flags);
>>>>> - return true;
>>>>> + if (pgio->pg_lseg == NULL)
>>>>> + return true;
>>>>> }
>>>>>
>>>>> - if (pgio->pg_lseg &&
>>>>> - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
>>>>> + if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
>>>>
>>>> One more issue to fix: the condition should be for ">=", not ">"
>>>
>>> Hmm.... Shouldn't it rather be:
>>>
>>> if (end_offset(req_offset(req), req->wb_bytes) > end_offset(pgio->pg_lseg->pls_range.offset,
>>> pgio->pg_lseg->pls_range.length))
>>> return false;
>>>
>>> Otherwise you don't know if the entire request fits in this layout
>>> segment...
>>
>> True.
>> Though since we make sure the layout segments are page aligned
>> having the first offset covered is enough, this is the correct
>> way to express the condition.
>
> Ugh... That definitely would require a comment, and probably deserves a
> helper function of its own.
>

Agreed. I'm totally with you on your proposal.

> Also, the name 'end_offset()' is rather confusing. It really is the
> offset of the first byte that lies _outside_ the actual layout segment.
>

Correct. I do not mind changing it to a better name if you have something
in mind.

Benny

2011-06-08 00:51:25

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

On Tue, 2011-06-07 at 20:28 -0400, Benny Halevy wrote:
> On 2011-06-06 18:32, Trond Myklebust wrote:
> > We need to ensure that the layouts are set up before we can decide to
> > coalesce requests. To do so, we want to further split up the struct
> > nfs_pageio_descriptor operations into an initialisation callback, a
> > coalescing test callback, and a 'do i/o' callback.
> >
> > This patch cleans up the existing callback methods before adding the
> > 'initialisation' callback.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
> > +
> > static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
> > struct inode *inode, int ioflags)
> > {
> > size_t wsize = NFS_SERVER(inode)->wsize;
> > + const struct nfs_pageio_ops *ops;
> > + ops = pnfs_get_write_ops(inode);
> > + if (ops == NULL)
> > + ops = &nfs_pageio_write_ops;
> >
> > - if (wsize < PAGE_CACHE_SIZE)
> > - nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
> > - else
> > - nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
> > + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, wsize, ioflags);
>
> Trond, this should be:
> nfs_pageio_init(pgio, inode, ops, wsize, ioflags);
>
> Benny

Yep. Fixed in the patch...

Thanks!
Trond

--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com


2011-06-08 01:13:05

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH 1/3] NFSv4.1: Fix some issues with pnfs_generic_pg_test

On Tue, 2011-06-07 at 20:53 -0400, Benny Halevy wrote:
> On 2011-06-06 18:32, Trond Myklebust wrote:
> > 1. If the intention is to coalesce requests 'prev' and 'req' then we
> > have to ensure at least that we have a layout starting at
> > req_offset(prev).
> >
> > 2. If we're only requesting a minimal layout of length desc->pg_count,
> > we need to test the length actually returned by the server before
> > we allow the coalescing to occur.
> >
> > 3. We need to deal correctly with (pgio->lseg == NULL)
> >
> > 4. Fixup the test guarding the pnfs_update_layout.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> > fs/nfs/objlayout/objio_osd.c | 3 +++
> > fs/nfs/pnfs.c | 12 +++++++-----
> > 2 files changed, 10 insertions(+), 5 deletions(-)
> >
> > diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> > index 9cf208d..4c41a60 100644
> > --- a/fs/nfs/objlayout/objio_osd.c
> > +++ b/fs/nfs/objlayout/objio_osd.c
> > @@ -1001,6 +1001,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
> > if (!pnfs_generic_pg_test(pgio, prev, req))
> > return false;
> >
> > + if (pgio->pg_lseg == NULL)
> > + return true;
> > +
> > return pgio->pg_count + req->wb_bytes <=
> > OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
> > }
> > diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> > index 8c1309d..12b53ef 100644
> > --- a/fs/nfs/pnfs.c
> > +++ b/fs/nfs/pnfs.c
> > @@ -1059,19 +1059,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> > gfp_flags = GFP_NOFS;
> > }
> >
> > - if (pgio->pg_count == prev->wb_bytes) {
> > + if (pgio->pg_lseg == NULL) {
> > + if (pgio->pg_count != prev->wb_bytes)
> > + return true;
> > /* This is first coelesce call for a series of nfs_pages */
> > pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> > prev->wb_context,
> > - req_offset(req),
> > + req_offset(prev),
> > pgio->pg_count,
> > access_type,
> > gfp_flags);
> > - return true;
> > + if (pgio->pg_lseg == NULL)
> > + return true;
> > }
> >
> > - if (pgio->pg_lseg &&
> > - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
> > + if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
>
> One more issue to fix: the condition should be for ">=", not ">"

Hmm.... Shouldn't it rather be:

if (end_offset(req_offset(req), req->wb_bytes) > end_offset(pgio->pg_lseg->pls_range.offset,
pgio->pg_lseg->pls_range.length))
return false;

Otherwise you don't know if the entire request fits in this layout
segment...

Cheers
Trond
--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com


2011-06-08 02:30:36

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

On 2011-06-06 18:32, Trond Myklebust wrote:
> We need to ensure that the layouts are set up before we can decide to
> coalesce requests. To do so, we want to further split up the struct
> nfs_pageio_descriptor operations into an initialisation callback, a
> coalescing test callback, and a 'do i/o' callback.
>
> This patch cleans up the existing callback methods before adding the
> 'initialisation' callback.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/nfs4filelayout.c | 15 +++++++++++++--
> fs/nfs/objlayout/objio_osd.c | 13 ++++++++++++-
> fs/nfs/pagelist.c | 12 +++++-------
> fs/nfs/pnfs.h | 29 ++++++++++++++++++++++-------
> fs/nfs/read.c | 42 +++++++++++++++++++++++++++++++-----------
> fs/nfs/write.c | 27 +++++++++++++++++++++++----
> include/linux/nfs_page.h | 17 ++++++++++++++---
> 7 files changed, 120 insertions(+), 35 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 4269088..e9b9b82 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -654,7 +654,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
> * return true : coalesce page
> * return false : don't coalesce page
> */
> -bool
> +static bool
> filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> struct nfs_page *req)
> {
> @@ -676,6 +676,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> return (p_stripe == r_stripe);
> }
>
> +static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> + .pg_test = filelayout_pg_test,
> + .pg_doio = nfs_generic_pg_readpages,
> +};
> +
> +static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> + .pg_test = filelayout_pg_test,
> + .pg_doio = nfs_generic_pg_writepages,
> +};
> +
> static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
> {
> return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
> @@ -873,7 +883,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
> .owner = THIS_MODULE,
> .alloc_lseg = filelayout_alloc_lseg,
> .free_lseg = filelayout_free_lseg,
> - .pg_test = filelayout_pg_test,
> + .pg_read_ops = &filelayout_pg_read_ops,
> + .pg_write_ops = &filelayout_pg_write_ops,
> .mark_pnfs_commit = filelayout_mark_pnfs_commit,
> .choose_commit_list = filelayout_choose_commit_list,
> .commit_pagelist = filelayout_commit_pagelist,
> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> index 4c41a60..31088f3 100644
> --- a/fs/nfs/objlayout/objio_osd.c
> +++ b/fs/nfs/objlayout/objio_osd.c
> @@ -1008,6 +1008,16 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
> OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
> }
>
> +static const struct nfs_pageio_ops objio_pg_read_ops = {
> + .pg_test = objio_pg_test,
> + .pg_doio = nfs_generic_pg_readpages,
> +};
> +
> +static const struct nfs_pageio_ops objio_pg_write_ops = {
> + .pg_test = objio_pg_test,
> + .pg_doio = nfs_generic_pg_writepages,

Hmm, for the non-files layouts we should ignore the server's wsize
for pnfs I/O.

How about using the follwing?

int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
return desc->pg_lseg ? nfs_pagein_one(desc) : nfs_generic_pg_readpages(desc);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);

int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
return desc->pg_lseg ? nfs_flush_one(desc) : nfs_generic_pg_writepages(desc);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

Benny

> +};
> +
> static struct pnfs_layoutdriver_type objlayout_type = {
> .id = LAYOUT_OSD2_OBJECTS,
> .name = "LAYOUT_OSD2_OBJECTS",
> @@ -1021,7 +1031,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
>
> .read_pagelist = objlayout_read_pagelist,
> .write_pagelist = objlayout_write_pagelist,
> - .pg_test = objio_pg_test,
> + .pg_read_ops = &objio_pg_read_ops,
> + .pg_write_ops = &objio_pg_write_ops,
>
> .free_deviceid_node = objio_free_deviceid_node,
>
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index 7913961..2b71ad0 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req)
> TASK_UNINTERRUPTIBLE);
> }
>
> -static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
> +bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
> {
> /*
> * FIXME: ideally we should be able to coalesce all requests
> @@ -229,7 +229,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
> */
> void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> struct inode *inode,
> - int (*doio)(struct nfs_pageio_descriptor *),
> + const struct nfs_pageio_ops *pg_ops,
> size_t bsize,
> int io_flags)
> {
> @@ -240,12 +240,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> desc->pg_base = 0;
> desc->pg_moreio = 0;
> desc->pg_inode = inode;
> - desc->pg_doio = doio;
> + desc->pg_ops = pg_ops;
> desc->pg_ioflags = io_flags;
> desc->pg_error = 0;
> desc->pg_lseg = NULL;
> - desc->pg_test = nfs_generic_pg_test;
> - pnfs_pageio_init(desc, inode);
> }
>
> /**
> @@ -275,7 +273,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
> return false;
> if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
> return false;
> - return pgio->pg_test(pgio, prev, req);
> + return pgio->pg_ops->pg_test(pgio, prev, req);
> }
>
> /**
> @@ -310,7 +308,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
> static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
> {
> if (!list_empty(&desc->pg_list)) {
> - int error = desc->pg_doio(desc);
> + int error = desc->pg_ops->pg_doio(desc);
> if (error < 0)
> desc->pg_error = error;
> else
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 48d0a8e..8d063c0 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
> void (*free_lseg) (struct pnfs_layout_segment *lseg);
>
> /* test for nfs page cache coalescing */
> - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
> + const struct nfs_pageio_ops *pg_read_ops;
> + const struct nfs_pageio_ops *pg_write_ops;
>
> /* Returns true if layoutdriver wants to divert this request to
> * driver's commit routine.
> @@ -292,13 +293,22 @@ static inline int pnfs_return_layout(struct inode *ino)
> return 0;
> }
>
> -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
> - struct inode *inode)
> +static inline const struct nfs_pageio_ops *pnfs_get_read_ops(struct inode *inode)
> {
> struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
>
> - if (ld)
> - pgio->pg_test = ld->pg_test;
> + if (!ld)
> + return NULL;
> + return ld->pg_read_ops;
> +}
> +
> +static inline const struct nfs_pageio_ops *pnfs_get_write_ops(struct inode *inode)
> +{
> + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
> +
> + if (!ld)
> + return NULL;
> + return ld->pg_write_ops;
> }
>
> #else /* CONFIG_NFS_V4_1 */
> @@ -384,9 +394,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
> {
> }
>
> -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
> - struct inode *inode)
> +static inline const struct nfs_pageio_ops *pnfs_get_read_ops(struct inode *inode)
> {
> + return NULL;
> +}
> +
> +static inline const struct nfs_pageio_ops *pnfs_get_write_ops(struct inode *inode)
> +{
> + return NULL;
> }
>
> static inline void
> diff --git a/fs/nfs/read.c b/fs/nfs/read.c
> index 20a7f95..b0f5c19 100644
> --- a/fs/nfs/read.c
> +++ b/fs/nfs/read.c
> @@ -32,6 +32,7 @@
>
> static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
> static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
> +static const struct nfs_pageio_ops nfs_pageio_read_ops;
> static const struct rpc_call_ops nfs_read_partial_ops;
> static const struct rpc_call_ops nfs_read_full_ops;
>
> @@ -113,6 +114,19 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
> }
> }
>
> +static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
> + struct inode *inode)
> +{
> + size_t rsize = NFS_SERVER(inode)->rsize;
> + const struct nfs_pageio_ops *ops;
> +
> + ops = pnfs_get_read_ops(inode);
> + if (ops == NULL)
> + ops = &nfs_pageio_read_ops;
> +
> + nfs_pageio_init(pgio, inode, ops, rsize, 0);
> +}
> +
> int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
> struct page *page)
> {
> @@ -131,14 +145,11 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
> if (len < PAGE_CACHE_SIZE)
> zero_user_segment(page, len, PAGE_CACHE_SIZE);
>
> - nfs_pageio_init(&pgio, inode, NULL, 0, 0);
> + nfs_pageio_init_read(&pgio, inode);
> nfs_list_add_request(new, &pgio.pg_list);
> pgio.pg_count = len;
>
> - if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
> - nfs_pagein_multi(&pgio);
> - else
> - nfs_pagein_one(&pgio);
> + nfs_pageio_complete(&pgio);
> return 0;
> }
>
> @@ -365,6 +376,20 @@ out:
> return ret;
> }
>
> +int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> +{
> + if (desc->pg_bsize < PAGE_CACHE_SIZE)
> + return nfs_pagein_multi(desc);
> + return nfs_pagein_one(desc);
> +}
> +EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
> +
> +
> +static const struct nfs_pageio_ops nfs_pageio_read_ops = {
> + .pg_test = nfs_generic_pg_test,
> + .pg_doio = nfs_generic_pg_readpages,
> +};
> +
> /*
> * This is the callback from RPC telling us whether a reply was
> * received or some error occurred (timeout or socket shutdown).
> @@ -635,8 +660,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
> .pgio = &pgio,
> };
> struct inode *inode = mapping->host;
> - struct nfs_server *server = NFS_SERVER(inode);
> - size_t rsize = server->rsize;
> unsigned long npages;
> int ret = -ESTALE;
>
> @@ -664,10 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
> if (ret == 0)
> goto read_complete; /* all pages were read */
>
> - if (rsize < PAGE_CACHE_SIZE)
> - nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
> - else
> - nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
> + nfs_pageio_init_read(&pgio, inode);
>
> ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
>
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index e268e3b..c379c86 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -977,6 +977,11 @@ out_bad:
> return -ENOMEM;
> }
>
> +static const struct nfs_pageio_ops nfs_flush_multi_ops = {
> + .pg_test = nfs_generic_pg_test,
> + .pg_doio = nfs_flush_multi,
> +};
> +
> /*
> * Create an RPC task for the given write request and kick it.
> * The page must have been locked by the caller.
> @@ -1031,15 +1036,29 @@ out:
> return ret;
> }
>
> +int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
> +{
> + if (desc->pg_bsize < PAGE_CACHE_SIZE)
> + return nfs_flush_multi(desc);
> + return nfs_flush_one(desc);
> +}
> +EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages);
> +
> +static const struct nfs_pageio_ops nfs_pageio_write_ops = {
> + .pg_test = nfs_generic_pg_test,
> + .pg_doio = nfs_generic_pg_writepages,
> +};
> +
> static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
> struct inode *inode, int ioflags)
> {
> size_t wsize = NFS_SERVER(inode)->wsize;
> + const struct nfs_pageio_ops *ops;
> + ops = pnfs_get_write_ops(inode);
> + if (ops == NULL)
> + ops = &nfs_pageio_write_ops;
>
> - if (wsize < PAGE_CACHE_SIZE)
> - nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
> - else
> - nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
> + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, wsize, ioflags);
> }
>
> /*
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 3a34e80..b0e26c0 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -55,6 +55,12 @@ struct nfs_page {
> struct nfs_writeverf wb_verf; /* Commit cookie */
> };
>
> +struct nfs_pageio_descriptor;
> +struct nfs_pageio_ops {
> + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
> + int (*pg_doio)(struct nfs_pageio_descriptor *);
> +};
> +
> struct nfs_pageio_descriptor {
> struct list_head pg_list;
> unsigned long pg_bytes_written;
> @@ -64,11 +70,10 @@ struct nfs_pageio_descriptor {
> char pg_moreio;
>
> struct inode *pg_inode;
> - int (*pg_doio)(struct nfs_pageio_descriptor *);
> + const struct nfs_pageio_ops *pg_ops;
> int pg_ioflags;
> int pg_error;
> struct pnfs_layout_segment *pg_lseg;
> - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
> };
>
> #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
> @@ -85,18 +90,24 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
> pgoff_t idx_start, unsigned int npages, int tag);
> extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> struct inode *inode,
> - int (*doio)(struct nfs_pageio_descriptor *desc),
> + const struct nfs_pageio_ops *pg_ops,
> size_t bsize,
> int how);
> extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
> struct nfs_page *);
> extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
> extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
> +extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
> + struct nfs_page *prev,
> + struct nfs_page *req);
> extern int nfs_wait_on_request(struct nfs_page *);
> extern void nfs_unlock_request(struct nfs_page *req);
> extern int nfs_set_page_tag_locked(struct nfs_page *req);
> extern void nfs_clear_page_tag_locked(struct nfs_page *req);
>
> +extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
> +extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
> +
>
> /*
> * Lock the page of an asynchronous request without getting a new reference

2011-06-09 18:13:07

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

On Thu, 2011-06-09 at 10:51 -0700, Boaz Harrosh wrote:

> 1. We will then need to go through the ld in .pg_init

No. I'll fix nfs_pageio_init_read() and friends by replacing
pnfs_get_read_ops() by something more appropriate.

> 2. What happens in the none pnfs-IO error case pg_bsize will need to be
> saved and restored to MDS value.

Yes, but if you are falling back to read/write-through-mds, then you
need to re-run the coalescing _anyway_, since the total length of the
request needs to fit in an rsize/wsize sized request.

> 3. At least in objects there is no such constant limit, it all depends on
> the layout, start and end. I thought the all point of .pg_test was
> exactly for avoiding a constant pg_bsize. (This is what we had before)
> 4. All "the tests make no sense..." should be moved to the no-pnfs case
> please point these you found out, we'll need to fix them.
>
> Please understand that for none-files layouts pg_bsize is when IO goes
> through MDS only.

As I said, I don't see how fallback to MDS can work correctly today for
the objects case for arbitrary values of rsize/wsize.

Cheers
Trond
--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com


2011-06-08 00:28:20

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

On 2011-06-06 18:32, Trond Myklebust wrote:
> We need to ensure that the layouts are set up before we can decide to
> coalesce requests. To do so, we want to further split up the struct
> nfs_pageio_descriptor operations into an initialisation callback, a
> coalescing test callback, and a 'do i/o' callback.
>
> This patch cleans up the existing callback methods before adding the
> 'initialisation' callback.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/nfs4filelayout.c | 15 +++++++++++++--
> fs/nfs/objlayout/objio_osd.c | 13 ++++++++++++-
> fs/nfs/pagelist.c | 12 +++++-------
> fs/nfs/pnfs.h | 29 ++++++++++++++++++++++-------
> fs/nfs/read.c | 42 +++++++++++++++++++++++++++++++-----------
> fs/nfs/write.c | 27 +++++++++++++++++++++++----
> include/linux/nfs_page.h | 17 ++++++++++++++---
> 7 files changed, 120 insertions(+), 35 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 4269088..e9b9b82 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -654,7 +654,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
> * return true : coalesce page
> * return false : don't coalesce page
> */
> -bool
> +static bool
> filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> struct nfs_page *req)
> {
> @@ -676,6 +676,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> return (p_stripe == r_stripe);
> }
>
> +static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> + .pg_test = filelayout_pg_test,
> + .pg_doio = nfs_generic_pg_readpages,
> +};
> +
> +static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> + .pg_test = filelayout_pg_test,
> + .pg_doio = nfs_generic_pg_writepages,
> +};
> +
> static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
> {
> return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
> @@ -873,7 +883,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
> .owner = THIS_MODULE,
> .alloc_lseg = filelayout_alloc_lseg,
> .free_lseg = filelayout_free_lseg,
> - .pg_test = filelayout_pg_test,
> + .pg_read_ops = &filelayout_pg_read_ops,
> + .pg_write_ops = &filelayout_pg_write_ops,
> .mark_pnfs_commit = filelayout_mark_pnfs_commit,
> .choose_commit_list = filelayout_choose_commit_list,
> .commit_pagelist = filelayout_commit_pagelist,
> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> index 4c41a60..31088f3 100644
> --- a/fs/nfs/objlayout/objio_osd.c
> +++ b/fs/nfs/objlayout/objio_osd.c
> @@ -1008,6 +1008,16 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
> OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
> }
>
> +static const struct nfs_pageio_ops objio_pg_read_ops = {
> + .pg_test = objio_pg_test,
> + .pg_doio = nfs_generic_pg_readpages,
> +};
> +
> +static const struct nfs_pageio_ops objio_pg_write_ops = {
> + .pg_test = objio_pg_test,
> + .pg_doio = nfs_generic_pg_writepages,
> +};
> +
> static struct pnfs_layoutdriver_type objlayout_type = {
> .id = LAYOUT_OSD2_OBJECTS,
> .name = "LAYOUT_OSD2_OBJECTS",
> @@ -1021,7 +1031,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
>
> .read_pagelist = objlayout_read_pagelist,
> .write_pagelist = objlayout_write_pagelist,
> - .pg_test = objio_pg_test,
> + .pg_read_ops = &objio_pg_read_ops,
> + .pg_write_ops = &objio_pg_write_ops,
>
> .free_deviceid_node = objio_free_deviceid_node,
>
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index 7913961..2b71ad0 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req)
> TASK_UNINTERRUPTIBLE);
> }
>
> -static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
> +bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
> {
> /*
> * FIXME: ideally we should be able to coalesce all requests
> @@ -229,7 +229,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
> */
> void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> struct inode *inode,
> - int (*doio)(struct nfs_pageio_descriptor *),
> + const struct nfs_pageio_ops *pg_ops,
> size_t bsize,
> int io_flags)
> {
> @@ -240,12 +240,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> desc->pg_base = 0;
> desc->pg_moreio = 0;
> desc->pg_inode = inode;
> - desc->pg_doio = doio;
> + desc->pg_ops = pg_ops;
> desc->pg_ioflags = io_flags;
> desc->pg_error = 0;
> desc->pg_lseg = NULL;
> - desc->pg_test = nfs_generic_pg_test;
> - pnfs_pageio_init(desc, inode);
> }
>
> /**
> @@ -275,7 +273,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
> return false;
> if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
> return false;
> - return pgio->pg_test(pgio, prev, req);
> + return pgio->pg_ops->pg_test(pgio, prev, req);
> }
>
> /**
> @@ -310,7 +308,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
> static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
> {
> if (!list_empty(&desc->pg_list)) {
> - int error = desc->pg_doio(desc);
> + int error = desc->pg_ops->pg_doio(desc);
> if (error < 0)
> desc->pg_error = error;
> else
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 48d0a8e..8d063c0 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
> void (*free_lseg) (struct pnfs_layout_segment *lseg);
>
> /* test for nfs page cache coalescing */
> - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
> + const struct nfs_pageio_ops *pg_read_ops;
> + const struct nfs_pageio_ops *pg_write_ops;
>
> /* Returns true if layoutdriver wants to divert this request to
> * driver's commit routine.
> @@ -292,13 +293,22 @@ static inline int pnfs_return_layout(struct inode *ino)
> return 0;
> }
>
> -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
> - struct inode *inode)
> +static inline const struct nfs_pageio_ops *pnfs_get_read_ops(struct inode *inode)
> {
> struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
>
> - if (ld)
> - pgio->pg_test = ld->pg_test;
> + if (!ld)
> + return NULL;
> + return ld->pg_read_ops;
> +}
> +
> +static inline const struct nfs_pageio_ops *pnfs_get_write_ops(struct inode *inode)
> +{
> + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
> +
> + if (!ld)
> + return NULL;
> + return ld->pg_write_ops;
> }
>
> #else /* CONFIG_NFS_V4_1 */
> @@ -384,9 +394,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
> {
> }
>
> -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
> - struct inode *inode)
> +static inline const struct nfs_pageio_ops *pnfs_get_read_ops(struct inode *inode)
> {
> + return NULL;
> +}
> +
> +static inline const struct nfs_pageio_ops *pnfs_get_write_ops(struct inode *inode)
> +{
> + return NULL;
> }
>
> static inline void
> diff --git a/fs/nfs/read.c b/fs/nfs/read.c
> index 20a7f95..b0f5c19 100644
> --- a/fs/nfs/read.c
> +++ b/fs/nfs/read.c
> @@ -32,6 +32,7 @@
>
> static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
> static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
> +static const struct nfs_pageio_ops nfs_pageio_read_ops;
> static const struct rpc_call_ops nfs_read_partial_ops;
> static const struct rpc_call_ops nfs_read_full_ops;
>
> @@ -113,6 +114,19 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
> }
> }
>
> +static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
> + struct inode *inode)
> +{
> + size_t rsize = NFS_SERVER(inode)->rsize;
> + const struct nfs_pageio_ops *ops;
> +
> + ops = pnfs_get_read_ops(inode);
> + if (ops == NULL)
> + ops = &nfs_pageio_read_ops;
> +
> + nfs_pageio_init(pgio, inode, ops, rsize, 0);
> +}
> +
> int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
> struct page *page)
> {
> @@ -131,14 +145,11 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
> if (len < PAGE_CACHE_SIZE)
> zero_user_segment(page, len, PAGE_CACHE_SIZE);
>
> - nfs_pageio_init(&pgio, inode, NULL, 0, 0);
> + nfs_pageio_init_read(&pgio, inode);
> nfs_list_add_request(new, &pgio.pg_list);
> pgio.pg_count = len;
>
> - if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
> - nfs_pagein_multi(&pgio);
> - else
> - nfs_pagein_one(&pgio);
> + nfs_pageio_complete(&pgio);
> return 0;
> }
>
> @@ -365,6 +376,20 @@ out:
> return ret;
> }
>
> +int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> +{
> + if (desc->pg_bsize < PAGE_CACHE_SIZE)
> + return nfs_pagein_multi(desc);
> + return nfs_pagein_one(desc);
> +}
> +EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
> +
> +
> +static const struct nfs_pageio_ops nfs_pageio_read_ops = {
> + .pg_test = nfs_generic_pg_test,
> + .pg_doio = nfs_generic_pg_readpages,
> +};
> +
> /*
> * This is the callback from RPC telling us whether a reply was
> * received or some error occurred (timeout or socket shutdown).
> @@ -635,8 +660,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
> .pgio = &pgio,
> };
> struct inode *inode = mapping->host;
> - struct nfs_server *server = NFS_SERVER(inode);
> - size_t rsize = server->rsize;
> unsigned long npages;
> int ret = -ESTALE;
>
> @@ -664,10 +687,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
> if (ret == 0)
> goto read_complete; /* all pages were read */
>
> - if (rsize < PAGE_CACHE_SIZE)
> - nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
> - else
> - nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
> + nfs_pageio_init_read(&pgio, inode);
>
> ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
>
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index e268e3b..c379c86 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -977,6 +977,11 @@ out_bad:
> return -ENOMEM;
> }
>
> +static const struct nfs_pageio_ops nfs_flush_multi_ops = {
> + .pg_test = nfs_generic_pg_test,
> + .pg_doio = nfs_flush_multi,
> +};
> +
> /*
> * Create an RPC task for the given write request and kick it.
> * The page must have been locked by the caller.
> @@ -1031,15 +1036,29 @@ out:
> return ret;
> }
>
> +int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
> +{
> + if (desc->pg_bsize < PAGE_CACHE_SIZE)
> + return nfs_flush_multi(desc);
> + return nfs_flush_one(desc);
> +}
> +EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages);
> +
> +static const struct nfs_pageio_ops nfs_pageio_write_ops = {
> + .pg_test = nfs_generic_pg_test,
> + .pg_doio = nfs_generic_pg_writepages,
> +};
> +
> static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
> struct inode *inode, int ioflags)
> {
> size_t wsize = NFS_SERVER(inode)->wsize;
> + const struct nfs_pageio_ops *ops;
> + ops = pnfs_get_write_ops(inode);
> + if (ops == NULL)
> + ops = &nfs_pageio_write_ops;
>
> - if (wsize < PAGE_CACHE_SIZE)
> - nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
> - else
> - nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
> + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, wsize, ioflags);

Trond, this should be:
nfs_pageio_init(pgio, inode, ops, wsize, ioflags);

Benny

> }
>
> /*
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 3a34e80..b0e26c0 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -55,6 +55,12 @@ struct nfs_page {
> struct nfs_writeverf wb_verf; /* Commit cookie */
> };
>
> +struct nfs_pageio_descriptor;
> +struct nfs_pageio_ops {
> + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
> + int (*pg_doio)(struct nfs_pageio_descriptor *);
> +};
> +
> struct nfs_pageio_descriptor {
> struct list_head pg_list;
> unsigned long pg_bytes_written;
> @@ -64,11 +70,10 @@ struct nfs_pageio_descriptor {
> char pg_moreio;
>
> struct inode *pg_inode;
> - int (*pg_doio)(struct nfs_pageio_descriptor *);
> + const struct nfs_pageio_ops *pg_ops;
> int pg_ioflags;
> int pg_error;
> struct pnfs_layout_segment *pg_lseg;
> - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
> };
>
> #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
> @@ -85,18 +90,24 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
> pgoff_t idx_start, unsigned int npages, int tag);
> extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> struct inode *inode,
> - int (*doio)(struct nfs_pageio_descriptor *desc),
> + const struct nfs_pageio_ops *pg_ops,
> size_t bsize,
> int how);
> extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
> struct nfs_page *);
> extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
> extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
> +extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
> + struct nfs_page *prev,
> + struct nfs_page *req);
> extern int nfs_wait_on_request(struct nfs_page *);
> extern void nfs_unlock_request(struct nfs_page *req);
> extern int nfs_set_page_tag_locked(struct nfs_page *req);
> extern void nfs_clear_page_tag_locked(struct nfs_page *req);
>
> +extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
> +extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
> +
>
> /*
> * Lock the page of an asynchronous request without getting a new reference

2011-06-08 00:53:26

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 1/3] NFSv4.1: Fix some issues with pnfs_generic_pg_test

On 2011-06-06 18:32, Trond Myklebust wrote:
> 1. If the intention is to coalesce requests 'prev' and 'req' then we
> have to ensure at least that we have a layout starting at
> req_offset(prev).
>
> 2. If we're only requesting a minimal layout of length desc->pg_count,
> we need to test the length actually returned by the server before
> we allow the coalescing to occur.
>
> 3. We need to deal correctly with (pgio->lseg == NULL)
>
> 4. Fixup the test guarding the pnfs_update_layout.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/objlayout/objio_osd.c | 3 +++
> fs/nfs/pnfs.c | 12 +++++++-----
> 2 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> index 9cf208d..4c41a60 100644
> --- a/fs/nfs/objlayout/objio_osd.c
> +++ b/fs/nfs/objlayout/objio_osd.c
> @@ -1001,6 +1001,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
> if (!pnfs_generic_pg_test(pgio, prev, req))
> return false;
>
> + if (pgio->pg_lseg == NULL)
> + return true;
> +
> return pgio->pg_count + req->wb_bytes <=
> OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
> }
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 8c1309d..12b53ef 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1059,19 +1059,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> gfp_flags = GFP_NOFS;
> }
>
> - if (pgio->pg_count == prev->wb_bytes) {
> + if (pgio->pg_lseg == NULL) {
> + if (pgio->pg_count != prev->wb_bytes)
> + return true;
> /* This is first coelesce call for a series of nfs_pages */
> pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> prev->wb_context,
> - req_offset(req),
> + req_offset(prev),
> pgio->pg_count,
> access_type,
> gfp_flags);
> - return true;
> + if (pgio->pg_lseg == NULL)
> + return true;
> }
>
> - if (pgio->pg_lseg &&
> - req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
> + if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,

One more issue to fix: the condition should be for ">=", not ">"

Benny

> pgio->pg_lseg->pls_range.length))
> return false;
>

2011-06-09 17:51:09

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

On 06/09/2011 09:37 AM, Trond Myklebust wrote:
>> How about using the follwing?
>>
>> int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
>> {
>> return desc->pg_lseg ? nfs_pagein_one(desc) : nfs_generic_pg_readpages(desc);
>> }
>> EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
>>
>> int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
>> {
>> return desc->pg_lseg ? nfs_flush_one(desc) : nfs_generic_pg_writepages(desc);
>> }
>> EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
>>
>> Benny
>
> I'd prefer to have the objects and blocks set desc->pg_bsize correctly
> according to their requirements. Otherwise, some of the tests make no
> sense...
>

1. We will then need to go through the ld in .pg_init
2. What happens in the none pnfs-IO error case pg_bsize will need to be
saved and restored to MDS value.
3. At least in objects there is no such constant limit, it all depends on
the layout, start and end. I thought the all point of .pg_test was
exactly for avoiding a constant pg_bsize. (This is what we had before)
4. All "the tests make no sense..." should be moved to the no-pnfs case
please point these you found out, we'll need to fix them.

Please understand that for none-files layouts pg_bsize is when IO goes
through MDS only.

Thanks
Boaz

2011-06-09 18:58:22

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

On 2011-06-09 14:13, Trond Myklebust wrote:
> On Thu, 2011-06-09 at 10:51 -0700, Boaz Harrosh wrote:
>
>> 1. We will then need to go through the ld in .pg_init
>
> No. I'll fix nfs_pageio_init_read() and friends by replacing
> pnfs_get_read_ops() by something more appropriate.
>

OK, I just push the pnfs-block stuff out so you can see what I did
over your previous version so I'll rebase over the new version once
you send it.

>> 2. What happens in the none pnfs-IO error case pg_bsize will need to be
>> saved and restored to MDS value.
>
> Yes, but if you are falling back to read/write-through-mds, then you
> need to re-run the coalescing _anyway_, since the total length of the
> request needs to fit in an rsize/wsize sized request.
>

Right. So we will need a place holder to keep a separate value for
minimum OSD's pg_bsize vs. the MDS's (like what we used to have for DS's
[rw]size in the past)

Benny

>> 3. At least in objects there is no such constant limit, it all depends on
>> the layout, start and end. I thought the all point of .pg_test was
>> exactly for avoiding a constant pg_bsize. (This is what we had before)
>> 4. All "the tests make no sense..." should be moved to the no-pnfs case
>> please point these you found out, we'll need to fix them.
>>
>> Please understand that for none-files layouts pg_bsize is when IO goes
>> through MDS only.
>
> As I said, I don't see how fallback to MDS can work correctly today for
> the objects case for arbitrary values of rsize/wsize.
>
> Cheers
> Trond

2011-06-09 16:37:22

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

On Tue, 2011-06-07 at 22:30 -0400, Benny Halevy wrote:
> On 2011-06-06 18:32, Trond Myklebust wrote:
> > We need to ensure that the layouts are set up before we can decide to
> > coalesce requests. To do so, we want to further split up the struct
> > nfs_pageio_descriptor operations into an initialisation callback, a
> > coalescing test callback, and a 'do i/o' callback.
> >
> > This patch cleans up the existing callback methods before adding the
> > 'initialisation' callback.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
> > ---
> > fs/nfs/nfs4filelayout.c | 15 +++++++++++++--
> > fs/nfs/objlayout/objio_osd.c | 13 ++++++++++++-
> > fs/nfs/pagelist.c | 12 +++++-------
> > fs/nfs/pnfs.h | 29 ++++++++++++++++++++++-------
> > fs/nfs/read.c | 42 +++++++++++++++++++++++++++++++-----------
> > fs/nfs/write.c | 27 +++++++++++++++++++++++----
> > include/linux/nfs_page.h | 17 ++++++++++++++---
> > 7 files changed, 120 insertions(+), 35 deletions(-)
> >
> > diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> > index 4269088..e9b9b82 100644
> > --- a/fs/nfs/nfs4filelayout.c
> > +++ b/fs/nfs/nfs4filelayout.c
> > @@ -654,7 +654,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
> > * return true : coalesce page
> > * return false : don't coalesce page
> > */
> > -bool
> > +static bool
> > filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> > struct nfs_page *req)
> > {
> > @@ -676,6 +676,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
> > return (p_stripe == r_stripe);
> > }
> >
> > +static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> > + .pg_test = filelayout_pg_test,
> > + .pg_doio = nfs_generic_pg_readpages,
> > +};
> > +
> > +static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> > + .pg_test = filelayout_pg_test,
> > + .pg_doio = nfs_generic_pg_writepages,
> > +};
> > +
> > static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
> > {
> > return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
> > @@ -873,7 +883,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
> > .owner = THIS_MODULE,
> > .alloc_lseg = filelayout_alloc_lseg,
> > .free_lseg = filelayout_free_lseg,
> > - .pg_test = filelayout_pg_test,
> > + .pg_read_ops = &filelayout_pg_read_ops,
> > + .pg_write_ops = &filelayout_pg_write_ops,
> > .mark_pnfs_commit = filelayout_mark_pnfs_commit,
> > .choose_commit_list = filelayout_choose_commit_list,
> > .commit_pagelist = filelayout_commit_pagelist,
> > diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
> > index 4c41a60..31088f3 100644
> > --- a/fs/nfs/objlayout/objio_osd.c
> > +++ b/fs/nfs/objlayout/objio_osd.c
> > @@ -1008,6 +1008,16 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
> > OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
> > }
> >
> > +static const struct nfs_pageio_ops objio_pg_read_ops = {
> > + .pg_test = objio_pg_test,
> > + .pg_doio = nfs_generic_pg_readpages,
> > +};
> > +
> > +static const struct nfs_pageio_ops objio_pg_write_ops = {
> > + .pg_test = objio_pg_test,
> > + .pg_doio = nfs_generic_pg_writepages,
>
> Hmm, for the non-files layouts we should ignore the server's wsize
> for pnfs I/O.
>
> How about using the follwing?
>
> int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> {
> return desc->pg_lseg ? nfs_pagein_one(desc) : nfs_generic_pg_readpages(desc);
> }
> EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
>
> int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
> {
> return desc->pg_lseg ? nfs_flush_one(desc) : nfs_generic_pg_writepages(desc);
> }
> EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
>
> Benny

I'd prefer to have the objects and blocks set desc->pg_bsize correctly
according to their requirements. Otherwise, some of the tests make no
sense...

--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com


2011-06-09 21:31:59

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 2/3] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

On 06/09/2011 11:58 AM, Benny Halevy wrote:
> On 2011-06-09 14:13, Trond Myklebust wrote:
>> On Thu, 2011-06-09 at 10:51 -0700, Boaz Harrosh wrote:
>>
>>> 1. We will then need to go through the ld in .pg_init
>>
>> No. I'll fix nfs_pageio_init_read() and friends by replacing
>> pnfs_get_read_ops() by something more appropriate.
>>
>
> OK, I just push the pnfs-block stuff out so you can see what I did
> over your previous version so I'll rebase over the new version once
> you send it.
>
>>> 2. What happens in the none pnfs-IO error case pg_bsize will need to be
>>> saved and restored to MDS value.
>>
>> Yes, but if you are falling back to read/write-through-mds, then you
>> need to re-run the coalescing _anyway_, since the total length of the
>> request needs to fit in an rsize/wsize sized request.
>>
>
> Right. So we will need a place holder to keep a separate value for
> minimum OSD's pg_bsize vs. the MDS's (like what we used to have for DS's
> [rw]size in the past)
>

There is no minimum OSD pg_bsize. Just ~0 would be good.

I still don't see what is the fuss? Why can't we just not care about
[rw]size in the LD case and files-layout can take care of itself?
Then pg_bsize is always the MDS value.

One thing I still did not understand in the files-layout case is
that if you are striping over, lets say 3 DSs. And each DS has a
limit of wsize. Than is it not allowed to write "3 * wsize" in total?

I understand that in the sparse case you will need to break up your
IO to a strip_unit each because of the gap in the addressing. but
what about the dense case. Or is it the opposite, I always get the
dense and the sparse mixed up.

> Benny
>
>>> 3. At least in objects there is no such constant limit, it all depends on
>>> the layout, start and end. I thought the all point of .pg_test was
>>> exactly for avoiding a constant pg_bsize. (This is what we had before)
>>> 4. All "the tests make no sense..." should be moved to the no-pnfs case
>>> please point these you found out, we'll need to fix them.
>>>
>>> Please understand that for none-files layouts pg_bsize is when IO goes
>>> through MDS only.
>>
>> As I said, I don't see how fallback to MDS can work correctly today for
>> the objects case for arbitrary values of rsize/wsize.
>>

That's fine then. For the error case we redo the coalescing with the ops
changed to generic nfs and we should be good right?

I'll be testing for these cases in Bakeathone with error injection patches.

>> Cheers
>> Trond

Thanks
Boaz