2011-06-10 01:31:07

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 1/5] NFSv4.1: Fix some issues with pnfs_generic_pg_test

1. If the intention is to coalesce requests 'prev' and 'req' then we
have to ensure at least that we have a layout starting at
req_offset(prev).

2. If we're only requesting a minimal layout of length desc->pg_count,
we need to test the length actually returned by the server before
we allow the coalescing to occur.

3. We need to deal correctly with (pgio->lseg == NULL)

4. Fixup the test guarding the pnfs_update_layout.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/objlayout/objio_osd.c | 3 +++
fs/nfs/pnfs.c | 12 +++++++-----
2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9cf208d..4c41a60 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1001,6 +1001,9 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
if (!pnfs_generic_pg_test(pgio, prev, req))
return false;

+ if (pgio->pg_lseg == NULL)
+ return true;
+
return pgio->pg_count + req->wb_bytes <=
OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8c1309d..12b53ef 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1059,19 +1059,21 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
gfp_flags = GFP_NOFS;
}

- if (pgio->pg_count == prev->wb_bytes) {
+ if (pgio->pg_lseg == NULL) {
+ if (pgio->pg_count != prev->wb_bytes)
+ return true;
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
- req_offset(req),
+ req_offset(prev),
pgio->pg_count,
access_type,
gfp_flags);
- return true;
+ if (pgio->pg_lseg == NULL)
+ return true;
}

- if (pgio->pg_lseg &&
- req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
+ if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
pgio->pg_lseg->pls_range.length))
return false;

--
1.7.5.2



2011-06-10 04:06:33

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On 2011-06-09 22:53, Boaz Harrosh wrote:
> On 06/09/2011 06:31 PM, Trond Myklebust wrote:
> <snip>
>> +void
>> +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
>> +{
>> + BUG_ON(pgio->pg_lseg != NULL);
>> +
>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>> + req->wb_context,
>> + req_offset(req),
>> + req->wb_bytes,
>> + IOMODE_READ,
>> + GFP_KERNEL);
>> +}
>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
>> +
>> +void
>> +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
>> +{
>> + BUG_ON(pgio->pg_lseg != NULL);
>> +
>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>> + req->wb_context,
>> + req_offset(req),
>> + req->wb_bytes,
>> + IOMODE_RW,
>> + GFP_NOFS);
>> +}
>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
>> +
>
> These two above are identical except the IOMODE_{READ,RW} variable.

And the respective gfp flags...

> Why don't you just have one and let the caller pass the IOMODE_ as a
> 3rd parameter. Do you expect more code to be added here?
>
> Boaz
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2011-06-10 01:31:08

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 2/5] NFSv4.1: Fix an off-by-one error in pnfs_generic_pg_test

And document what is going on there...

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 21 ++++++++++++++++-----
1 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 12b53ef..9a4ce7c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1073,11 +1073,22 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return true;
}

- if (req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
- pgio->pg_lseg->pls_range.length))
- return false;
-
- return true;
+ /*
+ * Test if a nfs_page is fully contained in the pnfs_layout_range.
+ * Note that this test makes several assumptions:
+ * - that the previous nfs_page in the struct nfs_pageio_descriptor
+ * is known to lie within the range.
+ * - that the nfs_page being tested is known to be contiguous with the
+ * previous nfs_page.
+ * - Layout ranges are page aligned, so we only have to test the
+ * start offset of the request.
+ *
+ * Please also note that 'end_offset' is actually the offset of the
+ * first byte that lies outside the pnfs_layout_range. FIXME?
+ *
+ */
+ return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
+ pgio->pg_lseg->pls_range.length);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);

--
1.7.5.2


2011-06-10 01:31:08

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 3/5] NFS: Cleanup of the nfs_pageio code in preparation for a pnfs bugfix

We need to ensure that the layouts are set up before we can decide to
coalesce requests. To do so, we want to further split up the struct
nfs_pageio_descriptor operations into an initialisation callback, a
coalescing test callback, and a 'do i/o' callback.

This patch cleans up the existing callback methods before adding the
'initialisation' callback.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4filelayout.c | 15 ++++++++++++-
fs/nfs/objlayout/objio_osd.c | 13 +++++++++++-
fs/nfs/pagelist.c | 12 ++++------
fs/nfs/pnfs.c | 24 ++++++++++++++++++++++
fs/nfs/pnfs.h | 25 ++++++++++++-----------
fs/nfs/read.c | 44 +++++++++++++++++++++++++++++++----------
fs/nfs/write.c | 33 +++++++++++++++++++++++++-----
include/linux/nfs_page.h | 17 +++++++++++++--
8 files changed, 141 insertions(+), 42 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 4269088..e9b9b82 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -654,7 +654,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
* return true : coalesce page
* return false : don't coalesce page
*/
-bool
+static bool
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
@@ -676,6 +676,16 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
return (p_stripe == r_stripe);
}

+static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+ .pg_test = filelayout_pg_test,
+ .pg_doio = nfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+ .pg_test = filelayout_pg_test,
+ .pg_doio = nfs_generic_pg_writepages,
+};
+
static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
{
return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -873,7 +883,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.owner = THIS_MODULE,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
- .pg_test = filelayout_pg_test,
+ .pg_read_ops = &filelayout_pg_read_ops,
+ .pg_write_ops = &filelayout_pg_write_ops,
.mark_pnfs_commit = filelayout_mark_pnfs_commit,
.choose_commit_list = filelayout_choose_commit_list,
.commit_pagelist = filelayout_commit_pagelist,
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4c41a60..31088f3 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1008,6 +1008,16 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
}

+static const struct nfs_pageio_ops objio_pg_read_ops = {
+ .pg_test = objio_pg_test,
+ .pg_doio = nfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops objio_pg_write_ops = {
+ .pg_test = objio_pg_test,
+ .pg_doio = nfs_generic_pg_writepages,
+};
+
static struct pnfs_layoutdriver_type objlayout_type = {
.id = LAYOUT_OSD2_OBJECTS,
.name = "LAYOUT_OSD2_OBJECTS",
@@ -1021,7 +1031,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {

.read_pagelist = objlayout_read_pagelist,
.write_pagelist = objlayout_write_pagelist,
- .pg_test = objio_pg_test,
+ .pg_read_ops = &objio_pg_read_ops,
+ .pg_write_ops = &objio_pg_write_ops,

.free_deviceid_node = objio_free_deviceid_node,

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7913961..2b71ad0 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -204,7 +204,7 @@ nfs_wait_on_request(struct nfs_page *req)
TASK_UNINTERRUPTIBLE);
}

-static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
{
/*
* FIXME: ideally we should be able to coalesce all requests
@@ -229,7 +229,7 @@ static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_p
*/
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct nfs_pageio_descriptor *),
+ const struct nfs_pageio_ops *pg_ops,
size_t bsize,
int io_flags)
{
@@ -240,12 +240,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_base = 0;
desc->pg_moreio = 0;
desc->pg_inode = inode;
- desc->pg_doio = doio;
+ desc->pg_ops = pg_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
- desc->pg_test = nfs_generic_pg_test;
- pnfs_pageio_init(desc, inode);
}

/**
@@ -275,7 +273,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return false;
- return pgio->pg_test(pgio, prev, req);
+ return pgio->pg_ops->pg_test(pgio, prev, req);
}

/**
@@ -310,7 +308,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
if (!list_empty(&desc->pg_list)) {
- int error = desc->pg_doio(desc);
+ int error = desc->pg_ops->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
else
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 9a4ce7c..68349dc 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1044,6 +1044,30 @@ out_forget_reply:
}

bool
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+ if (ld == NULL)
+ return false;
+ nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+ return true;
+}
+
+bool
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+ if (ld == NULL)
+ return false;
+ nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+ return true;
+}
+
+bool
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 48d0a8e..c5f1d8c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
void (*free_lseg) (struct pnfs_layout_segment *lseg);

/* test for nfs page cache coalescing */
- bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+ const struct nfs_pageio_ops *pg_read_ops;
+ const struct nfs_pageio_ops *pg_write_ops;

/* Returns true if layoutdriver wants to divert this request to
* driver's commit routine.
@@ -152,6 +153,10 @@ struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
loff_t pos, u64 count, enum pnfs_iomode access_type,
gfp_t gfp_flags);
+
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
@@ -292,15 +297,6 @@ static inline int pnfs_return_layout(struct inode *ino)
return 0;
}

-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
-{
- struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
-
- if (ld)
- pgio->pg_test = ld->pg_test;
-}
-
#else /* CONFIG_NFS_V4_1 */

static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -384,9 +380,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}

-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
+static bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
+ return false;
+}
+
+static bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+ return false;
}

static inline void
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 20a7f95..b46157d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -32,6 +32,7 @@

static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
+static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

@@ -113,6 +114,21 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
}
}

+static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode)
+{
+ nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+ NFS_SERVER(inode)->rsize, 0);
+}
+
+static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode)
+{
+
+ if (!pnfs_pageio_init_read(pgio, inode))
+ nfs_pageio_init_read_mds(pgio, inode);
+}
+
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
@@ -131,14 +147,11 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);

- nfs_pageio_init(&pgio, inode, NULL, 0, 0);
+ nfs_pageio_init_read(&pgio, inode);
nfs_list_add_request(new, &pgio.pg_list);
pgio.pg_count = len;

- if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
- nfs_pagein_multi(&pgio);
- else
- nfs_pagein_one(&pgio);
+ nfs_pageio_complete(&pgio);
return 0;
}

@@ -365,6 +378,20 @@ out:
return ret;
}

+int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_pagein_multi(desc);
+ return nfs_pagein_one(desc);
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);
+
+
+static const struct nfs_pageio_ops nfs_pageio_read_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_readpages,
+};
+
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
@@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
.pgio = &pgio,
};
struct inode *inode = mapping->host;
- struct nfs_server *server = NFS_SERVER(inode);
- size_t rsize = server->rsize;
unsigned long npages;
int ret = -ESTALE;

@@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */

- if (rsize < PAGE_CACHE_SIZE)
- nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
- else
- nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
+ nfs_pageio_init_read(&pgio, inode);

ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e268e3b..828fba7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -977,6 +977,11 @@ out_bad:
return -ENOMEM;
}

+static const struct nfs_pageio_ops nfs_flush_multi_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_flush_multi,
+};
+
/*
* Create an RPC task for the given write request and kick it.
* The page must have been locked by the caller.
@@ -1031,15 +1036,31 @@ out:
return ret;
}

-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+ if (desc->pg_bsize < PAGE_CACHE_SIZE)
+ return nfs_flush_multi(desc);
+ return nfs_flush_one(desc);
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pg_writepages);
+
+static const struct nfs_pageio_ops nfs_pageio_write_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_writepages,
+};
+
+static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
- size_t wsize = NFS_SERVER(inode)->wsize;
+ nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+ NFS_SERVER(inode)->wsize, ioflags);
+}

- if (wsize < PAGE_CACHE_SIZE)
- nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
- else
- nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
+static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags)
+{
+ if (!pnfs_pageio_init_write(pgio, inode, ioflags))
+ nfs_pageio_init_write_mds(pgio, inode, ioflags);
}

/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 3a34e80..b0e26c0 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -55,6 +55,12 @@ struct nfs_page {
struct nfs_writeverf wb_verf; /* Commit cookie */
};

+struct nfs_pageio_descriptor;
+struct nfs_pageio_ops {
+ bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+ int (*pg_doio)(struct nfs_pageio_descriptor *);
+};
+
struct nfs_pageio_descriptor {
struct list_head pg_list;
unsigned long pg_bytes_written;
@@ -64,11 +70,10 @@ struct nfs_pageio_descriptor {
char pg_moreio;

struct inode *pg_inode;
- int (*pg_doio)(struct nfs_pageio_descriptor *);
+ const struct nfs_pageio_ops *pg_ops;
int pg_ioflags;
int pg_error;
struct pnfs_layout_segment *pg_lseg;
- bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
};

#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -85,18 +90,24 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
pgoff_t idx_start, unsigned int npages, int tag);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct nfs_pageio_descriptor *desc),
+ const struct nfs_pageio_ops *pg_ops,
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
struct nfs_page *);
extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
+extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *prev,
+ struct nfs_page *req);
extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req);
extern int nfs_set_page_tag_locked(struct nfs_page *req);
extern void nfs_clear_page_tag_locked(struct nfs_page *req);

+extern int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
+extern int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
+

/*
* Lock the page of an asynchronous request without getting a new reference
--
1.7.5.2


2011-06-10 16:57:27

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On 06/10/2011 09:28 AM, Trond Myklebust wrote:
>>
>> That makes it even more complicated for a do nothing function. We dont do
>> a different function for each different parameter. We can just do a
>> "bool write" and unify the dam thing
>
> Right now, the nfs_pageio_descriptor doesn't know about reads vs writes.
> It just knows about 'coalesce requests' and 'perform i/o'. I'd prefer to
> keep that abstraction, as it makes things cleaner, particularly when you
> get to patch 5 (NFSv4.1: Fall back to ordinary i/o through the mds if we
> have no layout segment). Why add more 'if' statements when you don't
> need to...
>

OK It's fine. I'm convinced. Do you have this on a git tree? I want to test
it out.

What was the disposition of desc->pg_bsize do I need to adjust it for the
pnfs_case in objlayout?

Thanks
Boaz

2011-06-10 02:53:30

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On 06/09/2011 06:31 PM, Trond Myklebust wrote:
<snip>
> +void
> +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
> +{
> + BUG_ON(pgio->pg_lseg != NULL);
> +
> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> + req->wb_context,
> + req_offset(req),
> + req->wb_bytes,
> + IOMODE_READ,
> + GFP_KERNEL);
> +}
> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
> +
> +void
> +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
> +{
> + BUG_ON(pgio->pg_lseg != NULL);
> +
> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> + req->wb_context,
> + req_offset(req),
> + req->wb_bytes,
> + IOMODE_RW,
> + GFP_NOFS);
> +}
> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
> +

These two above are identical except the IOMODE_{READ,RW} variable.
Why don't you just have one and let the caller pass the IOMODE_ as a
3rd parameter. Do you expect more code to be added here?

Boaz

2011-06-10 19:29:48

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On 2011-06-10 13:32, Trond Myklebust wrote:
> On Fri, 2011-06-10 at 09:57 -0700, Boaz Harrosh wrote:
>> On 06/10/2011 09:28 AM, Trond Myklebust wrote:
>>>>
>>>> That makes it even more complicated for a do nothing function. We dont do
>>>> a different function for each different parameter. We can just do a
>>>> "bool write" and unify the dam thing
>>>
>>> Right now, the nfs_pageio_descriptor doesn't know about reads vs writes.
>>> It just knows about 'coalesce requests' and 'perform i/o'. I'd prefer to
>>> keep that abstraction, as it makes things cleaner, particularly when you
>>> get to patch 5 (NFSv4.1: Fall back to ordinary i/o through the mds if we
>>> have no layout segment). Why add more 'if' statements when you don't
>>> need to...
>>>
>>
>> OK It's fine. I'm convinced. Do you have this on a git tree? I want to test
>> it out.
>
> I've added it to the 'nfs-for-bakeathon' branch.
>

I've also merged it into
git://git.linux-nfs.org/~bhalevy/linux-pnfs.git nfs-upstream

and rebased everything on top of it.

>> What was the disposition of desc->pg_bsize do I need to adjust it for the
>> pnfs_case in objlayout?
>
> You might need to adjust it. Please check...
>

As long the the MDS [rw]size are larger or equal to PAGE_SIZE
I believe you should be OK.

> I'm still working on the 'fallback to write through mds' case to ensure
> that we re-coalesce if the call to pnfs_try_to_read_data() and
> pnfs_try_to_write_data(). Once that is done, I think that the objects
> code will always do the right thing and I anticipate that the blocks
> code can reuse the same code...

Right.

Thanks for picking this up!

Benny

>
> Cheers
> Trond
>

2011-06-10 16:14:23

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On 06/09/2011 09:43 PM, Benny Halevy wrote:
> On 2011-06-10 00:28, Boaz Harrosh wrote:
>> On 06/09/2011 09:06 PM, Benny Halevy wrote:
>>> On 2011-06-09 22:53, Boaz Harrosh wrote:
>>>> On 06/09/2011 06:31 PM, Trond Myklebust wrote:
>>>> <snip>
>>>>> +void
>>>>> +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
>>>>> +{
>>>>> + BUG_ON(pgio->pg_lseg != NULL);
>>>>> +
>>>>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>>>> + req->wb_context,
>>>>> + req_offset(req),
>>>>> + req->wb_bytes,
>>>>> + IOMODE_READ,
>>>>> + GFP_KERNEL);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
>>>>> +
>>>>> +void
>>>>> +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
>>>>> +{
>>>>> + BUG_ON(pgio->pg_lseg != NULL);
>>>>> +
>>>>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>>>> + req->wb_context,
>>>>> + req_offset(req),
>>>>> + req->wb_bytes,
>>>>> + IOMODE_RW,
>>>>> + GFP_NOFS);
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
>>>>> +
>>>>
>>>> These two above are identical except the IOMODE_{READ,RW} variable.
>>>
>>> And the respective gfp flags...
>>>
>>
>> So is that "we should" or should-not?
>>
>
> That doesn't make much sense when you've defined separate vectors
> for read and write.

So what the same function is set at both vectors, and the few callers (1)
passes a flag.

If you ask me we could do with one vector and a flag throughout this
stack. Actually don't make the flag as function parameter but as a member
of nfs_pageio_descriptor. For example in osd we want to know if we are
reading or writing in the raid5 case it produces different results.

> And in any case, since pg_init is not pnfs specific the caller shouldn't
> pass IOMODE_* values but rather a generic value that will require translation anyway.
> In this case I'd just consider using a common function to be called
> respectively from both methods:
>
> static void
> pnfs_pg_update_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req,
> enum pnfs_iomode iomode, gfp_t gfp_flags)
> {
> BUG_ON(pgio->pg_lseg != NULL);
>
> pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> req->wb_context,
> req_offset(req),
> req->wb_bytes,
> iomode,
> gfp_flags);
> }
>

That makes it even more complicated for a do nothing function. We dont do
a different function for each different parameter. We can just do a
"bool write" and unify the dam thing

Boaz

2011-06-10 04:43:57

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On 2011-06-10 00:28, Boaz Harrosh wrote:
> On 06/09/2011 09:06 PM, Benny Halevy wrote:
>> On 2011-06-09 22:53, Boaz Harrosh wrote:
>>> On 06/09/2011 06:31 PM, Trond Myklebust wrote:
>>> <snip>
>>>> +void
>>>> +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
>>>> +{
>>>> + BUG_ON(pgio->pg_lseg != NULL);
>>>> +
>>>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>>> + req->wb_context,
>>>> + req_offset(req),
>>>> + req->wb_bytes,
>>>> + IOMODE_READ,
>>>> + GFP_KERNEL);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
>>>> +
>>>> +void
>>>> +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
>>>> +{
>>>> + BUG_ON(pgio->pg_lseg != NULL);
>>>> +
>>>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>>> + req->wb_context,
>>>> + req_offset(req),
>>>> + req->wb_bytes,
>>>> + IOMODE_RW,
>>>> + GFP_NOFS);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
>>>> +
>>>
>>> These two above are identical except the IOMODE_{READ,RW} variable.
>>
>> And the respective gfp flags...
>>
>
> So is that "we should" or should-not?
>

That doesn't make much sense when you've defined separate vectors
for read and write.
And in any case, since pg_init is not pnfs specific the caller shouldn't
pass IOMODE_* values but rather a generic value that will require translation anyway.
In this case I'd just consider using a common function to be called
respectively from both methods:

static void
pnfs_pg_update_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req,
enum pnfs_iomode iomode, gfp_t gfp_flags)
{
BUG_ON(pgio->pg_lseg != NULL);

pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
req->wb_bytes,
iomode,
gfp_flags);
}

>>> Why don't you just have one and let the caller pass the IOMODE_ as a
>>> 3rd parameter. Do you expect more code to be added here?
>>>
>>> Boaz
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to [email protected]
>>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

2011-06-10 01:31:08

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

Ensure that we always get a layout before setting up the i/o request.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4filelayout.c | 2 +
fs/nfs/objlayout/objio_osd.c | 2 +
fs/nfs/pagelist.c | 2 +
fs/nfs/pnfs.c | 57 ++++++++++++++++++++++-------------------
fs/nfs/pnfs.h | 14 +---------
fs/nfs/read.c | 16 +++---------
fs/nfs/write.c | 12 ++------
include/linux/nfs_page.h | 1 +
8 files changed, 47 insertions(+), 59 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e9b9b82..93c5bae1f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -677,11 +677,13 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
}

static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+ .pg_init = pnfs_generic_pg_init_read,
.pg_test = filelayout_pg_test,
.pg_doio = nfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+ .pg_init = pnfs_generic_pg_init_write,
.pg_test = filelayout_pg_test,
.pg_doio = nfs_generic_pg_writepages,
};
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 31088f3..84b5fb7 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1009,11 +1009,13 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
}

static const struct nfs_pageio_ops objio_pg_read_ops = {
+ .pg_init = pnfs_generic_pg_init_read,
.pg_test = objio_pg_test,
.pg_doio = nfs_generic_pg_readpages,
};

static const struct nfs_pageio_ops objio_pg_write_ops = {
+ .pg_init = pnfs_generic_pg_init_write,
.pg_test = objio_pg_test,
.pg_doio = nfs_generic_pg_writepages,
};
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 2b71ad0..a46d827 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -294,6 +294,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
} else {
+ if (desc->pg_ops->pg_init)
+ desc->pg_ops->pg_init(desc, req);
desc->pg_base = req->wb_pgbase;
}
nfs_list_remove_request(req);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 68349dc..e9a8072 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -900,7 +900,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
* Layout segment is retreived from the server if not cached.
* The appropriate layout segment is referenced and returned to the caller.
*/
-struct pnfs_layout_segment *
+static struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
@@ -1043,6 +1043,34 @@ out_forget_reply:
goto out;
}

+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_READ,
+ GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
+
+void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ BUG_ON(pgio->pg_lseg != NULL);
+
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ req->wb_context,
+ req_offset(req),
+ req->wb_bytes,
+ IOMODE_RW,
+ GFP_NOFS);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
bool
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
@@ -1071,31 +1099,8 @@ bool
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- enum pnfs_iomode access_type;
- gfp_t gfp_flags;
-
- /* We assume that pg_ioflags == 0 iff we're reading a page */
- if (pgio->pg_ioflags == 0) {
- access_type = IOMODE_READ;
- gfp_flags = GFP_KERNEL;
- } else {
- access_type = IOMODE_RW;
- gfp_flags = GFP_NOFS;
- }
-
- if (pgio->pg_lseg == NULL) {
- if (pgio->pg_count != prev->wb_bytes)
- return true;
- /* This is first coelesce call for a series of nfs_pages */
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
- prev->wb_context,
- req_offset(prev),
- pgio->pg_count,
- access_type,
- gfp_flags);
- if (pgio->pg_lseg == NULL)
- return true;
- }
+ if (pgio->pg_lseg == NULL)
+ return nfs_generic_pg_test(pgio, prev, req);

/*
* Test if a nfs_page is fully contained in the pnfs_layout_range.
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c5f1d8c..2504b82 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -149,10 +149,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
-struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
- loff_t pos, u64 count, enum pnfs_iomode access_type,
- gfp_t gfp_flags);

bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
@@ -163,6 +159,8 @@ enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
const struct rpc_call_ops *, int);
enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
const struct rpc_call_ops *);
+void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
+void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -317,14 +315,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
{
}

-static inline struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
- loff_t pos, u64 count, enum pnfs_iomode access_type,
- gfp_t gfp_flags)
-{
- return NULL;
-}
-
static inline enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b46157d..dbb89a3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -148,9 +148,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
zero_user_segment(page, len, PAGE_CACHE_SIZE);

nfs_pageio_init_read(&pgio, inode);
- nfs_list_add_request(new, &pgio.pg_list);
- pgio.pg_count = len;
-
+ nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
return 0;
}
@@ -282,7 +280,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
LIST_HEAD(list);

nfs_list_remove_request(req);
@@ -300,10 +298,6 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);

- BUG_ON(desc->pg_lseg != NULL);
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_READ, GFP_KERNEL);
ClearPageError(page);
offset = 0;
nbytes = desc->pg_count;
@@ -337,6 +331,8 @@ out_bad:
}
SetPageError(page);
nfs_readpage_release(req);
+ put_lseg(lseg);
+ desc->pg_lseg = NULL;
return -ENOMEM;
}

@@ -365,10 +361,6 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
- if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_READ, GFP_KERNEL);

ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
0, lseg);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 828fba7..f0ce0c3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -914,7 +914,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
unsigned int offset;
int requests = 0;
int ret = 0;
- struct pnfs_layout_segment *lseg;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
LIST_HEAD(list);

nfs_list_remove_request(req);
@@ -938,10 +938,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);

- BUG_ON(desc->pg_lseg);
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_RW, GFP_NOFS);
ClearPageError(page);
offset = 0;
nbytes = desc->pg_count;
@@ -974,6 +970,8 @@ out_bad:
nfs_writedata_free(data);
}
nfs_redirty_request(req);
+ put_lseg(lseg);
+ desc->pg_lseg = NULL;
return -ENOMEM;
}

@@ -1019,10 +1017,6 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
- if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
- req_offset(req), desc->pg_count,
- IOMODE_RW, GFP_NOFS);

if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index b0e26c0..2927ecd 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -57,6 +57,7 @@ struct nfs_page {

struct nfs_pageio_descriptor;
struct nfs_pageio_ops {
+ void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
int (*pg_doio)(struct nfs_pageio_descriptor *);
};
--
1.7.5.2


2011-06-10 17:32:08

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On Fri, 2011-06-10 at 09:57 -0700, Boaz Harrosh wrote:
> On 06/10/2011 09:28 AM, Trond Myklebust wrote:
> >>
> >> That makes it even more complicated for a do nothing function. We dont do
> >> a different function for each different parameter. We can just do a
> >> "bool write" and unify the dam thing
> >
> > Right now, the nfs_pageio_descriptor doesn't know about reads vs writes.
> > It just knows about 'coalesce requests' and 'perform i/o'. I'd prefer to
> > keep that abstraction, as it makes things cleaner, particularly when you
> > get to patch 5 (NFSv4.1: Fall back to ordinary i/o through the mds if we
> > have no layout segment). Why add more 'if' statements when you don't
> > need to...
> >
>
> OK It's fine. I'm convinced. Do you have this on a git tree? I want to test
> it out.

I've added it to the 'nfs-for-bakeathon' branch.

> What was the disposition of desc->pg_bsize do I need to adjust it for the
> pnfs_case in objlayout?

You might need to adjust it. Please check...

I'm still working on the 'fallback to write through mds' case to ensure
that we re-coalesce if the call to pnfs_try_to_read_data() and
pnfs_try_to_write_data(). Once that is done, I think that the objects
code will always do the right thing and I anticipate that the blocks
code can reuse the same code...

Cheers
Trond

--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com


2011-06-10 16:28:56

by Myklebust, Trond

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On Fri, 2011-06-10 at 09:14 -0700, Boaz Harrosh wrote:
> On 06/09/2011 09:43 PM, Benny Halevy wrote:
> > On 2011-06-10 00:28, Boaz Harrosh wrote:
> >> On 06/09/2011 09:06 PM, Benny Halevy wrote:
> >>> On 2011-06-09 22:53, Boaz Harrosh wrote:
> >>>> On 06/09/2011 06:31 PM, Trond Myklebust wrote:
> >>>> <snip>
> >>>>> +void
> >>>>> +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
> >>>>> +{
> >>>>> + BUG_ON(pgio->pg_lseg != NULL);
> >>>>> +
> >>>>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> >>>>> + req->wb_context,
> >>>>> + req_offset(req),
> >>>>> + req->wb_bytes,
> >>>>> + IOMODE_READ,
> >>>>> + GFP_KERNEL);
> >>>>> +}
> >>>>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
> >>>>> +
> >>>>> +void
> >>>>> +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
> >>>>> +{
> >>>>> + BUG_ON(pgio->pg_lseg != NULL);
> >>>>> +
> >>>>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> >>>>> + req->wb_context,
> >>>>> + req_offset(req),
> >>>>> + req->wb_bytes,
> >>>>> + IOMODE_RW,
> >>>>> + GFP_NOFS);
> >>>>> +}
> >>>>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
> >>>>> +
> >>>>
> >>>> These two above are identical except the IOMODE_{READ,RW} variable.
> >>>
> >>> And the respective gfp flags...
> >>>
> >>
> >> So is that "we should" or should-not?
> >>
> >
> > That doesn't make much sense when you've defined separate vectors
> > for read and write.
>
> So what the same function is set at both vectors, and the few callers (1)
> passes a flag.
>
> If you ask me we could do with one vector and a flag throughout this
> stack. Actually don't make the flag as function parameter but as a member
> of nfs_pageio_descriptor. For example in osd we want to know if we are
> reading or writing in the raid5 case it produces different results.
>
> > And in any case, since pg_init is not pnfs specific the caller shouldn't
> > pass IOMODE_* values but rather a generic value that will require translation anyway.
> > In this case I'd just consider using a common function to be called
> > respectively from both methods:
> >
> > static void
> > pnfs_pg_update_layout(struct nfs_pageio_descriptor *pgio, struct nfs_page *req,
> > enum pnfs_iomode iomode, gfp_t gfp_flags)
> > {
> > BUG_ON(pgio->pg_lseg != NULL);
> >
> > pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
> > req->wb_context,
> > req_offset(req),
> > req->wb_bytes,
> > iomode,
> > gfp_flags);
> > }
> >
>
> That makes it even more complicated for a do nothing function. We dont do
> a different function for each different parameter. We can just do a
> "bool write" and unify the dam thing

Right now, the nfs_pageio_descriptor doesn't know about reads vs writes.
It just knows about 'coalesce requests' and 'perform i/o'. I'd prefer to
keep that abstraction, as it makes things cleaner, particularly when you
get to patch 5 (NFSv4.1: Fall back to ordinary i/o through the mds if we
have no layout segment). Why add more 'if' statements when you don't
need to...

--
Trond Myklebust
Linux NFS client maintainer

NetApp
[email protected]
http://www.netapp.com


2011-06-10 04:28:48

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 4/5] NFSv4.1: Add an initialisation callback for pNFS

On 06/09/2011 09:06 PM, Benny Halevy wrote:
> On 2011-06-09 22:53, Boaz Harrosh wrote:
>> On 06/09/2011 06:31 PM, Trond Myklebust wrote:
>> <snip>
>>> +void
>>> +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
>>> +{
>>> + BUG_ON(pgio->pg_lseg != NULL);
>>> +
>>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>> + req->wb_context,
>>> + req_offset(req),
>>> + req->wb_bytes,
>>> + IOMODE_READ,
>>> + GFP_KERNEL);
>>> +}
>>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
>>> +
>>> +void
>>> +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
>>> +{
>>> + BUG_ON(pgio->pg_lseg != NULL);
>>> +
>>> + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
>>> + req->wb_context,
>>> + req_offset(req),
>>> + req->wb_bytes,
>>> + IOMODE_RW,
>>> + GFP_NOFS);
>>> +}
>>> +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
>>> +
>>
>> These two above are identical except the IOMODE_{READ,RW} variable.
>
> And the respective gfp flags...
>

So is that "we should" or should-not?

>> Why don't you just have one and let the caller pass the IOMODE_ as a
>> 3rd parameter. Do you expect more code to be added here?
>>
>> Boaz
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html


2011-06-10 01:31:07

by Myklebust, Trond

[permalink] [raw]
Subject: [PATCH 5/5] NFSv4.1: Fall back to ordinary i/o through the mds if we have no layout segment

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 6 ++++++
fs/nfs/nfs4filelayout.c | 4 +---
fs/nfs/objlayout/objio_osd.c | 3 ---
fs/nfs/pnfs.c | 7 +++++++
fs/nfs/read.c | 2 +-
fs/nfs/write.c | 2 +-
6 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b9056cb..9f4386a 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -282,7 +282,13 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);

+struct nfs_pageio_descriptor;
+extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode);
+
/* write.c */
+extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags);
extern void nfs_commit_free(struct nfs_write_data *p);
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 93c5bae1f..c3857f4 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -662,10 +662,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
u32 stripe_unit;

if (!pnfs_generic_pg_test(pgio, prev, req))
- return 0;
+ return false;

- if (!pgio->pg_lseg)
- return 1;
p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 84b5fb7..f014038 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1001,9 +1001,6 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
if (!pnfs_generic_pg_test(pgio, prev, req))
return false;

- if (pgio->pg_lseg == NULL)
- return true;
-
return pgio->pg_count + req->wb_bytes <=
OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e9a8072..b848a7e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1054,6 +1054,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
req->wb_bytes,
IOMODE_READ,
GFP_KERNEL);
+ /* If no lseg, fall back to read through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_init_read_mds(pgio, pgio->pg_inode);
+
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);

@@ -1068,6 +1072,9 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
req->wb_bytes,
IOMODE_RW,
GFP_NOFS);
+ /* If no lseg, fall back to write through mds */
+ if (pgio->pg_lseg == NULL)
+ nfs_pageio_init_write_mds(pgio, pgio->pg_inode, pgio->pg_ioflags);
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index dbb89a3..9c5cf57 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -114,7 +114,7 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
}
}

-static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f0ce0c3..70f1ef0 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1043,7 +1043,7 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
.pg_doio = nfs_generic_pg_writepages,
};

-static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
--
1.7.5.2