2010-06-11 07:36:48

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 0/3] LAYOUTGET invocation rebasing

I have published pnfs-all-latest rebased atop my submitted patches at
git://linux-nfs.org/~isaman/pnfs-block.git in the branch for-benny-20100611.

(Actually, I am uploading now...it is going very slowly.)

These three patches changed significantly in the rebase, so I include them
here for review. (Two others nearby vanished entirely.) The basic idea is
that fsdata->ok_to_use_pnfs and PG_USE_PNFS can be replaced by checking
req->wb_lseg==NULL.

The rebase leaves the blocklayout directory untouched. I've fixed that up
with some straightforward SQUASHME patches to come.

Fred



2010-06-11 07:36:49

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 1/3] pnfs_post_submit: Restore "pnfs: pnfs_do_flush" part 1

From: Fred Isaman <[email protected]>

This adds the hooks in nfs_write_begin and nfs_write_end needed
by the block server.

Signed-off-by: Fred Isaman <[email protected]>
[pnfs: prevent offset overflow in _pnfs_do_flush]
[pnfs: pnfs_has_layout take_ref parameter should be bool]
[pnfs: clean up put_unlock_current_layout's interface]
[pnfs: introduce lseg valid bit]
Signed-off-by: Benny Halevy <[email protected]>

Signed-off-by: Fred Isaman <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/file.c | 15 ++++++++---
fs/nfs/pnfs.c | 43 +++++++++++++++++++++++++++++++
fs/nfs/pnfs.h | 62 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/nfs4_pnfs.h | 7 +++++
4 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3066141..0999200 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -420,8 +420,7 @@ start:

ret = nfs_flush_incompatible(file, page, lseg);
if (ret) {
- unlock_page(page);
- page_cache_release(page);
+ goto out_err;
} else if (!once_thru &&
nfs_want_read_modify_write(file, page, pos, len)) {
once_thru = 1;
@@ -430,13 +429,19 @@ start:
if (!ret)
goto start;
}
- *fsdata = lseg;
+ ret = pnfs_write_begin(file, page, pos, len, lseg, fsdata);
out:
if (ret) {
put_lseg(lseg);
*fsdata = NULL;
}
return ret;
+
+ out_err:
+ unlock_page(page);
+ page_cache_release(page);
+ *pagep = NULL;
+ goto out;
}

static int nfs_write_end(struct file *file, struct address_space *mapping,
@@ -445,7 +450,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
{
unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
int status;
- struct pnfs_layout_segment *lseg = fsdata;
+ struct pnfs_layout_segment *lseg;

dfprintk(PAGECACHE, "NFS: write_end(%s/%s(%ld), %u@%lld)\n",
file->f_path.dentry->d_parent->d_name.name,
@@ -472,10 +477,12 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
}

+ lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
status = nfs_updatepage(file, page, offset, copied, lseg);

unlock_page(page);
page_cache_release(page);
+ pnfs_write_end_cleanup(file, fsdata);
put_lseg(lseg);

if (status < 0)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0e91e9b..679171e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1658,6 +1658,41 @@ _pnfs_try_to_read_data(struct nfs_read_data *data,
return pnfs_readpages(data);
}

+/*
+ * This gives the layout driver an opportunity to read in page "around"
+ * the data to be written. It returns 0 on success, otherwise an error code
+ * which will either be passed up to user, or ignored if
+ * some previous part of write succeeded.
+ * Note the range [pos, pos+len-1] is entirely within the page.
+ */
+int _pnfs_write_begin(struct inode *inode, struct page *page,
+ loff_t pos, unsigned len,
+ struct pnfs_layout_segment *lseg,
+ struct pnfs_fsdata **fsdata)
+{
+ struct pnfs_fsdata *data;
+ int status = 0;
+
+ dprintk("--> %s: pos=%llu len=%u\n",
+ __func__, (unsigned long long)pos, len);
+ data = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
+ if (!data) {
+ status = -ENOMEM;
+ goto out;
+ }
+ data->lseg = lseg; /* refcount passed into data to be managed there */
+ status = NFS_SERVER(inode)->pnfs_curr_ld->ld_io_ops->write_begin(
+ lseg, page, pos, len, data);
+ if (status) {
+ kfree(data);
+ data = NULL;
+ }
+out:
+ *fsdata = data;
+ dprintk("<-- %s: status=%d\n", __func__, status);
+ return status;
+}
+
enum pnfs_try_status
_pnfs_try_to_write_data(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops, int how)
@@ -1853,6 +1888,14 @@ out_free:
goto out;
}

+void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
+{
+ if (fsdata) {
+ /* lseg refcounting handled directly in nfs_Write_end */
+ kfree(fsdata);
+ }
+}
+
/* Callback operations for layout drivers.
*/
struct pnfs_client_operations pnfs_ops = {
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f3a3325..df5668d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -64,12 +64,17 @@ void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
size_t *);
void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
size_t *);
+void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
void pnfs_get_layout_done(struct nfs4_pnfs_layoutget *, int rpc_status);
int pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp);
void pnfs_layout_release(struct pnfs_layout_type *, struct nfs4_pnfs_layout_segment *range);
void pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
const nfs4_stateid *stateid);
void pnfs_destroy_layout(struct nfs_inode *);
+int _pnfs_write_begin(struct inode *inode, struct page *page,
+ loff_t pos, unsigned len,
+ struct pnfs_layout_segment *lseg,
+ struct pnfs_fsdata **fsdata);

#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld && \
(srv)->pnfs_curr_ld->ld_io_ops && \
@@ -160,6 +165,32 @@ pnfs_try_to_commit(struct nfs_write_data *data,
return ret;
}

+static inline int pnfs_write_begin(struct file *filp, struct page *page,
+ loff_t pos, unsigned len,
+ struct pnfs_layout_segment *lseg,
+ void **fsdata)
+{
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ int status = 0;
+
+ *fsdata = lseg;
+ if (lseg && PNFS_EXISTS_LDIO_OP(nfss, write_begin))
+ status = _pnfs_write_begin(inode, page, pos, len, lseg,
+ (struct pnfs_fsdata **) fsdata);
+ return status;
+}
+
+static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
+{
+ if (fsdata) {
+ struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
+
+ if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
+ pnfs_free_fsdata(fsdata);
+ }
+}
+
static inline int pnfs_return_layout(struct inode *ino,
struct nfs4_pnfs_layout_segment *lseg,
const nfs4_stateid *stateid, /* optional */
@@ -209,6 +240,17 @@ static inline int pnfs_use_rpc(struct nfs_server *nfss)
return 1;
}

+static inline struct pnfs_layout_segment *
+nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
+{
+ if (fsdata) {
+ struct nfs_server *nfss = NFS_SERVER(filp->f_dentry->d_inode);
+
+ if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
+ return ((struct pnfs_fsdata *) fsdata)->lseg;
+ }
+ return fsdata;
+}
#else /* CONFIG_NFS_V4_1 */

static inline void get_lseg(struct pnfs_layout_segment *lseg)
@@ -249,6 +291,19 @@ pnfs_try_to_commit(struct nfs_write_data *data,
return PNFS_NOT_ATTEMPTED;
}

+static inline int pnfs_write_begin(struct file *filp, struct page *page,
+ loff_t pos, unsigned len,
+ struct pnfs_layout_segment *lseg,
+ void **fsdata)
+{
+ *fsdata = NULL;
+ return 0;
+}
+
+static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
+{
+}
+
static inline int pnfs_get_write_status(struct nfs_write_data *data)
{
return 0;
@@ -268,6 +323,13 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, int sync)
{
return 0;
}
+
+static inline struct pnfs_layout_segment *
+nfs4_pull_lseg_from_fsdata(struct file *filp, void *fsdata)
+{
+ return NULL;
+}
+
#endif /* CONFIG_NFS_V4_1 */

#endif /* FS_NFS_PNFS_H */
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 07cb761..0880a2e 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -30,6 +30,10 @@ struct pnfs_layoutdriver_type {
struct layoutdriver_policy_operations *ld_policy_ops;
};

+struct pnfs_fsdata {
+ struct pnfs_layout_segment *lseg;
+};
+
#if defined(CONFIG_NFS_V4_1)

static inline struct nfs_inode *
@@ -136,6 +140,9 @@ struct layoutdriver_io_operations {
struct page **pages, unsigned int pgbase,
unsigned nr_pages, loff_t offset, size_t count,
int sync, struct nfs_write_data *nfs_data);
+ int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
+ loff_t pos, unsigned count,
+ struct pnfs_fsdata *fsdata);

/* Consistency ops */
/* 2 problems:
--
1.6.6.1


2010-06-11 07:36:50

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 2/3] pnfs_post_submit: Restore "pnfs: pnfs_do_flush" part 2

From: Fred Isaman <[email protected]>

pnfs: pnfs_do_flush

Adds a hook into the "check if request needs to be flushed" routines.
This is needed to give the driver the ability to prevent commingling
of layout-driver-handled requests and fallback NFS requests.

Signed-off-by: Fred Isaman <[email protected]>
[pnfs: prevent offset overflow in _pnfs_do_flush]
[pnfs: pnfs_has_layout take_ref parameter should be bool]
[pnfs: clean up put_unlock_current_layout's interface]
[pnfs: introduce lseg valid bit]
Signed-off-by: Benny Halevy <[email protected]>

Signed-off-by: Fred Isaman <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 14 ++++++++++++++
fs/nfs/pnfs.h | 24 ++++++++++++++++++++++++
fs/nfs/write.c | 4 ++--
include/linux/nfs4_pnfs.h | 3 +++
4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 679171e..f60420c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1693,6 +1693,20 @@ out:
return status;
}

+/* Given an nfs request, determine if it should be flushed before proceeding.
+ * It should default to returning False, returning True only if there is a
+ * specific reason to flush.
+ */
+int _pnfs_do_flush(struct inode *inode, struct nfs_page *req)
+{
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ int status = 0;
+
+ /* Note that lseg==NULL may be useful info for do_flush */
+ status = nfss->pnfs_curr_ld->ld_policy_ops->do_flush(req->wb_lseg, req);
+ return status;
+}
+
enum pnfs_try_status
_pnfs_try_to_write_data(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops, int how)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index df5668d..23cd4c3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -75,6 +75,7 @@ int _pnfs_write_begin(struct inode *inode, struct page *page,
loff_t pos, unsigned len,
struct pnfs_layout_segment *lseg,
struct pnfs_fsdata **fsdata);
+int _pnfs_do_flush(struct inode *inode, struct nfs_page *req);

#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld && \
(srv)->pnfs_curr_ld->ld_io_ops && \
@@ -181,6 +182,24 @@ static inline int pnfs_write_begin(struct file *filp, struct page *page,
return status;
}

+/* req may not be locked, so we have to be prepared for req->wb_page being
+ * set to NULL at any time.
+ */
+static inline int pnfs_do_flush(struct nfs_page *req)
+{
+ struct page *page = req->wb_page;
+ struct inode *inode;
+
+ if (!page)
+ return 1;
+ inode = page->mapping->host;
+
+ if (PNFS_EXISTS_LDPOLICY_OP(NFS_SERVER(inode), do_flush))
+ return _pnfs_do_flush(inode, req);
+ else
+ return 0;
+}
+
static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
{
if (fsdata) {
@@ -291,6 +310,11 @@ pnfs_try_to_commit(struct nfs_write_data *data,
return PNFS_NOT_ATTEMPTED;
}

+static inline int pnfs_do_flush(struct nfs_page *req)
+{
+ return 0;
+}
+
static inline int pnfs_write_begin(struct file *filp, struct page *page,
loff_t pos, unsigned len,
struct pnfs_layout_segment *lseg,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e575f7a..bd1115f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -603,7 +603,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
* have flushed out requests having wrong owners.
*/
if (offset > rqend || end < req->wb_offset ||
- req->wb_lseg != lseg)
+ req->wb_lseg != lseg || pnfs_do_flush(req))
goto out_flushme;

if (nfs_set_page_tag_locked(req))
@@ -710,7 +710,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page,
if (req == NULL)
return 0;
do_flush = req->wb_page != page || req->wb_context != ctx ||
- req->wb_lseg != lseg;
+ req->wb_lseg != lseg || pnfs_do_flush(req);
nfs_release_request(req);
if (!do_flush)
return 0;
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 0880a2e..e2e6cd0 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -199,6 +199,9 @@ struct layoutdriver_policy_operations {
/* test for nfs page cache coalescing */
int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);

+ /* Test for pre-write request flushing */
+ int (*do_flush)(struct pnfs_layout_segment *lseg, struct nfs_page *req);
+
/* Retreive the block size of the file system.
* If gather_across_stripes == 1, then the file system will gather
* requests into the block size.
--
1.6.6.1


2010-06-11 07:36:51

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 3/3] pnfs_post_submit: Restore the pnfs_write_end part of "pnfs: commit and pnfs_write_end"

From: Fred Isaman <[email protected]>

pnfs: commit and pnfs_write_end

Add hooks in the nfs_write_end path, giving a driver the potential for
post-copy manipulation of the page.

[pnfs: pass lseg from write_begin to write_end]
Signed-off-by: Fred Isaman <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
[pnfs: fix pnfs_commit update_layout range]
Whole file semantics are different for COMMIT (0,0) and layouts
(0,NFS4_MAX_UINT64).
Reported-by: Alexandros Batsakis <[email protected]>
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>

Signed-off-by: Fred Isaman <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
---
fs/nfs/file.c | 4 ++++
fs/nfs/pnfs.c | 16 +++++++++++++++-
fs/nfs/pnfs.h | 23 +++++++++++++++++++++++
include/linux/nfs4_pnfs.h | 3 +++
4 files changed, 45 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0999200..d453487 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -478,8 +478,12 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
}

lseg = nfs4_pull_lseg_from_fsdata(file, fsdata);
+ status = pnfs_write_end(file, page, pos, len, copied, lseg);
+ if (status)
+ goto out;
status = nfs_updatepage(file, page, offset, copied, lseg);

+ out:
unlock_page(page);
page_cache_release(page);
pnfs_write_end_cleanup(file, fsdata);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f60420c..7c48713 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1693,7 +1693,21 @@ out:
return status;
}

-/* Given an nfs request, determine if it should be flushed before proceeding.
+/* Return 0 on success, negative on failure */
+/* CAREFUL - what happens if copied < len??? */
+int _pnfs_write_end(struct inode *inode, struct page *page,
+ loff_t pos, unsigned len, unsigned copied,
+ struct pnfs_layout_segment *lseg)
+{
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ int status;
+
+ status = nfss->pnfs_curr_ld->ld_io_ops->write_end(inode, page,
+ pos, len, copied, lseg);
+ return status;
+}
+
+ /* Given an nfs request, determine if it should be flushed before proceeding.
* It should default to returning False, returning True only if there is a
* specific reason to flush.
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 23cd4c3..ceb9934 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -75,6 +75,9 @@ int _pnfs_write_begin(struct inode *inode, struct page *page,
loff_t pos, unsigned len,
struct pnfs_layout_segment *lseg,
struct pnfs_fsdata **fsdata);
+int _pnfs_write_end(struct inode *inode, struct page *page,
+ loff_t pos, unsigned len, unsigned copied,
+ struct pnfs_layout_segment *lseg);
int _pnfs_do_flush(struct inode *inode, struct nfs_page *req);

#define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld && \
@@ -200,6 +203,19 @@ static inline int pnfs_do_flush(struct nfs_page *req)
return 0;
}

+static inline int pnfs_write_end(struct file *filp, struct page *page,
+ loff_t pos, unsigned len, unsigned copied,
+ struct pnfs_layout_segment *lseg)
+{
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct nfs_server *nfss = NFS_SERVER(inode);
+
+ if (PNFS_EXISTS_LDIO_OP(nfss, write_end))
+ return _pnfs_write_end(inode, page, pos, len, copied, lseg);
+ else
+ return 0;
+}
+
static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
{
if (fsdata) {
@@ -324,6 +340,13 @@ static inline int pnfs_write_begin(struct file *filp, struct page *page,
return 0;
}

+static inline int pnfs_write_end(struct file *filp, struct page *page,
+ loff_t pos, unsigned len, unsigned copied,
+ struct pnfs_layout_segment *lseg)
+{
+ return 0;
+}
+
static inline void pnfs_write_end_cleanup(struct file *filp, void *fsdata)
{
}
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index e2e6cd0..a84d622 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -143,6 +143,9 @@ struct layoutdriver_io_operations {
int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
loff_t pos, unsigned count,
struct pnfs_fsdata *fsdata);
+ int (*write_end)(struct inode *inode, struct page *page, loff_t pos,
+ unsigned count, unsigned copied,
+ struct pnfs_layout_segment *lseg);

/* Consistency ops */
/* 2 problems:
--
1.6.6.1