The following patches are intended to smooth out the page writeback
performance by ensuring that we commit the data earlier on the server.
We assume that if something is starting writeback on the pages, then
that process wants to commit the data as soon as possible, whether it
is an application or just the background flush process.
We also assume that for streaming type processes, we don't want to pause
the I/O in order to commit, so we don't want to rely on a counter of
in-flight I/O to the entire inode going to zero.
We therefore set up a monitor that counts the number of in-flight
writes for each call to nfs_writepages(). Once all the writes to that
call to nfs_writepages has completed, we send the commit. Note that this
mirrors the behaviour for O_DIRECT writes, where we similarly track the
in-flight writes on a per-call basis.
Trond Myklebust (3):
NFS: Remove unused fields in the page I/O structures
NFS: Ensure we commit after writeback is complete
NFS: Fix commit policy for non-blocking calls to nfs_write_inode()
fs/nfs/pagelist.c | 5 ++--
fs/nfs/write.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++-
include/linux/nfs_page.h | 2 +-
include/linux/nfs_xdr.h | 3 ++-
4 files changed, 64 insertions(+), 5 deletions(-)
--
2.9.4
Remove the 'layout_private' fields that were only used by the pNFS OSD
layout driver.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pagelist.c | 2 --
include/linux/nfs_page.h | 1 -
include/linux/nfs_xdr.h | 1 -
3 files changed, 4 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ad92b401326c..de107d866297 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -51,7 +51,6 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = mirror->pg_count;
hdr->dreq = desc->pg_dreq;
- hdr->layout_private = desc->pg_layout_private;
hdr->release = release;
hdr->completion_ops = desc->pg_completion_ops;
if (hdr->completion_ops->init_hdr)
@@ -711,7 +710,6 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_error = 0;
desc->pg_lseg = NULL;
desc->pg_dreq = NULL;
- desc->pg_layout_private = NULL;
desc->pg_bsize = bsize;
desc->pg_mirror_count = 1;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 247cc3d3498f..6138cf91346b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -94,7 +94,6 @@ struct nfs_pageio_descriptor {
const struct nfs_pgio_completion_ops *pg_completion_ops;
struct pnfs_layout_segment *pg_lseg;
struct nfs_direct_req *pg_dreq;
- void *pg_layout_private;
unsigned int pg_bsize; /* default bsize for mirrors */
u32 pg_mirror_count;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b28c83475ee8..ee998ed08cf6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1436,7 +1436,6 @@ struct nfs_pgio_header {
const struct nfs_pgio_completion_ops *completion_ops;
const struct nfs_rw_ops *rw_ops;
struct nfs_direct_req *dreq;
- void *layout_private;
spinlock_t lock;
/* fields protected by lock */
int pnfs_error;
--
2.9.4
If the page cache is being flushed, then we want to ensure that we
do start a commit once the pages are done being flushed.
If we just wait until all I/O is done to that file, we can end up
livelocking until the balance_dirty_pages() mechanism puts its
foot down and forces I/O to stop.
So instead we do more or less the same thing that O_DIRECT does,
and set up a counter to tell us when the flush is done,
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pagelist.c | 3 +++
fs/nfs/write.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/nfs_page.h | 1 +
include/linux/nfs_xdr.h | 2 ++
4 files changed, 63 insertions(+)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index de107d866297..83d2918f1b13 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -50,6 +50,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->cred = hdr->req->wb_context->cred;
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = mirror->pg_count;
+ hdr->io_completion = desc->pg_io_completion;
hdr->dreq = desc->pg_dreq;
hdr->release = release;
hdr->completion_ops = desc->pg_completion_ops;
@@ -709,6 +710,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
+ desc->pg_io_completion = NULL;
desc->pg_dreq = NULL;
desc->pg_bsize = bsize;
@@ -1231,6 +1233,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
{
LIST_HEAD(failed);
+ desc->pg_io_completion = hdr->io_completion;
desc->pg_dreq = hdr->dreq;
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index db7ba542559e..051197cb9195 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -40,6 +40,12 @@
#define MIN_POOL_WRITE (32)
#define MIN_POOL_COMMIT (4)
+struct nfs_io_completion {
+ void (*complete)(void *data);
+ void *data;
+ struct kref refcount;
+};
+
/*
* Local function declarations
*/
@@ -108,6 +114,39 @@ static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
mempool_free(hdr, nfs_wdata_mempool);
}
+static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
+{
+ return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
+}
+
+static void nfs_io_completion_init(struct nfs_io_completion *ioc,
+ void (*complete)(void *), void *data)
+{
+ ioc->complete = complete;
+ ioc->data = data;
+ kref_init(&ioc->refcount);
+}
+
+static void nfs_io_completion_release(struct kref *kref)
+{
+ struct nfs_io_completion *ioc = container_of(kref,
+ struct nfs_io_completion, refcount);
+ ioc->complete(ioc->data);
+ kfree(ioc);
+}
+
+static void nfs_io_completion_get(struct nfs_io_completion *ioc)
+{
+ if (ioc != NULL)
+ kref_get(&ioc->refcount);
+}
+
+static void nfs_io_completion_put(struct nfs_io_completion *ioc)
+{
+ if (ioc != NULL)
+ kref_put(&ioc->refcount, nfs_io_completion_release);
+}
+
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
ctx->error = error;
@@ -681,18 +720,29 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
return ret;
}
+static void nfs_io_completion_commit(void *inode)
+{
+ nfs_commit_inode(inode, 0);
+}
+
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct nfs_pageio_descriptor pgio;
+ struct nfs_io_completion *ioc = nfs_io_completion_alloc(GFP_NOFS);
int err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+ if (ioc)
+ nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
+
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
&nfs_async_write_completion_ops);
+ pgio.pg_io_completion = ioc;
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
+ nfs_io_completion_put(ioc);
if (err < 0)
goto out_err;
@@ -940,6 +990,11 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr)
return hdr->verf.committed != NFS_FILE_SYNC;
}
+static void nfs_async_write_init(struct nfs_pgio_header *hdr)
+{
+ nfs_io_completion_get(hdr->io_completion);
+}
+
static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
@@ -973,6 +1028,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
nfs_release_request(req);
}
out:
+ nfs_io_completion_put(hdr->io_completion);
hdr->release(hdr);
}
@@ -1378,6 +1434,7 @@ static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
}
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+ .init_hdr = nfs_async_write_init,
.error_cleanup = nfs_async_write_error,
.completion = nfs_write_completion,
.reschedule_io = nfs_async_write_reschedule_io,
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 6138cf91346b..abbee2d15dce 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -93,6 +93,7 @@ struct nfs_pageio_descriptor {
const struct rpc_call_ops *pg_rpc_callops;
const struct nfs_pgio_completion_ops *pg_completion_ops;
struct pnfs_layout_segment *pg_lseg;
+ struct nfs_io_completion *pg_io_completion;
struct nfs_direct_req *pg_dreq;
unsigned int pg_bsize; /* default bsize for mirrors */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ee998ed08cf6..ab6a8381459f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1422,6 +1422,7 @@ enum {
NFS_IOHDR_STAT,
};
+struct nfs_io_completion;
struct nfs_pgio_header {
struct inode *inode;
struct rpc_cred *cred;
@@ -1435,6 +1436,7 @@ struct nfs_pgio_header {
void (*release) (struct nfs_pgio_header *hdr);
const struct nfs_pgio_completion_ops *completion_ops;
const struct nfs_rw_ops *rw_ops;
+ struct nfs_io_completion *io_completion;
struct nfs_direct_req *dreq;
spinlock_t lock;
/* fields protected by lock */
--
2.9.4
Now that the writes will schedule a commit on their own, we don't
need nfs_write_inode() to schedule one if there are outstanding
writes, and we're being called in non-blocking mode.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/write.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 051197cb9195..b1af5dee5e0a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1941,7 +1941,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
- if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
+ if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
goto out_mark_dirty;
/* don't wait for the COMMIT response */
--
2.9.4