2012-04-20 18:36:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 00/28]

These patches rewrite the directio code to use the same coalesce
and RPC sendoff code as cached io. This allows directio to make
use of the preexisting pnfs code.

There are some limitations at the moment that need to be fixed, but
given the amount of change this patchset introduces I'm
trying to put out as small a base set of patches as possible.

This set of patches responds to comments from the previous version,
as well as including a number of bugfixes.


The primary TODOs are:
- Coalescing of bsize<PAGESIZE, while technically correct, is suboptimal.
To fix this I'll need to adjust the *_multi functions to handle
more than a single req.

- Unaligned io is not sent to pnfs, as there is no existing code to deal
with a layout boundary (think stripe edge) occurring in the middle
of an nfs_page.

Fred



2012-04-20 18:36:49

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 09/28] NFS: reverse arg order in nfs_initiate_[read|write]

Make it consistent with nfs_initiate_commit.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/internal.h | 7 ++++---
fs/nfs/nfs4filelayout.c | 4 ++--
fs/nfs/read.c | 5 +++--
fs/nfs/write.c | 6 +++---
4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 87e899d..abdf40c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -296,7 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);

struct nfs_pageio_descriptor;
/* read.c */
-extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
+extern int nfs_initiate_read(struct rpc_clnt *clnt,
+ struct nfs_read_data *data,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
@@ -315,8 +316,8 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct nfs_write_data *data,
- struct rpc_clnt *clnt,
+extern int nfs_initiate_write(struct rpc_clnt *clnt,
+ struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
int how);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 675ce3b..adbadcb 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -413,7 +413,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
data->mds_offset = offset;

/* Perform an asynchronous read to ds */
- status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
+ status = nfs_initiate_read(ds->ds_clp->cl_rpcclient, data,
&filelayout_read_call_ops);
BUG_ON(status != 0);
return PNFS_ATTEMPTED;
@@ -460,7 +460,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
data->args.offset = filelayout_get_dserver_offset(lseg, offset);

/* Perform an asynchronous write */
- status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
+ status = nfs_initiate_write(ds->ds_clp->cl_rpcclient, data,
&filelayout_write_call_ops, sync);
BUG_ON(status != 0);
return PNFS_ATTEMPTED;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a4be28..4ddba67 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -169,7 +169,8 @@ static void nfs_readpage_release(struct nfs_page *req)
nfs_release_request(req);
}

-int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
+int nfs_initiate_read(struct rpc_clnt *clnt,
+ struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
{
struct inode *inode = data->inode;
@@ -240,7 +241,7 @@ static int nfs_do_read(struct nfs_read_data *data,
{
struct inode *inode = data->args.context->dentry->d_inode;

- return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
+ return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops);
}

static int
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 54f7c0f..76735dd 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -836,8 +836,8 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}

-int nfs_initiate_write(struct nfs_write_data *data,
- struct rpc_clnt *clnt,
+int nfs_initiate_write(struct rpc_clnt *clnt,
+ struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
int how)
{
@@ -937,7 +937,7 @@ static int nfs_do_write(struct nfs_write_data *data,
{
struct inode *inode = data->args.context->dentry->d_inode;

- return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
+ return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how);
}

static int nfs_do_multiple_writes(struct list_head *head,
--
1.7.2.1


2012-04-20 18:36:47

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 06/28] NFS4.1: Add lseg to struct nfs4_fl_commit_bucket

Also create a commit_info structure to hold the bucket array and push
it up from the lseg to the layout where it really belongs.

While we are at it, fix a refcounting bug due to an (incorrect)
implicit assumption that filelayout_scan_ds_commit_list always
completely emptied the src list.

This clarifies refcounting, removes the ugly find_only_write_lseg
functions, and pushes the file layout commit code along on the path to
supporting multiple lsegs.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4filelayout.c | 216 +++++++++++++++++++++++++++--------------------
fs/nfs/nfs4filelayout.h | 20 ++++-
2 files changed, 144 insertions(+), 92 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 5acfd9e..15aeba2 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -650,10 +650,66 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)

dprintk("--> %s\n", __func__);
nfs4_fl_put_deviceid(fl->dsaddr);
- kfree(fl->commit_buckets);
+ /* This assumes a single RW lseg */
+ if (lseg->pls_range.iomode == IOMODE_RW) {
+ struct nfs4_filelayout *flo;
+
+ flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
+ flo->commit_info.nbuckets = 0;
+ kfree(flo->commit_info.buckets);
+ flo->commit_info.buckets = NULL;
+ }
_filelayout_free_lseg(fl);
}

+static int
+filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
+ gfp_t gfp_flags)
+{
+ struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
+ struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
+
+ struct nfs4_fl_commit_bucket *buckets;
+ int size;
+
+ if (fl->commit_through_mds)
+ return 0;
+ if (flo->commit_info.nbuckets != 0) {
+ /* This assumes there is only one IOMODE_RW lseg. What
+ * we really want to do is have a layout_hdr level
+ * dictionary of <multipath_list4, fh> keys, each
+ * associated with a struct list_head, populated by calls
+ * to filelayout_write_pagelist().
+ * */
+ return 0;
+ }
+
+ size = (fl->stripe_type == STRIPE_SPARSE) ?
+ fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+ buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket),
+ gfp_flags);
+ if (!buckets)
+ return -ENOMEM;
+ else {
+ int i;
+
+ spin_lock(&lseg->pls_layout->plh_inode->i_lock);
+ if (flo->commit_info.nbuckets != 0)
+ kfree(buckets);
+ else {
+ flo->commit_info.buckets = buckets;
+ flo->commit_info.nbuckets = size;
+ for (i = 0; i < size; i++) {
+ INIT_LIST_HEAD(&buckets[i].written);
+ INIT_LIST_HEAD(&buckets[i].committing);
+ }
+ }
+ spin_unlock(&lseg->pls_layout->plh_inode->i_lock);
+ return 0;
+ }
+}
+
static struct pnfs_layout_segment *
filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
struct nfs4_layoutget_res *lgr,
@@ -673,29 +729,6 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
_filelayout_free_lseg(fl);
return NULL;
}
-
- /* This assumes there is only one IOMODE_RW lseg. What
- * we really want to do is have a layout_hdr level
- * dictionary of <multipath_list4, fh> keys, each
- * associated with a struct list_head, populated by calls
- * to filelayout_write_pagelist().
- * */
- if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
- int i;
- int size = (fl->stripe_type == STRIPE_SPARSE) ?
- fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
- fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags);
- if (!fl->commit_buckets) {
- filelayout_free_lseg(&fl->generic_hdr);
- return NULL;
- }
- fl->number_of_buckets = size;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&fl->commit_buckets[i].written);
- INIT_LIST_HEAD(&fl->commit_buckets[i].committing);
- }
- }
return &fl->generic_hdr;
}

@@ -747,6 +780,8 @@ static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
+ int status;
+
BUG_ON(pgio->pg_lseg != NULL);

pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
@@ -757,7 +792,16 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
GFP_NOFS);
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
- nfs_pageio_reset_write_mds(pgio);
+ goto out_mds;
+ status = filelayout_alloc_commit_info(pgio->pg_lseg, GFP_NOFS);
+ if (status < 0) {
+ put_lseg(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ goto out_mds;
+ }
+ return;
+out_mds:
+ nfs_pageio_reset_write_mds(pgio);
}

static const struct nfs_pageio_ops filelayout_pg_read_ops = {
@@ -793,17 +837,13 @@ filelayout_clear_request_commit(struct nfs_page *req)
if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
goto out;
if (list_is_singular(&req->wb_list)) {
- struct pnfs_layout_segment *lseg;
+ struct nfs4_fl_commit_bucket *bucket;

- /* From here we can find the bucket, but for the moment,
- * since there is only one relevant lseg...
- */
- list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
- if (lseg->pls_range.iomode == IOMODE_RW) {
- freeme = lseg;
- break;
- }
- }
+ bucket = list_first_entry(&req->wb_list,
+ struct nfs4_fl_commit_bucket,
+ written);
+ freeme = bucket->wlseg;
+ bucket->wlseg = NULL;
}
out:
nfs_request_remove_commit_list(req);
@@ -818,6 +858,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j;
struct list_head *list;
+ struct nfs4_fl_commit_bucket *buckets;

if (fl->commit_through_mds)
return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
@@ -831,15 +872,16 @@ filelayout_choose_commit_list(struct nfs_page *req,
j = nfs4_fl_calc_j_index(lseg,
(loff_t)req->wb_index << PAGE_CACHE_SHIFT);
i = select_bucket_index(fl, j);
- list = &fl->commit_buckets[i].written;
+ buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets;
+ list = &buckets[i].written;
if (list_empty(list)) {
/* Non-empty buckets hold a reference on the lseg. That ref
* is normally transferred to the COMMIT call and released
* there. It could also be released if the last req is pulled
* off due to a rewrite, in which case it will be done in
- * filelayout_remove_commit_req
+ * filelayout_clear_request_commit
*/
- get_lseg(lseg);
+ buckets[i].wlseg = get_lseg(lseg);
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
return list;
@@ -908,32 +950,6 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
&filelayout_commit_call_ops, how);
}

-/*
- * This is only useful while we are using whole file layouts.
- */
-static struct pnfs_layout_segment *
-find_only_write_lseg_locked(struct inode *inode)
-{
- struct pnfs_layout_segment *lseg;
-
- list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
- if (lseg->pls_range.iomode == IOMODE_RW)
- return lseg;
- return NULL;
-}
-
-static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
-{
- struct pnfs_layout_segment *rv;
-
- spin_lock(&inode->i_lock);
- rv = find_only_write_lseg_locked(inode);
- if (rv)
- get_lseg(rv);
- spin_unlock(&inode->i_lock);
- return rv;
-}
-
static int
filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
spinlock_t *lock)
@@ -955,6 +971,13 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
if (ret == max)
break;
}
+ if (ret) {
+ bucket->clseg = bucket->wlseg;
+ if (list_empty(src))
+ bucket->wlseg = NULL;
+ else
+ get_lseg(bucket->clseg);
+ }
return ret;
}

@@ -964,18 +987,14 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
static int filelayout_scan_commit_lists(struct inode *inode, int max,
spinlock_t *lock)
{
- struct pnfs_layout_segment *lseg;
- struct nfs4_filelayout_segment *fl;
+ struct nfs4_fl_commit_info *fl_cinfo;
int i, rv = 0, cnt;

- lseg = find_only_write_lseg_locked(inode);
- if (!lseg)
- goto out_done;
- fl = FILELAYOUT_LSEG(lseg);
- if (fl->commit_through_mds)
+ fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+ if (fl_cinfo->nbuckets == 0)
goto out_done;
- for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
- cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i],
+ for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) {
+ cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i],
max, lock);
max -= cnt;
rv += cnt;
@@ -987,38 +1006,35 @@ out_done:
static unsigned int
alloc_ds_commits(struct inode *inode, struct list_head *list)
{
- struct pnfs_layout_segment *lseg;
- struct nfs4_filelayout_segment *fl;
+ struct nfs4_fl_commit_info *fl_cinfo;
+ struct nfs4_fl_commit_bucket *bucket;
struct nfs_write_data *data;
int i, j;
unsigned int nreq = 0;

- /* Won't need this when non-whole file layout segments are supported
- * instead we will use a pnfs_layout_hdr structure */
- lseg = find_only_write_lseg(inode);
- if (!lseg)
- return 0;
- fl = FILELAYOUT_LSEG(lseg);
- for (i = 0; i < fl->number_of_buckets; i++) {
- if (list_empty(&fl->commit_buckets[i].committing))
+ fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+ bucket = fl_cinfo->buckets;
+ for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
+ if (list_empty(&bucket->committing))
continue;
data = nfs_commitdata_alloc();
if (!data)
break;
data->ds_commit_index = i;
- data->lseg = lseg;
+ data->lseg = bucket->clseg;
+ bucket->clseg = NULL;
list_add(&data->pages, list);
nreq++;
}

/* Clean up on error */
- for (j = i; j < fl->number_of_buckets; j++) {
- if (list_empty(&fl->commit_buckets[i].committing))
+ for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
+ if (list_empty(&bucket->committing))
continue;
- nfs_retry_commit(&fl->commit_buckets[i].committing, lseg);
- put_lseg(lseg); /* associated with emptying bucket */
+ nfs_retry_commit(&bucket->committing, bucket->clseg);
+ put_lseg(bucket->clseg);
+ bucket->clseg = NULL;
}
- put_lseg(lseg);
/* Caller will clean up entries put on list */
return nreq;
}
@@ -1058,7 +1074,10 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
nfs_initiate_commit(data, NFS_CLIENT(inode),
data->mds_ops, how);
} else {
- nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg);
+ struct nfs4_fl_commit_info *fl_cinfo;
+
+ fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info;
+ nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg);
filelayout_initiate_commit(data, how);
}
}
@@ -1072,10 +1091,27 @@ filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
}

+static struct pnfs_layout_hdr *
+filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
+{
+ struct nfs4_filelayout *flo;
+
+ flo = kzalloc(sizeof(*flo), gfp_flags);
+ return &flo->generic_hdr;
+}
+
+static void
+filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+ kfree(FILELAYOUT_FROM_HDR(lo));
+}
+
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
.owner = THIS_MODULE,
+ .alloc_layout_hdr = filelayout_alloc_layout_hdr,
+ .free_layout_hdr = filelayout_free_layout_hdr,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
.pg_read_ops = &filelayout_pg_read_ops,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 21190bb..333a3ac 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -77,6 +77,13 @@ struct nfs4_file_layout_dsaddr {
struct nfs4_fl_commit_bucket {
struct list_head written;
struct list_head committing;
+ struct pnfs_layout_segment *wlseg;
+ struct pnfs_layout_segment *clseg;
+};
+
+struct nfs4_fl_commit_info {
+ int nbuckets;
+ struct nfs4_fl_commit_bucket *buckets;
};

struct nfs4_filelayout_segment {
@@ -89,10 +96,19 @@ struct nfs4_filelayout_segment {
struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
unsigned int num_fh;
struct nfs_fh **fh_array;
- struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */
- int number_of_buckets;
};

+struct nfs4_filelayout {
+ struct pnfs_layout_hdr generic_hdr;
+ struct nfs4_fl_commit_info commit_info;
+};
+
+static inline struct nfs4_filelayout *
+FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo)
+{
+ return container_of(lo, struct nfs4_filelayout, generic_hdr);
+}
+
static inline struct nfs4_filelayout_segment *
FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
{
--
1.7.2.1


2012-04-20 18:36:53

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 26/28] NFS: avoid some stat gathering for direct io

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/write.c | 18 ++++++++++++------
1 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 44a93d8..56db9e7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -472,9 +472,13 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
nfs_list_add_request(req, dst);
cinfo->mds->ncommit++;
spin_unlock(cinfo->lock);
- inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
- __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC);
+ if (!cinfo->dreq) {
+ inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
+ __mark_inode_dirty(req->wb_context->dentry->d_inode,
+ I_DIRTY_DATASYNC);
+ }
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

@@ -1455,9 +1459,11 @@ void nfs_retry_commit(struct list_head *page_list,
req = nfs_list_entry(page_list->next);
nfs_list_remove_request(req);
nfs_mark_request_commit(req, lseg, cinfo);
- dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
- BDI_RECLAIMABLE);
+ if (!cinfo->dreq) {
+ dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
+ }
nfs_unlock_request(req);
}
}
--
1.7.2.1


2012-04-20 18:36:52

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 21/28] direct read

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 21 ++++++++++++++-------
1 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d713234..4ba9a2c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -222,18 +222,17 @@ void nfs_direct_readpage_release(struct nfs_page *req)

static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
{
- unsigned long pos = req_offset(hdr->req);
+ unsigned long bytes = 0;
struct nfs_direct_req *dreq = hdr->dreq;

if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out_put;

spin_lock(&dreq->lock);
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
- (hdr->first_error == pos))
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0))
dreq->error = hdr->error;
else
- dreq->count += (hdr->first_error - pos);
+ dreq->count += hdr->good_bytes;
spin_unlock(&dreq->lock);

if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
@@ -241,6 +240,15 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;

+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+ if (bytes > hdr->good_bytes)
+ zero_user(page, 0, PAGE_SIZE);
+ else if (hdr->good_bytes - bytes < PAGE_SIZE)
+ zero_user_segment(page,
+ hdr->good_bytes & ~PAGE_MASK,
+ PAGE_SIZE);
+ }
+ bytes += req->wb_bytes;
nfs_list_remove_request(req);
nfs_direct_readpage_release(req);
if (!PageCompound(page))
@@ -248,17 +256,16 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
page_cache_release(page);
}
} else {
- pos &= PAGE_MASK;
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);

- if (pos < (hdr->first_error & PAGE_MASK))
+ if (bytes < hdr->good_bytes)
if (!PageCompound(req->wb_page))
set_page_dirty(req->wb_page);
+ bytes += req->wb_bytes;
page_cache_release(req->wb_page);
nfs_list_remove_request(req);
nfs_direct_readpage_release(req);
- pos += PAGE_SIZE;
}
}
out_put:
--
1.7.2.1


2012-04-20 18:36:48

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 10/28] NFS: remove unnecessary casts of void pointers in nfs4filelayout.c

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4filelayout.c | 16 ++++++++--------
1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index adbadcb..31afd81 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -191,7 +191,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;

rdata->read_done_cb = filelayout_read_done_cb;

@@ -205,7 +205,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)

static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;

dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

@@ -215,14 +215,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)

static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;

rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
}

static void filelayout_read_release(void *data)
{
- struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ struct nfs_read_data *rdata = data;

put_lseg(rdata->lseg);
rdata->mds_ops->rpc_release(data);
@@ -282,7 +282,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,

static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;

if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
&wdata->args.seq_args, &wdata->res.seq_res,
@@ -294,7 +294,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)

static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;

/* Note this may cause RPC to be resent */
wdata->mds_ops->rpc_call_done(task, data);
@@ -302,14 +302,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)

static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;

rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
}

static void filelayout_write_release(void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_write_data *wdata = data;

put_lseg(wdata->lseg);
wdata->mds_ops->rpc_release(data);
--
1.7.2.1


2012-04-20 18:36:50

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 16/28] paged writed

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/write.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 13b90b1..076075e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -560,7 +560,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)

void nfs_write_completion(struct nfs_pgio_header *hdr)
{
- unsigned long pos = hdr->req->wb_index << PAGE_CACHE_SHIFT;
+ unsigned long bytes = 0;

if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
@@ -568,9 +568,10 @@ void nfs_write_completion(struct nfs_pgio_header *hdr)
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;

+ bytes += req->wb_bytes;
nfs_list_remove_request(req);
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
- (pos >= (hdr->first_error & PAGE_MASK))) {
+ (hdr->good_bytes < bytes)) {
nfs_set_pageerror(page);
nfs_context_set_write_error(req->wb_context, hdr->error);
goto remove_req;
@@ -588,7 +589,6 @@ remove_req:
next:
nfs_unlock_request(req);
nfs_end_page_writeback(page);
- pos += PAGE_SIZE;
}
out:
hdr->release(hdr);
--
1.7.2.1


2012-04-20 18:36:49

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 13/28] NFS: create struct nfs_page_array

Both nfs_read_data and nfs_write_data carry the same page-array fields
(pagevec, npages and page_array), which can be combined into a single
shared struct.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 11 +++++----
fs/nfs/direct.c | 40 +++++++++++++++++++++----------------
fs/nfs/internal.h | 1 +
fs/nfs/pagelist.c | 13 ++++++++++++
fs/nfs/read.c | 22 +++++++-------------
fs/nfs/write.c | 22 +++++++-------------
include/linux/nfs_xdr.h | 14 +++++++-----
7 files changed, 67 insertions(+), 56 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 192e16a..3f58832 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -240,7 +240,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;

dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
- rdata->npages, f_offset, (unsigned int)rdata->args.count);
+ rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);

par = alloc_parallel(rdata);
if (!par)
@@ -250,7 +250,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)

isect = (sector_t) (f_offset >> SECTOR_SHIFT);
/* Code assumes extents are page-aligned */
- for (i = pg_index; i < rdata->npages; i++) {
+ for (i = pg_index; i < rdata->pages.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
@@ -283,7 +283,8 @@ bl_read_pagelist(struct nfs_read_data *rdata)
struct pnfs_block_extent *be_read;

be_read = (hole && cow_read) ? cow_read : be;
- bio = bl_add_page_to_bio(bio, rdata->npages - i, READ,
+ bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+ READ,
isect, pages[i], be_read,
bl_end_io_read, par);
if (IS_ERR(bio)) {
@@ -652,7 +653,7 @@ next_page:

/* Middle pages */
pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT;
- for (i = pg_index; i < wdata->npages; i++) {
+ for (i = pg_index; i < wdata->pages.npages; i++) {
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
@@ -686,7 +687,7 @@ next_page:
goto out;
}
}
- bio = bl_add_page_to_bio(bio, wdata->npages - i, WRITE,
+ bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
isect, pages[i], be,
bl_end_io_write, par);
if (IS_ERR(bio)) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 56176af..0faba4c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -252,11 +252,11 @@ static void nfs_direct_read_release(void *calldata)
} else {
dreq->count += data->res.count;
spin_unlock(&dreq->lock);
- nfs_direct_dirty_pages(data->pagevec,
+ nfs_direct_dirty_pages(data->pages.pagevec,
data->args.pgbase,
data->res.count);
}
- nfs_direct_release_pages(data->pagevec, data->npages);
+ nfs_direct_release_pages(data->pages.pagevec, data->pages.npages);

if (put_dreq(dreq))
nfs_direct_complete(dreq);
@@ -273,8 +273,8 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
{
struct nfs_read_data *data = &rhdr->rpc_data;

- if (data->pagevec != data->page_array)
- kfree(data->pagevec);
+ if (data->pages.pagevec != data->pages.page_array)
+ kfree(data->pages.pagevec);
nfs_readhdr_free(&rhdr->header);
}

@@ -312,6 +312,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
do {
struct nfs_read_header *rhdr;
struct nfs_read_data *data;
+ struct nfs_page_array *pages;
size_t bytes;

pgbase = user_addr & ~PAGE_MASK;
@@ -322,24 +323,25 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
if (unlikely(!rhdr))
break;
data = &rhdr->rpc_data;
+ pages = &data->pages;

down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
- data->npages, 1, 0, data->pagevec, NULL);
+ pages->npages, 1, 0, pages->pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0) {
nfs_direct_readhdr_release(rhdr);
break;
}
- if ((unsigned)result < data->npages) {
+ if ((unsigned)result < pages->npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
- nfs_direct_release_pages(data->pagevec, result);
+ nfs_direct_release_pages(pages->pagevec, result);
nfs_direct_readhdr_release(rhdr);
break;
}
bytes -= pgbase;
- data->npages = result;
+ pages->npages = result;
}

get_dreq(dreq);
@@ -352,7 +354,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
data->args.lock_context = dreq->l_ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
- data->args.pages = data->pagevec;
+ data->args.pages = pages->pagevec;
data->args.count = bytes;
data->res.fattr = &data->fattr;
data->res.eof = 0;
@@ -462,8 +464,8 @@ static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
{
struct nfs_write_data *data = &whdr->rpc_data;

- if (data->pagevec != data->page_array)
- kfree(data->pagevec);
+ if (data->pages.pagevec != data->pages.page_array)
+ kfree(data->pages.pagevec);
nfs_writehdr_free(&whdr->header);
}

@@ -472,8 +474,10 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
while (!list_empty(&dreq->rewrite_list)) {
struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+ struct nfs_page_array *p = &whdr->rpc_data.pages;
+
list_del(&hdr->pages);
- nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages);
+ nfs_direct_release_pages(p->pagevec, p->npages);
nfs_direct_writehdr_release(whdr);
}
}
@@ -751,6 +755,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
do {
struct nfs_write_header *whdr;
struct nfs_write_data *data;
+ struct nfs_page_array *pages;
size_t bytes;

pgbase = user_addr & ~PAGE_MASK;
@@ -762,24 +767,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
break;

data = &whdr->rpc_data;
+ pages = &data->pages;

down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
- data->npages, 0, 0, data->pagevec, NULL);
+ pages->npages, 0, 0, pages->pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0) {
nfs_direct_writehdr_release(whdr);
break;
}
- if ((unsigned)result < data->npages) {
+ if ((unsigned)result < pages->npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
- nfs_direct_release_pages(data->pagevec, result);
+ nfs_direct_release_pages(pages->pagevec, result);
nfs_direct_writehdr_release(whdr);
break;
}
bytes -= pgbase;
- data->npages = result;
+ pages->npages = result;
}

get_dreq(dreq);
@@ -794,7 +800,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
data->args.lock_context = dreq->l_ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
- data->args.pages = data->pagevec;
+ data->args.pages = pages->pagevec;
data->args.count = bytes;
data->args.stable = sync;
data->res.fattr = &data->fattr;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9b2b8bf..0818e66 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -209,6 +209,7 @@ extern void nfs_destroy_writepagecache(void);

extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
+extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);

/* nfs2xdr.c */
extern int nfs_stat_to_errno(enum nfs_stat);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d21fcea..d349bd4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -26,6 +26,19 @@

static struct kmem_cache *nfs_page_cachep;

+bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+{
+ p->npages = pagecount;
+ if (pagecount <= ARRAY_SIZE(p->page_array))
+ p->pagevec = p->page_array;
+ else {
+ p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ if (!p->pagevec)
+ p->npages = 0;
+ }
+ return p->pagevec != NULL;
+}
+
static inline struct nfs_page *
nfs_page_alloc(void)
{
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index d6d4682..f6ab30b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -46,16 +46,10 @@ struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)

INIT_LIST_HEAD(&hdr->pages);
INIT_LIST_HEAD(&data->list);
- data->npages = pagecount;
data->header = hdr;
- if (pagecount <= ARRAY_SIZE(data->page_array))
- data->pagevec = data->page_array;
- else {
- data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
- if (!data->pagevec) {
- kmem_cache_free(nfs_rdata_cachep, p);
- p = NULL;
- }
+ if (!nfs_pgarray_set(&data->pages, pagecount)) {
+ kmem_cache_free(nfs_rdata_cachep, p);
+ p = NULL;
}
}
return p;
@@ -71,8 +65,8 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
void nfs_readdata_release(struct nfs_read_data *rdata)
{
put_nfs_open_context(rdata->args.context);
- if (rdata->pagevec != rdata->page_array)
- kfree(rdata->pagevec);
+ if (rdata->pages.pagevec != rdata->pages.page_array)
+ kfree(rdata->pages.pagevec);
nfs_readhdr_free(rdata->header);
}

@@ -232,7 +226,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pagevec;
+ data->args.pages = data->pages.pagevec;
data->args.count = count;
data->args.context = get_nfs_open_context(req->wb_context);
data->args.lock_context = req->wb_lock_context;
@@ -318,7 +312,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
if (!rhdr)
goto out_bad;
data = &rhdr->rpc_data;
- data->pagevec[0] = page;
+ data->pages.pagevec[0] = page;
nfs_read_rpcsetup(req, data, len, offset);
list_add(&data->list, res);
requests++;
@@ -356,7 +350,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
}

data = &rhdr->rpc_data;
- pages = data->pagevec;
+ pages = data->pages.pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index dbb5c0a..2efae04 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -80,16 +80,10 @@ struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&hdr->pages);
INIT_LIST_HEAD(&data->list);
- data->npages = pagecount;
data->header = hdr;
- if (pagecount <= ARRAY_SIZE(data->page_array))
- data->pagevec = data->page_array;
- else {
- data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
- if (!data->pagevec) {
- mempool_free(p, nfs_wdata_mempool);
- p = NULL;
- }
+ if (!nfs_pgarray_set(&data->pages, pagecount)) {
+ mempool_free(p, nfs_wdata_mempool);
+ p = NULL;
}
}
return p;
@@ -104,8 +98,8 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)
void nfs_writedata_release(struct nfs_write_data *wdata)
{
put_nfs_open_context(wdata->args.context);
- if (wdata->pagevec != wdata->page_array)
- kfree(wdata->pagevec);
+ if (wdata->pages.pagevec != wdata->pages.page_array)
+ kfree(wdata->pages.pagevec);
nfs_writehdr_free(wdata->header);
}

@@ -916,7 +910,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
/* pnfs_set_layoutcommit needs this */
data->mds_offset = data->args.offset;
data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pagevec;
+ data->args.pages = data->pages.pagevec;
data->args.count = count;
data->args.context = get_nfs_open_context(req->wb_context);
data->args.lock_context = req->wb_lock_context;
@@ -1011,7 +1005,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
if (!whdr)
goto out_bad;
data = &whdr->rpc_data;
- data->pagevec[0] = page;
+ data->pages.pagevec[0] = page;
nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
list_add(&data->list, res);
requests++;
@@ -1061,7 +1055,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
goto out;
}
data = &whdr->rpc_data;
- pages = data->pagevec;
+ pages = data->pages.pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 0e31c44..060896f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1162,19 +1162,23 @@ struct nfs_page;

#define NFS_PAGEVEC_SIZE (8U)

+struct nfs_page_array {
+ struct page **pagevec;
+ unsigned int npages; /* Max length of pagevec */
+ struct page *page_array[NFS_PAGEVEC_SIZE];
+};
+
struct nfs_read_data {
struct nfs_pgio_header *header;
struct list_head list;
struct rpc_task task;
struct nfs_fattr fattr; /* fattr storage */
- struct page **pagevec;
- unsigned int npages; /* Max length of pagevec */
struct nfs_readargs args;
struct nfs_readres res;
unsigned long timestamp; /* For lease renewal */
int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
__u64 mds_offset;
- struct page *page_array[NFS_PAGEVEC_SIZE];
+ struct nfs_page_array pages;
struct nfs_client *ds_clp; /* pNFS data server */
};

@@ -1201,14 +1205,12 @@ struct nfs_write_data {
struct rpc_task task;
struct nfs_fattr fattr;
struct nfs_writeverf verf;
- struct page **pagevec;
- unsigned int npages; /* Max length of pagevec */
struct nfs_writeargs args; /* argument struct */
struct nfs_writeres res; /* result struct */
unsigned long timestamp; /* For lease renewal */
int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
__u64 mds_offset; /* Filelayout dense stripe */
- struct page *page_array[NFS_PAGEVEC_SIZE];
+ struct nfs_page_array pages;
struct nfs_client *ds_clp; /* pNFS data server */
};

--
1.7.2.1


2012-04-20 18:36:48

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 08/28] NFS: dprintks in directio code were referencing task after put

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 5897dfe..fb7fbaa 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -357,15 +357,15 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
break;
- rpc_put_task(task);

dprintk("NFS: %5u initiated direct read call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
- data->task.tk_pid,
+ task->tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
+ rpc_put_task(task);

started += bytes;
user_addr += bytes;
@@ -784,15 +784,15 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
break;
- rpc_put_task(task);

dprintk("NFS: %5u initiated direct write call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
- data->task.tk_pid,
+ task->tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
+ rpc_put_task(task);

started += bytes;
user_addr += bytes;
--
1.7.2.1


2012-04-20 18:36:51

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 15/28] NFS: merge _full and _partial write rpc_ops

Decouple nfs_pgio_header and nfs_write_data, and have (possibly
multiple) nfs_write_datas each take a refcount on nfs_pgio_header.

For the moment, this keeps nfs_write_header as a way to preallocate a
single nfs_write_data with the nfs_pgio_header. The code doesn't need
this, and would be prettier without it, but given the amount of churn I
am already introducing I didn't want to play with tuning new mempools.

This also fixes a bug in pnfs_ld_handle_write_error. In the case of
desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing
the replay attempt to do nothing.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 10 +-
fs/nfs/internal.h | 8 +-
fs/nfs/nfs4filelayout.c | 1 -
fs/nfs/nfs4proc.c | 4 +-
fs/nfs/pnfs.c | 58 +++++---
fs/nfs/write.c | 383 +++++++++++++++++++++--------------------------
include/linux/nfs_xdr.h | 2 +
7 files changed, 227 insertions(+), 239 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 90b00ce..22a40c4 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -768,11 +768,17 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
bytes = min(wsize,count);

result = -ENOMEM;
- whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes));
+ whdr = nfs_writehdr_alloc();
if (unlikely(!whdr))
break;

- data = &whdr->rpc_data;
+ data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes));
+ if (!data) {
+ nfs_writehdr_free(&whdr->header);
+ break;
+ }
+ data->header = &whdr->header;
+ atomic_inc(&data->header->refcnt);
pages = &data->pages;

down_read(&current->mm->mmap_sem);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ae550f5..d05e352 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -319,10 +319,14 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);

/* write.c */
-extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages);
+extern void nfs_async_write_error(struct list_head *head);
+extern struct nfs_write_header *nfs_writehdr_alloc(void);
extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
+extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount);
+extern void nfs_write_completion(struct nfs_pgio_header *hdr);
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct list_head *head);
+ struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 333e765..02d8170 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -314,7 +314,6 @@ static void filelayout_write_release(void *data)
{
struct nfs_write_data *wdata = data;

- put_lseg(wdata->header->lseg);
wdata->header->mds_ops->rpc_release(data);
}

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 16a3877..6365b02 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3376,8 +3376,6 @@ void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
struct inode *inode = hdr->inode;

dprintk("%s Reset task for i/o through\n", __func__);
- put_lseg(hdr->lseg);
- hdr->lseg = NULL;
data->ds_clp = NULL;
data->write_done_cb = nfs4_write_done_cb;
data->args.fh = NFS_FH(inode);
@@ -3393,7 +3391,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
{
struct nfs_server *server = NFS_SERVER(data->header->inode);

- if (data->header->lseg) {
+ if (data->ds_clp) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
} else
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5947a90..2b89b54 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1199,7 +1199,9 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
- data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages);
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+ data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
+ &hdr->pages);
}

/*
@@ -1214,7 +1216,6 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
hdr->mds_ops->rpc_call_done(&data->task, data);
} else
pnfs_ld_handle_write_error(data);
- put_lseg(hdr->lseg);
hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
@@ -1225,12 +1226,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
{
struct nfs_pgio_header *hdr = data->header;

- list_splice_tail_init(&hdr->pages, &desc->pg_list);
- if (hdr->req && list_empty(&hdr->req->wb_list))
- nfs_list_add_request(hdr->req, &desc->pg_list);
- nfs_pageio_reset_write_mds(desc);
- desc->pg_recoalesce = 1;
- put_lseg(hdr->lseg);
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ list_splice_tail_init(&hdr->pages, &desc->pg_list);
+ nfs_pageio_reset_write_mds(desc);
+ desc->pg_recoalesce = 1;
+ }
nfs_writedata_release(data);
}

@@ -1246,18 +1246,12 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
struct nfs_server *nfss = NFS_SERVER(inode);

hdr->mds_ops = call_ops;
- hdr->lseg = get_lseg(lseg);

dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
-
trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
- if (trypnfs == PNFS_NOT_ATTEMPTED) {
- put_lseg(hdr->lseg);
- hdr->lseg = NULL;
- } else
+ if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
-
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
return trypnfs;
}
@@ -1273,7 +1267,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
while (!list_empty(head)) {
enum pnfs_try_status trypnfs;

- data = list_entry(head->next, struct nfs_write_data, list);
+ data = list_first_entry(head, struct nfs_write_data, list);
list_del_init(&data->list);

trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1283,20 +1277,40 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
put_lseg(lseg);
}

+static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
+{
+ put_lseg(hdr->lseg);
+ nfs_writehdr_free(hdr);
+}
+
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- LIST_HEAD(head);
+ struct nfs_write_header *whdr;
+ struct nfs_pgio_header *hdr;
int ret;

- ret = nfs_generic_flush(desc, &head);
- if (ret != 0) {
+ whdr = nfs_writehdr_alloc();
+ if (!whdr) {
+ nfs_async_write_error(&desc->pg_list);
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
- return ret;
+ return -ENOMEM;
}
- pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
- return 0;
+ hdr = &whdr->header;
+ nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
+ hdr->lseg = get_lseg(desc->pg_lseg);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_flush(desc, hdr);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ } else
+ pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ nfs_write_completion(hdr);
+ return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2efae04..13b90b1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -42,8 +42,7 @@
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_partial_ops;
-static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;

static struct kmem_cache *nfs_wdata_cachep;
@@ -69,26 +68,47 @@ void nfs_commit_free(struct nfs_commit_data *p)
}
EXPORT_SYMBOL_GPL(nfs_commit_free);

-struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
+struct nfs_write_header *nfs_writehdr_alloc(void)
{
struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

if (p) {
struct nfs_pgio_header *hdr = &p->header;
- struct nfs_write_data *data = &p->rpc_data;

memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&hdr->pages);
- INIT_LIST_HEAD(&data->list);
- data->header = hdr;
- if (!nfs_pgarray_set(&data->pages, pagecount)) {
- mempool_free(p, nfs_wdata_mempool);
- p = NULL;
- }
+ INIT_LIST_HEAD(&hdr->rpc_list);
+ spin_lock_init(&hdr->lock);
+ atomic_set(&hdr->refcnt, 0);
}
return p;
}

+struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
+{
+ struct nfs_write_data *data, *prealloc;
+
+ prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
+ if (prealloc->header == NULL)
+ data = prealloc;
+ else
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out;
+
+ if (nfs_pgarray_set(&data->pages, pagecount)) {
+ data->header = hdr;
+ atomic_inc(&hdr->refcnt);
+ } else {
+ if (data != prealloc)
+ kfree(data);
+ data = NULL;
+ }
+out:
+ return data;
+}
+
void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
@@ -97,10 +117,18 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)

void nfs_writedata_release(struct nfs_write_data *wdata)
{
+ struct nfs_pgio_header *hdr = wdata->header;
+ struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
+
put_nfs_open_context(wdata->args.context);
if (wdata->pages.pagevec != wdata->pages.page_array)
kfree(wdata->pages.pagevec);
- nfs_writehdr_free(wdata->header);
+ if (wdata != &write_header->rpc_data)
+ kfree(wdata);
+ else
+ wdata->header = NULL;
+ if (atomic_dec_and_test(&hdr->refcnt))
+ nfs_write_completion(hdr);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -511,20 +539,6 @@ int nfs_write_need_commit(struct nfs_write_data *data)
return data->verf.committed != NFS_FILE_SYNC;
}

-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
- struct nfs_write_data *data)
-{
- if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
- nfs_mark_request_commit(req, data->header->lseg);
- return 1;
- }
- if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
- nfs_mark_request_dirty(req);
- return 1;
- }
- return 0;
-}
#else
static void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
@@ -542,13 +556,43 @@ int nfs_write_need_commit(struct nfs_write_data *data)
return 0;
}

-static inline
-int nfs_reschedule_unstable_write(struct nfs_page *req,
- struct nfs_write_data *data)
+#endif
+
+void nfs_write_completion(struct nfs_pgio_header *hdr)
{
- return 0;
+ unsigned long pos = hdr->req->wb_index << PAGE_CACHE_SHIFT;
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out;
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct page *page = req->wb_page;
+
+ nfs_list_remove_request(req);
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
+ (pos >= (hdr->first_error & PAGE_MASK))) {
+ nfs_set_pageerror(page);
+ nfs_context_set_write_error(req->wb_context, hdr->error);
+ goto remove_req;
+ }
+ if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+ nfs_mark_request_dirty(req);
+ goto next;
+ }
+ if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ nfs_mark_request_commit(req, hdr->lseg);
+ goto next;
+ }
+remove_req:
+ nfs_inode_remove_request(req);
+next:
+ nfs_unlock_request(req);
+ nfs_end_page_writeback(page);
+ pos += PAGE_SIZE;
+ }
+out:
+ hdr->release(hdr);
}
-#endif

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
@@ -813,17 +857,6 @@ int nfs_updatepage(struct file *file, struct page *page,
return status;
}

-static void nfs_writepage_release(struct nfs_page *req,
- struct nfs_write_data *data)
-{
- struct page *page = req->wb_page;
-
- if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
- nfs_inode_remove_request(req);
- nfs_unlock_request(req);
- nfs_end_page_writeback(page);
-}
-
static int flush_task_priority(int how)
{
switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
@@ -890,22 +923,16 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
/*
* Set up the argument/result storage required for the RPC call.
*/
-static void nfs_write_rpcsetup(struct nfs_page *req,
- struct nfs_write_data *data,
+static void nfs_write_rpcsetup(struct nfs_write_data *data,
unsigned int count, unsigned int offset,
int how)
{
- struct nfs_pgio_header *hdr = data->header;
- struct inode *inode = req->wb_context->dentry->d_inode;
+ struct nfs_page *req = data->header->req;

/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */

- hdr->req = req;
- hdr->inode = inode = req->wb_context->dentry->d_inode;
- hdr->cred = req->wb_context->cred;
-
- data->args.fh = NFS_FH(inode);
+ data->args.fh = NFS_FH(data->header->inode);
data->args.offset = req_offset(req) + offset;
/* pnfs_set_layoutcommit needs this */
data->mds_offset = data->args.offset;
@@ -919,7 +946,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
case 0:
break;
case FLUSH_COND_STABLE:
- if (nfs_need_commit(NFS_I(inode)))
+ if (nfs_need_commit(NFS_I(data->header->inode)))
break;
default:
data->args.stable = NFS_FILE_SYNC;
@@ -950,7 +977,7 @@ static int nfs_do_multiple_writes(struct list_head *head,
while (!list_empty(head)) {
int ret2;

- data = list_entry(head->next, struct nfs_write_data, list);
+ data = list_first_entry(head, struct nfs_write_data, list);
list_del_init(&data->list);

ret2 = nfs_do_write(data, call_ops, how);
@@ -973,15 +1000,26 @@ static void nfs_redirty_request(struct nfs_page *req)
nfs_end_page_writeback(page);
}

+void nfs_async_write_error(struct list_head *head)
+{
+ struct nfs_page *req;
+
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_redirty_request(req);
+ }
+}
+
/*
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+ struct nfs_page *req = hdr->req;
struct page *page = req->wb_page;
- struct nfs_write_header *whdr;
struct nfs_write_data *data;
size_t wsize = desc->pg_bsize, nbytes;
unsigned int offset;
@@ -989,6 +1027,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
int ret = 0;

nfs_list_remove_request(req);
+ nfs_list_add_request(req, &hdr->pages);

if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
@@ -1001,28 +1040,27 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
do {
size_t len = min(nbytes, wsize);

- whdr = nfs_writehdr_alloc(1);
- if (!whdr)
+ data = nfs_writedata_alloc(hdr, 1);
+ if (!data)
goto out_bad;
- data = &whdr->rpc_data;
data->pages.pagevec[0] = page;
- nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
- list_add(&data->list, res);
+ nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags);
+ list_add(&data->list, &hdr->rpc_list);
requests++;
nbytes -= len;
offset += len;
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
- desc->pg_rpc_callops = &nfs_write_partial_ops;
+ desc->pg_rpc_callops = &nfs_write_common_ops;
return ret;

out_bad:
- while (!list_empty(res)) {
- data = list_entry(res->next, struct nfs_write_data, list);
+ while (!list_empty(&hdr->rpc_list)) {
+ data = list_first_entry(&hdr->rpc_list, struct nfs_write_data, list);
list_del(&data->list);
nfs_writedata_release(data);
}
- nfs_redirty_request(req);
+ nfs_async_write_error(&hdr->pages);
return -ENOMEM;
}

@@ -1034,64 +1072,74 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
struct nfs_page *req;
struct page **pages;
- struct nfs_write_header *whdr;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
int ret = 0;

- whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!whdr) {
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_redirty_request(req);
- }
+ data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!data) {
+ nfs_async_write_error(head);
ret = -ENOMEM;
goto out;
}
- data = &whdr->rpc_data;
+
pages = data->pages.pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- nfs_list_add_request(req, &whdr->header.pages);
+ nfs_list_add_request(req, &hdr->pages);
*pages++ = req->wb_page;
}
- req = nfs_list_entry(whdr->header.pages.next);

if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;

/* Set up the argument struct */
- nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
- list_add(&data->list, res);
- desc->pg_rpc_callops = &nfs_write_full_ops;
+ nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags);
+ list_add(&data->list, &hdr->rpc_list);
+ desc->pg_rpc_callops = &nfs_write_common_ops;
out:
return ret;
}

-int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(desc, head);
- return nfs_flush_one(desc, head);
+ return nfs_flush_multi(desc, hdr);
+ return nfs_flush_one(desc, hdr);
}

static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- LIST_HEAD(head);
+ struct nfs_write_header *whdr;
+ struct nfs_pgio_header *hdr;
int ret;

- ret = nfs_generic_flush(desc, &head);
+ whdr = nfs_writehdr_alloc();
+ if (!whdr) {
+ nfs_async_write_error(&desc->pg_list);
+ return -ENOMEM;
+ }
+ hdr = &whdr->header;
+ nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_flush(desc, hdr);
if (ret == 0)
- ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
- desc->pg_ioflags);
+ ret = nfs_do_multiple_writes(&hdr->rpc_list,
+ desc->pg_rpc_callops,
+ desc->pg_ioflags);
+ else
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ nfs_write_completion(hdr);
return ret;
}

@@ -1121,62 +1169,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
nfs_pageio_init_write_mds(pgio, inode, ioflags);
}

-/*
- * Handle a write reply that flushed part of a page.
- */
-static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
-
- dprintk("NFS: %5u write(%s/%lld %d@%lld)",
- task->tk_pid,
- data->header->inode->i_sb->s_id,
- (long long)
- NFS_FILEID(data->header->inode),
- data->header->req->wb_bytes,
- (long long)req_offset(data->header->req));
-
- nfs_writeback_done(task, data);
-}
-
-static void nfs_writeback_release_partial(void *calldata)
-{
- struct nfs_write_data *data = calldata;
- struct nfs_page *req = data->header->req;
- struct page *page = req->wb_page;
- int status = data->task.tk_status;
-
- if (status < 0) {
- nfs_set_pageerror(page);
- nfs_context_set_write_error(req->wb_context, status);
- dprintk(", error = %d\n", status);
- goto out;
- }
-
- if (nfs_write_need_commit(data)) {
- struct inode *inode = page->mapping->host;
-
- spin_lock(&inode->i_lock);
- if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
- /* Do nothing we need to resend the writes */
- } else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
- memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
- dprintk(" defer commit\n");
- } else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
- set_bit(PG_NEED_RESCHED, &req->wb_flags);
- clear_bit(PG_NEED_COMMIT, &req->wb_flags);
- dprintk(" server reboot detected\n");
- }
- spin_unlock(&inode->i_lock);
- } else
- dprintk(" OK\n");
-
-out:
- if (atomic_dec_and_test(&req->wb_complete))
- nfs_writepage_release(req, data);
- nfs_writedata_release(data);
-}
-
void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
@@ -1190,12 +1182,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}

-static const struct rpc_call_ops nfs_write_partial_ops = {
- .rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_writeback_done_partial,
- .rpc_release = nfs_writeback_release_partial,
-};
-
/*
* Handle a write reply that flushes a whole page.
*
@@ -1203,60 +1189,37 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
* writebacks since the page->count is kept > 1 for as long
* as the page has a write request pending.
*/
-static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
+static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;

nfs_writeback_done(task, data);
}

-static void nfs_writeback_release_full(void *calldata)
+static void nfs_writeback_release_common(void *calldata)
{
struct nfs_write_data *data = calldata;
struct nfs_pgio_header *hdr = data->header;
int status = data->task.tk_status;
+ struct nfs_page *req = hdr->req;

- /* Update attributes as result of writeback. */
- while (!list_empty(&hdr->pages)) {
- struct nfs_page *req = nfs_list_entry(hdr->pages.next);
- struct page *page = req->wb_page;
-
- nfs_list_remove_request(req);
-
- dprintk("NFS: %5u write (%s/%lld %d@%lld)",
- data->task.tk_pid,
- req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
- req->wb_bytes,
- (long long)req_offset(req));
-
- if (status < 0) {
- nfs_set_pageerror(page);
- nfs_context_set_write_error(req->wb_context, status);
- dprintk(", error = %d\n", status);
- goto remove_request;
- }
-
- if (nfs_write_need_commit(data)) {
+ if ((status >= 0) && nfs_write_need_commit(data)) {
+ spin_lock(&hdr->lock);
+ if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
+ ; /* Do nothing */
+ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
- nfs_mark_request_commit(req, hdr->lseg);
- dprintk(" marked for commit\n");
- goto next;
- }
- dprintk(" OK\n");
-remove_request:
- nfs_inode_remove_request(req);
- next:
- nfs_unlock_request(req);
- nfs_end_page_writeback(page);
+ else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
+ set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
+ spin_unlock(&hdr->lock);
}
nfs_writedata_release(data);
}

-static const struct rpc_call_ops nfs_write_full_ops = {
+static const struct rpc_call_ops nfs_write_common_ops = {
.rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_writeback_done_full,
- .rpc_release = nfs_writeback_release_full,
+ .rpc_call_done = nfs_writeback_done_common,
+ .rpc_release = nfs_writeback_release_common,
};


@@ -1307,38 +1270,40 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
}
}
#endif
- /* Is this a short write? */
- if (task->tk_status >= 0 && resp->count < argp->count) {
+ if (task->tk_status < 0)
+ nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
+ else if (resp->count < argp->count) {
static unsigned long complain;

+ /* This is a short write! */
nfs_inc_stats(inode, NFSIOS_SHORTWRITE);

/* Has the server at least made some progress? */
- if (resp->count != 0) {
- /* Was this an NFSv2 write or an NFSv3 stable write? */
- if (resp->verf->committed != NFS_UNSTABLE) {
- /* Resend from where the server left off */
- data->mds_offset += resp->count;
- argp->offset += resp->count;
- argp->pgbase += resp->count;
- argp->count -= resp->count;
- } else {
- /* Resend as a stable write in order to avoid
- * headaches in the case of a server crash.
- */
- argp->stable = NFS_FILE_SYNC;
+ if (resp->count == 0) {
+ if (time_before(complain, jiffies)) {
+ printk(KERN_WARNING
+ "NFS: Server wrote zero bytes, expected %u.\n",
+ argp->count);
+ complain = jiffies + 300 * HZ;
}
- rpc_restart_call_prepare(task);
+ nfs_set_pgio_error(data->header, -EIO, argp->offset);
+ task->tk_status = -EIO;
return;
}
- if (time_before(complain, jiffies)) {
- printk(KERN_WARNING
- "NFS: Server wrote zero bytes, expected %u.\n",
- argp->count);
- complain = jiffies + 300 * HZ;
+ /* Was this an NFSv2 write or an NFSv3 stable write? */
+ if (resp->verf->committed != NFS_UNSTABLE) {
+ /* Resend from where the server left off */
+ data->mds_offset += resp->count;
+ argp->offset += resp->count;
+ argp->pgbase += resp->count;
+ argp->count -= resp->count;
+ } else {
+ /* Resend as a stable write in order to avoid
+ * headaches in the case of a server crash.
+ */
+ argp->stable = NFS_FILE_SYNC;
}
- /* Can't do anything about it except throw an error. */
- task->tk_status = -EIO;
+ rpc_restart_call_prepare(task);
}
}

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bf8fc99..694cfa4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1187,6 +1187,8 @@ enum {
NFS_IOHDR_ERROR = 0,
NFS_IOHDR_EOF,
NFS_IOHDR_REDO,
+ NFS_IOHDR_NEED_COMMIT,
+ NFS_IOHDR_NEED_RESCHED,
};

struct nfs_pgio_header {
--
1.7.2.1


2012-04-20 18:36:50

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 11/28] NFS: use req_offset where appropriate

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4filelayout.c | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 31afd81..c536328 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -776,8 +776,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
!nfs_generic_pg_test(pgio, prev, req))
return false;

- p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
- r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
+ p_stripe = (u64)req_offset(prev);
+ r_stripe = (u64)req_offset(req);
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;

do_div(p_stripe, stripe_unit);
@@ -896,8 +896,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
* to store the value calculated in filelayout_write_pagelist
* and just use that here.
*/
- j = nfs4_fl_calc_j_index(lseg,
- (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
+ j = nfs4_fl_calc_j_index(lseg, req_offset(req));
i = select_bucket_index(fl, j);
buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets;
list = &buckets[i].written;
--
1.7.2.1


2012-04-20 18:36:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 05/28] NFS4.1: make pnfs_ld_[read|write]_done consistent

The two functions had diverged quite a bit, with the write function
being a bit more robust than the read.

However, these still break badly in the desc->pg_bsize < PAGE_CACHE_SIZE case,
as then there is nothing hanging on the data->pages list, and the resend
ends up doing nothing. This will be fixed in a patch later in the series.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 64 +++++++++++++++++++++++++++++++++-----------------------
1 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b5d4515..e4aee9d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1189,6 +1189,17 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *
return 0;
}

+static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+{
+ dprintk("pnfs write error = %d\n", data->pnfs_error);
+ if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+ PNFS_LAYOUTRET_ON_ERROR) {
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
+ pnfs_return_layout(data->inode);
+ }
+ data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
+}
+
/*
* Called by non rpc-based layout drivers
*/
@@ -1197,19 +1208,8 @@ void pnfs_ld_write_done(struct nfs_write_data *data)
if (likely(!data->pnfs_error)) {
pnfs_set_layoutcommit(data);
data->mds_ops->rpc_call_done(&data->task, data);
- } else {
- dprintk("pnfs write error = %d\n", data->pnfs_error);
- if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
- PNFS_LAYOUTRET_ON_ERROR) {
- /* Don't lo_commit on error, Server will needs to
- * preform a file recovery.
- */
- clear_bit(NFS_INO_LAYOUTCOMMIT,
- &NFS_I(data->inode)->flags);
- pnfs_return_layout(data->inode);
- }
- data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
- }
+ } else
+ pnfs_ld_handle_write_error(data);
put_lseg(data->lseg);
data->mds_ops->rpc_release(data);
}
@@ -1293,26 +1293,38 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head)
{
struct nfs_pageio_descriptor pgio;
+ LIST_HEAD(failed);

- put_lseg(data->lseg);
- data->lseg = NULL;
- dprintk("pnfs write error = %d\n", data->pnfs_error);
- if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
- PNFS_LAYOUTRET_ON_ERROR)
- pnfs_return_layout(data->inode);
-
- nfs_pageio_init_read_mds(&pgio, data->inode);
-
- while (!list_empty(&data->pages)) {
- struct nfs_page *req = nfs_list_entry(data->pages.next);
+ /* Resend all requests through the MDS */
+ nfs_pageio_init_read_mds(&pgio, inode);
+ while (!list_empty(head)) {
+ struct nfs_page *req = nfs_list_entry(head->next);

nfs_list_remove_request(req);
- nfs_pageio_add_request(&pgio, req);
+ if (!nfs_pageio_add_request(&pgio, req))
+ nfs_list_add_request(req, &failed);
}
nfs_pageio_complete(&pgio);
+
+ if (!list_empty(&failed)) {
+ list_move(&failed, head);
+ return -EIO;
+ }
+ return 0;
+}
+
+static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+{
+ dprintk("pnfs read error = %d\n", data->pnfs_error);
+ if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+ PNFS_LAYOUTRET_ON_ERROR) {
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
+ pnfs_return_layout(data->inode);
+ }
+ data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages);
}

/*
--
1.7.2.1


2012-04-20 18:36:53

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 25/28] NFS: add dreq to nfs_commit_info

This is needed so that dreq can be passed into nfs_commitdata_init, in
order to keep data->dreq accurate.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/write.c | 2 ++
include/linux/nfs_xdr.h | 1 +
2 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 333d01d..44a93d8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -506,6 +506,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
cinfo->lock = &inode->i_lock;
cinfo->mds = &NFS_I(inode)->commit_info;
cinfo->ds = pnfs_get_ds_info(inode);
+ cinfo->dreq = NULL;
cinfo->completion_ops = &nfs_commit_completion_ops;
}

@@ -1431,6 +1432,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
data->lseg = lseg; /* reference transferred */
data->mds_ops = &nfs_commit_ops;
data->completion_ops = cinfo->completion_ops;
+ data->dreq = cinfo->dreq;

data->args.fh = NFS_FH(data->inode);
/* Note: we always request a commit of the entire inode */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 756152f..3251c22 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1269,6 +1269,7 @@ struct nfs_commit_info {
spinlock_t *lock;
struct nfs_mds_commit_info *mds;
struct pnfs_ds_commit_info *ds;
+ struct nfs_direct_req *dreq; /* O_DIRECT request */
const struct nfs_commit_completion_ops *completion_ops;
};

--
1.7.2.1


2012-04-20 18:36:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 04/28] NFS: grab open context in direct read

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 481be7f..8a89423 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -260,7 +260,7 @@ static void nfs_direct_read_release(void *calldata)

if (put_dreq(dreq))
nfs_direct_complete(dreq);
- nfs_readdata_free(data);
+ nfs_readdata_release(data);
}

static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -337,7 +337,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
data->inode = inode;
data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
- data->args.context = ctx;
+ data->args.context = get_nfs_open_context(ctx);
data->args.lock_context = dreq->l_ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
--
1.7.2.1


2012-04-20 18:36:51

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 19/28] NFS: prepare coalesce testing for directio

The coalesce code made assumptions that will no longer be true once
non-page aligned io occurs. This introduces no change in
current behavior, but allows for more general situations to come.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4filelayout.c | 12 ++++++++++++
fs/nfs/pagelist.c | 2 +-
fs/nfs/pnfs.c | 8 ++++++++
3 files changed, 21 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 02d8170..e40523f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -796,6 +796,16 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
{
BUG_ON(pgio->pg_lseg != NULL);

+ if (req->wb_offset != req->wb_pgbase) {
+ /*
+ * Handling unaligned pages is difficult, because we have to
+ * somehow split a req in two in certain cases in the
+ * pg.test code. Avoid this by just not using pnfs
+ * in this case.
+ */
+ nfs_pageio_reset_read_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
@@ -815,6 +825,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,

BUG_ON(pgio->pg_lseg != NULL);

+ if (req->wb_offset != req->wb_pgbase)
+ goto out_mds;
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 5d01a16..b344946 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -280,7 +280,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
if (req->wb_context->state != prev->wb_context->state)
return false;
- if (req->wb_index != (prev->wb_index + 1))
+ if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
return false;
if (req->wb_pgbase != 0)
return false;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 4fa43e0..5e11557 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1082,6 +1082,10 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
{
BUG_ON(pgio->pg_lseg != NULL);

+ if (req->wb_offset != req->wb_pgbase) {
+ nfs_pageio_reset_read_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
@@ -1100,6 +1104,10 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
{
BUG_ON(pgio->pg_lseg != NULL);

+ if (req->wb_offset != req->wb_pgbase) {
+ nfs_pageio_reset_write_mds(pgio);
+ return;
+ }
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
--
1.7.2.1


2012-04-20 18:36:53

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 22/28] NFS: create nfs_generic_commit_list

Simple refactoring.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/write.c | 15 ++++++++++++---
1 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 705bf01..2500f1c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1523,6 +1523,17 @@ static const struct rpc_call_ops nfs_commit_ops = {
.rpc_release = nfs_commit_release,
};

+static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+ int how)
+{
+ int status;
+
+ status = pnfs_commit_list(inode, head, how);
+ if (status == PNFS_NOT_ATTEMPTED)
+ status = nfs_commit_list(inode, head, how);
+ return status;
+}
+
int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
@@ -1536,9 +1547,7 @@ int nfs_commit_inode(struct inode *inode, int how)
if (res) {
int error;

- error = pnfs_commit_list(inode, &head, how);
- if (error == PNFS_NOT_ATTEMPTED)
- error = nfs_commit_list(inode, &head, how);
+ error = nfs_generic_commit_list(inode, &head, how);
if (error < 0)
return error;
if (!may_wait)
--
1.7.2.1


2012-04-20 18:36:54

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 28/28] direct write

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f69ef97..d44de2f 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -727,7 +727,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
if (dreq->error != 0)
bit = NFS_IOHDR_ERROR;
else {
- dreq->count += (hdr->first_error - req_offset(hdr->req));
+ dreq->count += hdr->good_bytes;
if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
bit = NFS_IOHDR_NEED_RESCHED;
--
1.7.2.1


2012-04-20 18:36:54

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 27/28] NFS: rewrite directio write to use async coalesce code

This also has the advantage that it allows directio to use pnfs.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 527 +++++++++++++++++++++--------------------------
fs/nfs/internal.h | 18 ++-
fs/nfs/nfs4filelayout.c | 44 ++++-
fs/nfs/pnfs.h | 17 ++
fs/nfs/write.c | 36 ++--
5 files changed, 319 insertions(+), 323 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4ba9a2c..f69ef97 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -56,6 +56,7 @@

#include "internal.h"
#include "iostat.h"
+#include "pnfs.h"

#define NFSDBG_FACILITY NFSDBG_VFS

@@ -81,16 +82,19 @@ struct nfs_direct_req {
struct completion completion; /* wait for i/o completion */

/* commit state */
- struct list_head rewrite_list; /* saved nfs_write_data structs */
- struct nfs_commit_data *commit_data; /* special write_data for commits */
+ struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
+ struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
+ struct work_struct work;
int flags;
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
struct nfs_writeverf verf; /* unstable write verifier */
};

+static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
+static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
-static const struct rpc_call_ops nfs_write_direct_ops;
+static void nfs_direct_write_schedule_work(struct work_struct *work);

static inline void get_dreq(struct nfs_direct_req *dreq)
{
@@ -131,6 +135,16 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
page_cache_release(pages[i]);
}

+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+ struct nfs_direct_req *dreq)
+{
+ cinfo->lock = &dreq->lock;
+ cinfo->mds = &dreq->mds_cinfo;
+ cinfo->ds = &dreq->ds_cinfo;
+ cinfo->dreq = dreq;
+ cinfo->completion_ops = &nfs_direct_commit_completion_ops;
+}
+
static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
struct nfs_direct_req *dreq;
@@ -142,7 +156,11 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
kref_init(&dreq->kref);
kref_get(&dreq->kref);
init_completion(&dreq->completion);
- INIT_LIST_HEAD(&dreq->rewrite_list);
+ dreq->mds_cinfo.ncommit = 0;
+ atomic_set(&dreq->mds_cinfo.rpcs_out, 0);
+ INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
+ memset(&dreq->ds_cinfo, 0, sizeof(dreq->ds_cinfo));
dreq->iocb = NULL;
dreq->ctx = NULL;
dreq->l_ctx = NULL;
@@ -457,112 +475,60 @@ out:
return result;
}

-static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
-{
- struct nfs_write_data *data = &whdr->rpc_data;
-
- if (data->pages.pagevec != data->pages.page_array)
- kfree(data->pages.pagevec);
- nfs_writehdr_free(&whdr->header);
-}
-
-static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
-{
- while (!list_empty(&dreq->rewrite_list)) {
- struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
- struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
- struct nfs_page_array *p = &whdr->rpc_data.pages;
-
- list_del(&hdr->pages);
- nfs_direct_release_pages(p->pagevec, p->npages);
- nfs_direct_writehdr_release(whdr);
- }
-}
-
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
- struct inode *inode = dreq->inode;
- struct list_head *p;
- struct nfs_write_data *data;
- struct nfs_pgio_header *hdr;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = dreq->ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_write_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
+ struct nfs_pageio_descriptor desc;
+ struct nfs_page *req, *tmp;
+ LIST_HEAD(reqs);
+ struct nfs_commit_info cinfo;
+ LIST_HEAD(failed);
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo);
+ spin_lock(cinfo.lock);
+ nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0);
+ spin_unlock(cinfo.lock);

dreq->count = 0;
get_dreq(dreq);

- list_for_each(p, &dreq->rewrite_list) {
- hdr = list_entry(p, struct nfs_pgio_header, pages);
- data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data;
-
- get_dreq(dreq);
-
- /* Use stable writes */
- data->args.stable = NFS_FILE_SYNC;
-
- /*
- * Reset data->res.
- */
- nfs_fattr_init(&data->fattr);
- data->res.count = data->args.count;
- memset(&data->verf, 0, sizeof(data->verf));
-
- /*
- * Reuse data->task; data->args should not have changed
- * since the original request was sent.
- */
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- /*
- * We're called via an RPC callback, so BKL is already held.
- */
- task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
-
- dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
- }
+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
+ &nfs_direct_write_completion_ops);
+ desc.pg_dreq = dreq;

- if (put_dreq(dreq))
- nfs_direct_write_complete(dreq, inode);
-}
+ list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
+ if (!nfs_pageio_add_request(&desc, req)) {
+ nfs_list_add_request(req, &failed);
+ spin_lock(cinfo.lock);
+ dreq->flags = 0;
+ dreq->error = -EIO;
+ spin_unlock(cinfo.lock);
+ }
+ }
+ nfs_pageio_complete(&desc);

-static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
-{
- struct nfs_commit_data *data = calldata;
+ while (!list_empty(&failed)) {
+ page_cache_release(req->wb_page);
+ nfs_release_request(req);
+ nfs_unlock_request(req);
+ }

- /* Call the NFS version-specific code */
- NFS_PROTO(data->inode)->commit_done(task, data);
+ if (put_dreq(dreq))
+ nfs_direct_write_complete(dreq, dreq->inode);
}

-static void nfs_direct_commit_release(void *calldata)
+static void nfs_direct_commit_complete(struct nfs_commit_data *data)
{
- struct nfs_commit_data *data = calldata;
struct nfs_direct_req *dreq = data->dreq;
+ struct nfs_commit_info cinfo;
+ struct nfs_page *req;
int status = data->task.tk_status;

+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
if (status < 0) {
dprintk("NFS: %5u commit failed with error %d.\n",
- data->task.tk_pid, status);
+ data->task.tk_pid, status);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
@@ -570,59 +536,49 @@ static void nfs_direct_commit_release(void *calldata)
}

dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
- nfs_direct_write_complete(dreq, data->inode);
- nfs_commit_free(data);
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+ nfs_list_remove_request(req);
+ if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) {
+ /* Note the rewrite will go through mds */
+ nfs_mark_request_commit(req, NULL, &cinfo);
+ } else {
+ page_cache_release(req->wb_page);
+ nfs_release_request(req);
+ }
+ nfs_unlock_request(req);
+ }
+
+ if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+ nfs_direct_write_complete(dreq, data->inode);
}

-static const struct rpc_call_ops nfs_commit_direct_ops = {
- .rpc_call_prepare = nfs_commit_prepare,
- .rpc_call_done = nfs_direct_commit_result,
- .rpc_release = nfs_direct_commit_release,
+static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
+{
+ /* There is no lock to clear */
+}
+
+static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
+ .completion = nfs_direct_commit_complete,
+ .error_cleanup = nfs_direct_error_cleanup,
};

static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
- struct nfs_commit_data *data = dreq->commit_data;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = dreq->ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .task = &data->task,
- .rpc_client = NFS_CLIENT(dreq->inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_commit_direct_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
-
- data->inode = dreq->inode;
- data->cred = msg.rpc_cred;
-
- data->args.fh = NFS_FH(data->inode);
- data->args.offset = 0;
- data->args.count = 0;
- data->res.fattr = &data->fattr;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-
- NFS_PROTO(data->inode)->commit_setup(data, &msg);
-
- /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
- dreq->commit_data = NULL;
-
- dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
-
- task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ int res;
+ struct nfs_commit_info cinfo;
+ LIST_HEAD(mds_list);
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
+ res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
+ if (res < 0) /* res == -ENOMEM */
+ nfs_direct_write_reschedule(dreq);
}

-static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+static void nfs_direct_write_schedule_work(struct work_struct *work)
{
+ struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
int flags = dreq->flags;

dreq->flags = 0;
@@ -634,90 +590,29 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
nfs_direct_write_reschedule(dreq);
break;
default:
- if (dreq->commit_data != NULL)
- nfs_commit_free(dreq->commit_data);
- nfs_direct_free_writedata(dreq);
- nfs_zap_mapping(inode, inode->i_mapping);
+ nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
nfs_direct_complete(dreq);
}
}

-static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- dreq->commit_data = nfs_commitdata_alloc();
- if (dreq->commit_data != NULL)
- dreq->commit_data->dreq = dreq;
+ schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */
}
+
#else
-static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
-{
- dreq->commit_data = NULL;
-}

static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- nfs_direct_free_writedata(dreq);
nfs_zap_mapping(inode, inode->i_mapping);
nfs_direct_complete(dreq);
}
#endif

-static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
-
- nfs_writeback_done(task, data);
-}
-
/*
* NB: Return the value of the first error return code. Subsequent
* errors after the first one are ignored.
*/
-static void nfs_direct_write_release(void *calldata)
-{
- struct nfs_write_data *data = calldata;
- struct nfs_pgio_header *hdr = data->header;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req;
- int status = data->task.tk_status;
-
- spin_lock(&dreq->lock);
-
- if (unlikely(status < 0)) {
- /* An error has occurred, so we should not commit */
- dreq->flags = 0;
- dreq->error = status;
- }
- if (unlikely(dreq->error != 0))
- goto out_unlock;
-
- dreq->count += data->res.count;
-
- if (data->res.verf->committed != NFS_FILE_SYNC) {
- switch (dreq->flags) {
- case 0:
- memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
- dreq->flags = NFS_ODIRECT_DO_COMMIT;
- break;
- case NFS_ODIRECT_DO_COMMIT:
- if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
- dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- }
- }
- }
-out_unlock:
- spin_unlock(&dreq->lock);
-
- if (put_dreq(dreq))
- nfs_direct_write_complete(dreq, hdr->inode);
-}
-
-static const struct rpc_call_ops nfs_write_direct_ops = {
- .rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_direct_write_result,
- .rpc_release = nfs_direct_write_release,
-};
-
/*
* For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
* operation. If nfs_writedata_alloc() or get_user_pages() fails,
@@ -725,143 +620,181 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
* handled automatically by nfs_direct_write_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
- loff_t pos, int sync)
+ loff_t pos)
{
+ struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_write_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
size_t wsize = NFS_SERVER(inode)->wsize;
unsigned int pgbase;
int result;
ssize_t started = 0;
+ struct page **pagevec = NULL;
+ unsigned int npages;

do {
- struct nfs_write_header *whdr;
- struct nfs_write_data *data;
- struct nfs_page_array *pages;
size_t bytes;
+ int i;

pgbase = user_addr & ~PAGE_MASK;
- bytes = min(wsize,count);
+ bytes = min(max(wsize, PAGE_SIZE), count);

result = -ENOMEM;
- whdr = nfs_writehdr_alloc();
- if (unlikely(!whdr))
- break;
-
- data = nfs_writedata_alloc(&whdr->header, nfs_page_array_len(pgbase, bytes));
- if (!data) {
- nfs_writehdr_free(&whdr->header);
+ npages = nfs_page_array_len(pgbase, bytes);
+ if (!pagevec)
+ pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
+ if (!pagevec)
break;
- }
- data->header = &whdr->header;
- atomic_inc(&data->header->refcnt);
- pages = &data->pages;

down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
- pages->npages, 0, 0, pages->pagevec, NULL);
+ npages, 0, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
- if (result < 0) {
- nfs_direct_writehdr_release(whdr);
+ if (result < 0)
break;
- }
- if ((unsigned)result < pages->npages) {
+
+ if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
- nfs_direct_release_pages(pages->pagevec, result);
- nfs_direct_writehdr_release(whdr);
+ nfs_direct_release_pages(pagevec, result);
break;
}
bytes -= pgbase;
- pages->npages = result;
+ npages = result;
}

- get_dreq(dreq);
-
- list_move_tail(&whdr->header.pages, &dreq->rewrite_list);
-
- whdr->header.req = (struct nfs_page *) dreq;
- whdr->header.inode = inode;
- whdr->header.cred = msg.rpc_cred;
- data->args.fh = NFS_FH(inode);
- data->args.context = ctx;
- data->args.lock_context = dreq->l_ctx;
- data->args.offset = pos;
- data->args.pgbase = pgbase;
- data->args.pages = pages->pagevec;
- data->args.count = bytes;
- data->args.stable = sync;
- data->res.fattr = &data->fattr;
- data->res.count = bytes;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- break;
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min(bytes, PAGE_SIZE - pgbase);

- dprintk("NFS: %5u initiated direct write call "
- "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
- task->tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- bytes,
- (unsigned long long)data->args.offset);
- rpc_put_task(task);
-
- started += bytes;
- user_addr += bytes;
- pos += bytes;
-
- /* FIXME: Remove this useless math from the final patch */
- pgbase += bytes;
- pgbase &= ~PAGE_MASK;
- BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
- count -= bytes;
+ req = nfs_create_request(dreq->ctx, dreq->inode,
+ pagevec[i],
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ nfs_direct_release_pages(pagevec + i,
+ npages - i);
+ result = PTR_ERR(req);
+ break;
+ }
+ nfs_lock_request(req);
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(desc, req)) {
+ result = desc->pg_error;
+ nfs_unlock_request(req);
+ nfs_release_request(req);
+ nfs_direct_release_pages(pagevec + i,
+ npages - i);
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ started += req_len;
+ user_addr += req_len;
+ pos += req_len;
+ count -= req_len;
+ }
} while (count != 0);

+ kfree(pagevec);
+
if (started)
return started;
return result < 0 ? (ssize_t) result : -EFAULT;
}

+static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
+{
+ struct nfs_direct_req *dreq = hdr->dreq;
+ struct nfs_commit_info cinfo;
+ int bit = -1;
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out_put;
+
+ nfs_init_cinfo_from_dreq(&cinfo, dreq);
+
+ spin_lock(&dreq->lock);
+
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+ dreq->flags = 0;
+ dreq->error = hdr->error;
+ }
+ if (dreq->error != 0)
+ bit = NFS_IOHDR_ERROR;
+ else {
+ dreq->count += (hdr->first_error - req_offset(hdr->req));
+ if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ bit = NFS_IOHDR_NEED_RESCHED;
+ } else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+ if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
+ bit = NFS_IOHDR_NEED_RESCHED;
+ else if (dreq->flags == 0) {
+ memcpy(&dreq->verf, &req->wb_verf,
+ sizeof(dreq->verf));
+ bit = NFS_IOHDR_NEED_COMMIT;
+ dreq->flags = NFS_ODIRECT_DO_COMMIT;
+ } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
+ if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ bit = NFS_IOHDR_NEED_RESCHED;
+ } else
+ bit = NFS_IOHDR_NEED_COMMIT;
+ }
+ }
+ }
+ spin_unlock(&dreq->lock);
+
+ while (!list_empty(&hdr->pages)) {
+ req = nfs_list_entry(hdr->pages.next);
+ nfs_list_remove_request(req);
+ switch (bit) {
+ case NFS_IOHDR_NEED_RESCHED:
+ case NFS_IOHDR_NEED_COMMIT:
+ nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+ break;
+ default:
+ page_cache_release(req->wb_page);
+ nfs_release_request(req);
+ }
+ nfs_unlock_request(req);
+ }
+
+out_put:
+ if (put_dreq(dreq))
+ nfs_direct_write_complete(dreq, hdr->inode);
+ hdr->release(hdr);
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
+ .error_cleanup = nfs_sync_pgio_error,
+ .init_hdr = nfs_direct_pgio_init,
+ .completion = nfs_direct_write_completion,
+};
+
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
- loff_t pos, int sync)
+ loff_t pos)
{
+ struct nfs_pageio_descriptor desc;
ssize_t result = 0;
size_t requested_bytes = 0;
unsigned long seg;

+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
+ &nfs_direct_write_completion_ops);
+ desc.pg_dreq = dreq;
get_dreq(dreq);

for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_write_schedule_segment(dreq, vec,
- pos, sync);
+ result = nfs_direct_write_schedule_segment(&desc, vec, pos);
if (result < 0)
break;
requested_bytes += result;
@@ -869,6 +802,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
break;
pos += vec->iov_len;
}
+ nfs_pageio_complete(&desc);

/*
* If no bytes were started, return the error, and let the
@@ -891,16 +825,10 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
- size_t wsize = NFS_SERVER(inode)->wsize;
- int sync = NFS_UNSTABLE;

dreq = nfs_direct_req_alloc();
if (!dreq)
goto out;
- nfs_alloc_commit_data(dreq);
-
- if (dreq->commit_data == NULL || count <= wsize)
- sync = NFS_FILE_SYNC;

dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@ -910,7 +838,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;

- result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
+ result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
if (!result)
result = nfs_direct_wait(dreq);
out_release:
@@ -1030,10 +958,15 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
task_io_account_write(count);

retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
+ if (retval > 0) {
+ struct inode *inode = mapping->host;

- if (retval > 0)
iocb->ki_pos = pos + retval;
-
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < iocb->ki_pos)
+ i_size_write(inode, iocb->ki_pos);
+ spin_unlock(&inode->i_lock);
+ }
out:
return retval;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 3d45213..12d3818 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -320,10 +320,11 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);

/* write.c */
+extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops);
extern struct nfs_write_header *nfs_writehdr_alloc(void);
extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
-extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount);
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
@@ -346,6 +347,15 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
+int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_commit_info *cinfo, int max);
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+ struct nfs_commit_info *cinfo);
+void nfs_mark_request_commit(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+ int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
@@ -365,6 +375,10 @@ extern int nfs_migrate_page(struct address_space *,
#define nfs_migrate_page NULL
#endif

+/* direct.c */
+void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
+ struct nfs_direct_req *dreq);
+
/* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_read_data *);
extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 26d1da4..806a55f 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -996,12 +996,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
}

static int
-filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
- struct nfs_commit_info *cinfo,
- int max)
+transfer_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_commit_info *cinfo, int max)
{
- struct list_head *src = &bucket->written;
- struct list_head *dst = &bucket->committing;
struct nfs_page *req, *tmp;
int ret = 0;

@@ -1014,9 +1011,22 @@ filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
nfs_list_add_request(req, dst);
ret++;
- if (ret == max)
+ if ((ret == max) && !cinfo->dreq)
break;
}
+ return ret;
+}
+
+static int
+filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+ struct nfs_commit_info *cinfo,
+ int max)
+{
+ struct list_head *src = &bucket->written;
+ struct list_head *dst = &bucket->committing;
+ int ret;
+
+ ret = transfer_commit_list(src, dst, cinfo, max);
if (ret) {
cinfo->ds->nwritten -= ret;
cinfo->ds->ncommitting += ret;
@@ -1046,6 +1056,27 @@ static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
return rv;
}

+/* Pull everything off the written lists and dump into @dst */
+static void filelayout_recover_commit_reqs(struct list_head *dst,
+ struct nfs_commit_info *cinfo)
+{
+ struct pnfs_commit_bucket *b;
+ int i;
+
+ /* NOTE: cinfo->lock is NOT held; we rely on the fact that this is
+ * only called from a single thread per dreq.
+ * We can't take the lock because we need to call put_lseg().
+ */
+ for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
+ if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
+ BUG_ON(!list_empty(&b->written));
+ put_lseg(b->wlseg);
+ b->wlseg = NULL;
+ }
+ }
+ cinfo->ds->nwritten = 0;
+}
+
static unsigned int
alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
{
@@ -1170,6 +1201,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.mark_request_commit = filelayout_mark_request_commit,
.clear_request_commit = filelayout_clear_request_commit,
.scan_commit_lists = filelayout_scan_commit_lists,
+ .recover_commit_reqs = filelayout_recover_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4cd8760..8efbee7 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -102,6 +102,8 @@ struct pnfs_layoutdriver_type {
struct nfs_commit_info *cinfo);
int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
int max);
+ void (*recover_commit_reqs) (struct list_head *list,
+ struct nfs_commit_info *cinfo);
int (*commit_pagelist)(struct inode *inode,
struct list_head *mds_pages,
int how,
@@ -323,6 +325,15 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
}

+static inline void
+pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
+ struct nfs_commit_info *cinfo)
+{
+ if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
+ return;
+ NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
+}
+
/* Should the pNFS client commit and return the layout upon a setattr */
static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode)
@@ -456,6 +467,12 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
return 0;
}

+static inline void
+pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
+ struct nfs_commit_info *cinfo)
+{
+}
+
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{
return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 56db9e7..fec214b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -39,9 +39,6 @@
/*
* Local function declarations
*/
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
- struct inode *inode, int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
@@ -87,8 +84,8 @@ struct nfs_write_header *nfs_writehdr_alloc(void)
return p;
}

-struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
+static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
{
struct nfs_write_data *data, *prealloc;

@@ -518,14 +515,17 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo,
struct inode *inode,
struct nfs_direct_req *dreq)
{
- nfs_init_cinfo_from_inode(cinfo, inode);
+ if (dreq)
+ nfs_init_cinfo_from_dreq(cinfo, dreq);
+ else
+ nfs_init_cinfo_from_inode(cinfo, inode);
}
EXPORT_SYMBOL_GPL(nfs_init_cinfo);

/*
* Add a request to the inode's commit list.
*/
-static void
+void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo)
{
@@ -567,7 +567,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)
}

#else
-static void
+void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo)
{
@@ -632,7 +632,7 @@ nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
}

/* cinfo->lock held by caller */
-static int
+int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_commit_info *cinfo, int max)
{
@@ -647,7 +647,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
nfs_request_remove_commit_list(req, cinfo);
nfs_list_add_request(req, dst);
ret++;
- if (ret == max)
+ if ((ret == max) && !cinfo->dreq)
break;
}
return ret;
@@ -662,7 +662,7 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
* Moves requests from the inode's 'commit' request list.
* The requests are *not* checked to ensure that they form a contiguous set.
*/
-static int
+int
nfs_scan_commit(struct inode *inode, struct list_head *dst,
struct nfs_commit_info *cinfo)
{
@@ -686,8 +686,8 @@ static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
return 0;
}

-static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst,
- struct nfs_commit_info *cinfo)
+int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+ struct nfs_commit_info *cinfo)
{
return 0;
}
@@ -1202,9 +1202,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);

-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops)
+void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
@@ -1568,8 +1568,8 @@ static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
.error_cleanup = nfs_commit_clear_lock,
};

-static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
- int how, struct nfs_commit_info *cinfo)
+int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
+ int how, struct nfs_commit_info *cinfo)
{
int status;

--
1.7.2.1


2012-04-20 18:36:53

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 23/28] NFS: create struct nfs_commit_info

It is COMMIT that is handled the most differently between
the paged and direct paths. Create a structure that encapsulates
everything either path needs to know about the commit state.

We could use an opaque void pointer to hide some of the layout driver stuff, but
Trond suggests pulling it out to ensure type checking, given the
huge changes being made, and the fact that it doesn't interfere
with other drivers.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/inode.c | 6 +-
fs/nfs/internal.h | 12 +++-
fs/nfs/nfs4filelayout.c | 119 ++++++++++++++++++++----------------
fs/nfs/nfs4filelayout.h | 14 +----
fs/nfs/pnfs.h | 72 ++++++++++++++-------
fs/nfs/write.c | 158 +++++++++++++++++++++++++++-------------------
include/linux/nfs_fs.h | 5 +-
include/linux/nfs_xdr.h | 27 ++++++++
8 files changed, 248 insertions(+), 165 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e8bbfa5..59a12c6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1547,7 +1547,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
nfsi->delegation_state = 0;
init_rwsem(&nfsi->rwsem);
nfsi->layout = NULL;
- atomic_set(&nfsi->commits_outstanding, 0);
+ atomic_set(&nfsi->commit_info.rpcs_out, 0);
#endif
}

@@ -1559,9 +1559,9 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&nfsi->open_files);
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
- INIT_LIST_HEAD(&nfsi->commit_list);
+ INIT_LIST_HEAD(&nfsi->commit_info.list);
nfsi->npages = 0;
- nfsi->ncommit = 0;
+ nfsi->commit_info.ncommit = 0;
atomic_set(&nfsi->silly_count, 1);
INIT_HLIST_HEAD(&nfsi->silly_list);
init_waitqueue_head(&nfsi->waitqueue);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 50d85e5..53d148b 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -346,12 +346,18 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg);
void nfs_retry_commit(struct list_head *page_list,
- struct pnfs_layout_segment *lseg);
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
void nfs_commit_clear_lock(struct nfs_inode *nfsi);
void nfs_commitdata_release(struct nfs_commit_data *data);
void nfs_commit_release_pages(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
-void nfs_request_remove_commit_list(struct nfs_page *req);
+void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+ struct nfs_commit_info *cinfo);
+void nfs_request_remove_commit_list(struct nfs_page *req,
+ struct nfs_commit_info *cinfo);
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+ struct inode *inode,
+ struct nfs_direct_req *dreq);

#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e40523f..fe2cb55 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -347,9 +347,11 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
static void filelayout_commit_release(void *calldata)
{
struct nfs_commit_data *data = calldata;
+ struct nfs_commit_info cinfo;

nfs_commit_release_pages(data);
- if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding))
+ nfs_init_cinfo(&cinfo, data->inode, data->dreq);
+ if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
nfs_commit_clear_lock(NFS_I(data->inode));
put_lseg(data->lseg);
nfs_commitdata_release(data);
@@ -695,17 +697,16 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)

static int
filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo,
gfp_t gfp_flags)
{
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
- struct nfs4_filelayout *flo = FILELAYOUT_FROM_HDR(lseg->pls_layout);
-
- struct nfs4_fl_commit_bucket *buckets;
+ struct pnfs_commit_bucket *buckets;
int size;

if (fl->commit_through_mds)
return 0;
- if (flo->commit_info.nbuckets != 0) {
+ if (cinfo->ds->nbuckets != 0) {
/* This assumes there is only one IOMODE_RW lseg. What
* we really want to do is have a layout_hdr level
* dictionary of <multipath_list4, fh> keys, each
@@ -718,25 +719,25 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
size = (fl->stripe_type == STRIPE_SPARSE) ?
fl->dsaddr->ds_num : fl->dsaddr->stripe_count;

- buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket),
+ buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
gfp_flags);
if (!buckets)
return -ENOMEM;
else {
int i;

- spin_lock(&lseg->pls_layout->plh_inode->i_lock);
- if (flo->commit_info.nbuckets != 0)
+ spin_lock(cinfo->lock);
+ if (cinfo->ds->nbuckets != 0)
kfree(buckets);
else {
- flo->commit_info.buckets = buckets;
- flo->commit_info.nbuckets = size;
+ cinfo->ds->buckets = buckets;
+ cinfo->ds->nbuckets = size;
for (i = 0; i < size; i++) {
INIT_LIST_HEAD(&buckets[i].written);
INIT_LIST_HEAD(&buckets[i].committing);
}
}
- spin_unlock(&lseg->pls_layout->plh_inode->i_lock);
+ spin_unlock(cinfo->lock);
return 0;
}
}
@@ -821,6 +822,7 @@ static void
filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
+ struct nfs_commit_info cinfo;
int status;

BUG_ON(pgio->pg_lseg != NULL);
@@ -836,7 +838,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
- status = filelayout_alloc_commit_info(pgio->pg_lseg, GFP_NOFS);
+ nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
+ status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
if (status < 0) {
put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -871,40 +874,42 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
* If this will make the bucket empty, it will need to put the lseg reference.
*/
static void
-filelayout_clear_request_commit(struct nfs_page *req)
+filelayout_clear_request_commit(struct nfs_page *req,
+ struct nfs_commit_info *cinfo)
{
struct pnfs_layout_segment *freeme = NULL;
- struct inode *inode = req->wb_context->dentry->d_inode;

- spin_lock(&inode->i_lock);
+ spin_lock(cinfo->lock);
if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
goto out;
+ cinfo->ds->nwritten--;
if (list_is_singular(&req->wb_list)) {
- struct nfs4_fl_commit_bucket *bucket;
+ struct pnfs_commit_bucket *bucket;

bucket = list_first_entry(&req->wb_list,
- struct nfs4_fl_commit_bucket,
+ struct pnfs_commit_bucket,
written);
freeme = bucket->wlseg;
bucket->wlseg = NULL;
}
out:
- nfs_request_remove_commit_list(req);
- spin_unlock(&inode->i_lock);
+ nfs_request_remove_commit_list(req, cinfo);
+ spin_unlock(cinfo->lock);
put_lseg(freeme);
}

static struct list_head *
filelayout_choose_commit_list(struct nfs_page *req,
- struct pnfs_layout_segment *lseg)
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j;
struct list_head *list;
- struct nfs4_fl_commit_bucket *buckets;
+ struct pnfs_commit_bucket *buckets;

if (fl->commit_through_mds)
- return &NFS_I(req->wb_context->dentry->d_inode)->commit_list;
+ return &cinfo->mds->list;

/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
@@ -914,7 +919,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
*/
j = nfs4_fl_calc_j_index(lseg, req_offset(req));
i = select_bucket_index(fl, j);
- buckets = FILELAYOUT_FROM_HDR(lseg->pls_layout)->commit_info.buckets;
+ buckets = cinfo->ds->buckets;
list = &buckets[i].written;
if (list_empty(list)) {
/* Non-empty buckets hold a reference on the lseg. That ref
@@ -926,17 +931,19 @@ filelayout_choose_commit_list(struct nfs_page *req,
buckets[i].wlseg = get_lseg(lseg);
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+ cinfo->ds->nwritten++;
return list;
}

static void
filelayout_mark_request_commit(struct nfs_page *req,
- struct pnfs_layout_segment *lseg)
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct list_head *list;

- list = filelayout_choose_commit_list(req, lseg);
- nfs_request_add_commit_list(req, list);
+ list = filelayout_choose_commit_list(req, lseg, cinfo);
+ nfs_request_add_commit_list(req, list, cinfo);
}

static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -993,8 +1000,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
}

static int
-filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
- spinlock_t *lock)
+filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
+ struct nfs_commit_info *cinfo,
+ int max)
{
struct list_head *src = &bucket->written;
struct list_head *dst = &bucket->committing;
@@ -1004,9 +1012,9 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
list_for_each_entry_safe(req, tmp, src, wb_list) {
if (!nfs_lock_request(req))
continue;
- if (cond_resched_lock(lock))
+ if (cond_resched_lock(cinfo->lock))
list_safe_reset_next(req, tmp, wb_list);
- nfs_request_remove_commit_list(req);
+ nfs_request_remove_commit_list(req, cinfo);
clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
nfs_list_add_request(req, dst);
ret++;
@@ -1014,6 +1022,8 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
break;
}
if (ret) {
+ cinfo->ds->nwritten -= ret;
+ cinfo->ds->ncommitting += ret;
bucket->clseg = bucket->wlseg;
if (list_empty(src))
bucket->wlseg = NULL;
@@ -1024,37 +1034,32 @@ filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max,
}

/* Move reqs from written to committing lists, returning count of number moved.
- * Note called with i_lock held.
+ * Note called with cinfo->lock held.
*/
-static int filelayout_scan_commit_lists(struct inode *inode, int max,
- spinlock_t *lock)
+static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo,
+ int max)
{
- struct nfs4_fl_commit_info *fl_cinfo;
int i, rv = 0, cnt;

- fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
- if (fl_cinfo->nbuckets == 0)
- goto out_done;
- for (i = 0; i < fl_cinfo->nbuckets && max != 0; i++) {
- cnt = filelayout_scan_ds_commit_list(&fl_cinfo->buckets[i],
- max, lock);
+ for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) {
+ cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i],
+ cinfo, max);
max -= cnt;
rv += cnt;
}
-out_done:
return rv;
}

static unsigned int
-alloc_ds_commits(struct inode *inode, struct list_head *list)
+alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
{
- struct nfs4_fl_commit_info *fl_cinfo;
- struct nfs4_fl_commit_bucket *bucket;
+ struct pnfs_ds_commit_info *fl_cinfo;
+ struct pnfs_commit_bucket *bucket;
struct nfs_commit_data *data;
int i, j;
unsigned int nreq = 0;

- fl_cinfo = &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+ fl_cinfo = cinfo->ds;
bucket = fl_cinfo->buckets;
for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
if (list_empty(&bucket->committing))
@@ -1073,7 +1078,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) {
if (list_empty(&bucket->committing))
continue;
- nfs_retry_commit(&bucket->committing, bucket->clseg);
+ nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
put_lseg(bucket->clseg);
bucket->clseg = NULL;
}
@@ -1084,7 +1089,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
/* This follows nfs_commit_list pretty closely */
static int
filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
- int how)
+ int how, struct nfs_commit_info *cinfo)
{
struct nfs_commit_data *data, *tmp;
LIST_HEAD(list);
@@ -1097,17 +1102,17 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
list_add(&data->pages, &list);
nreq++;
} else
- nfs_retry_commit(mds_pages, NULL);
+ nfs_retry_commit(mds_pages, NULL, cinfo);
}

- nreq += alloc_ds_commits(inode, &list);
+ nreq += alloc_ds_commits(cinfo, &list);

if (nreq == 0) {
nfs_commit_clear_lock(NFS_I(inode));
goto out;
}

- atomic_add(nreq, &NFS_I(inode)->commits_outstanding);
+ atomic_add(nreq, &cinfo->mds->rpcs_out);

list_for_each_entry_safe(data, tmp, &list, pages) {
list_del_init(&data->pages);
@@ -1116,14 +1121,15 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
nfs_initiate_commit(NFS_CLIENT(inode), data,
data->mds_ops, how);
} else {
- struct nfs4_fl_commit_info *fl_cinfo;
+ struct pnfs_commit_bucket *buckets;

- fl_cinfo = &FILELAYOUT_FROM_HDR(data->lseg->pls_layout)->commit_info;
- nfs_init_commit(data, &fl_cinfo->buckets[data->ds_commit_index].committing, data->lseg);
+ buckets = cinfo->ds->buckets;
+ nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg);
filelayout_initiate_commit(data, how);
}
}
out:
+ cinfo->ds->ncommitting = 0;
return PNFS_ATTEMPTED;
}

@@ -1148,6 +1154,12 @@ filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
kfree(FILELAYOUT_FROM_HDR(lo));
}

+static struct pnfs_ds_commit_info *
+filelayout_get_ds_info(struct inode *inode)
+{
+ return &FILELAYOUT_FROM_HDR(NFS_I(inode)->layout)->commit_info;
+}
+
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
@@ -1158,6 +1170,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.free_lseg = filelayout_free_lseg,
.pg_read_ops = &filelayout_pg_read_ops,
.pg_write_ops = &filelayout_pg_write_ops,
+ .get_ds_info = &filelayout_get_ds_info,
.mark_request_commit = filelayout_mark_request_commit,
.clear_request_commit = filelayout_clear_request_commit,
.scan_commit_lists = filelayout_scan_commit_lists,
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 333a3ac..96b89bb 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -74,18 +74,6 @@ struct nfs4_file_layout_dsaddr {
struct nfs4_pnfs_ds *ds_list[1];
};

-struct nfs4_fl_commit_bucket {
- struct list_head written;
- struct list_head committing;
- struct pnfs_layout_segment *wlseg;
- struct pnfs_layout_segment *clseg;
-};
-
-struct nfs4_fl_commit_info {
- int nbuckets;
- struct nfs4_fl_commit_bucket *buckets;
-};
-
struct nfs4_filelayout_segment {
struct pnfs_layout_segment generic_hdr;
u32 stripe_type;
@@ -100,7 +88,7 @@ struct nfs4_filelayout_segment {

struct nfs4_filelayout {
struct pnfs_layout_hdr generic_hdr;
- struct nfs4_fl_commit_info commit_info;
+ struct pnfs_ds_commit_info commit_info;
};

static inline struct nfs4_filelayout *
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 734e4ef..4cd8760 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,11 +94,18 @@ struct pnfs_layoutdriver_type {
const struct nfs_pageio_ops *pg_read_ops;
const struct nfs_pageio_ops *pg_write_ops;

+ struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode);
void (*mark_request_commit) (struct nfs_page *req,
- struct pnfs_layout_segment *lseg);
- void (*clear_request_commit) (struct nfs_page *req);
- int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock);
- int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
+ void (*clear_request_commit) (struct nfs_page *req,
+ struct nfs_commit_info *cinfo);
+ int (*scan_commit_lists) (struct nfs_commit_info *cinfo,
+ int max);
+ int (*commit_pagelist)(struct inode *inode,
+ struct list_head *mds_pages,
+ int how,
+ struct nfs_commit_info *cinfo);

/*
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
@@ -263,49 +270,57 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss)
}

static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+ struct nfs_commit_info *cinfo)
{
- if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
+ if (cinfo->ds == NULL || cinfo->ds->ncommitting == 0)
return PNFS_NOT_ATTEMPTED;
- return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
+ return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how, cinfo);
+}
+
+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+ struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
+
+ if (ld == NULL || ld->get_ds_info == NULL)
+ return NULL;
+ return ld->get_ds_info(inode);
}

static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;

if (lseg == NULL || ld->mark_request_commit == NULL)
return false;
- ld->mark_request_commit(req, lseg);
+ ld->mark_request_commit(req, lseg, cinfo);
return true;
}

static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;

if (ld == NULL || ld->clear_request_commit == NULL)
return false;
- ld->clear_request_commit(req);
+ ld->clear_request_commit(req, cinfo);
return true;
}

static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+ int max)
{
- struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
- int ret;
-
- if (ld == NULL || ld->scan_commit_lists == NULL)
+ if (cinfo->ds == NULL || cinfo->ds->nwritten == 0)
return 0;
- ret = ld->scan_commit_lists(inode, max, lock);
- if (ret != 0)
- set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
- return ret;
+ else
+ return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max);
}

/* Should the pNFS client commit and return the layout upon a setattr */
@@ -409,25 +424,34 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st
}

static inline int
-pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
+pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
+ struct nfs_commit_info *cinfo)
{
return PNFS_NOT_ATTEMPTED;
}

+static inline struct pnfs_ds_commit_info *
+pnfs_get_ds_info(struct inode *inode)
+{
+ return NULL;
+}
+
static inline bool
-pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
return false;
}

static inline bool
-pnfs_clear_request_commit(struct nfs_page *req)
+pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
return false;
}

static inline int
-pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock)
+pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo,
+ int max)
{
return 0;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2500f1c..18bf700 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -452,65 +452,79 @@ nfs_mark_request_dirty(struct nfs_page *req)
/**
* nfs_request_add_commit_list - add request to a commit list
* @req: pointer to a struct nfs_page
- * @head: commit list head
+ * @dst: commit list head
+ * @cinfo: holds list lock and accounting info
*
- * This sets the PG_CLEAN bit, updates the inode global count of
+ * This sets the PG_CLEAN bit, updates the cinfo count of
* number of outstanding requests requiring a commit as well as
* the MM page stats.
*
- * The caller must _not_ hold the inode->i_lock, but must be
+ * The caller must _not_ hold the cinfo->lock, but must be
* holding the nfs_page lock.
*/
void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
+nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+ struct nfs_commit_info *cinfo)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
-
set_bit(PG_CLEAN, &(req)->wb_flags);
- spin_lock(&inode->i_lock);
- nfs_list_add_request(req, head);
- NFS_I(inode)->ncommit++;
- spin_unlock(&inode->i_lock);
+ spin_lock(cinfo->lock);
+ nfs_list_add_request(req, dst);
+ cinfo->mds->ncommit++;
+ spin_unlock(cinfo->lock);
inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
- __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ __mark_inode_dirty(req->wb_context->dentry->d_inode, I_DIRTY_DATASYNC);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
* nfs_request_remove_commit_list - Remove request from a commit list
* @req: pointer to a nfs_page
+ * @cinfo: holds list lock and accounting info
*
- * This clears the PG_CLEAN bit, and updates the inode global count of
+ * This clears the PG_CLEAN bit, and updates the cinfo's count of
* number of outstanding requests requiring a commit
* It does not update the MM page stats.
*
- * The caller _must_ hold the inode->i_lock and the nfs_page lock.
+ * The caller _must_ hold the cinfo->lock and the nfs_page lock.
*/
void
-nfs_request_remove_commit_list(struct nfs_page *req)
+nfs_request_remove_commit_list(struct nfs_page *req,
+ struct nfs_commit_info *cinfo)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
-
if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
return;
nfs_list_remove_request(req);
- NFS_I(inode)->ncommit--;
+ cinfo->mds->ncommit--;
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);

+static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
+ struct inode *inode)
+{
+ cinfo->lock = &inode->i_lock;
+ cinfo->mds = &NFS_I(inode)->commit_info;
+ cinfo->ds = pnfs_get_ds_info(inode);
+}
+
+void nfs_init_cinfo(struct nfs_commit_info *cinfo,
+ struct inode *inode,
+ struct nfs_direct_req *dreq)
+{
+ nfs_init_cinfo_from_inode(cinfo, inode);
+}
+EXPORT_SYMBOL_GPL(nfs_init_cinfo);

/*
* Add a request to the inode's commit list.
*/
static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
-
- if (pnfs_mark_request_commit(req, lseg))
+ if (pnfs_mark_request_commit(req, lseg, cinfo))
return;
- nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
+ nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
}

static void
@@ -525,11 +539,13 @@ nfs_clear_request_commit(struct nfs_page *req)
{
if (test_bit(PG_CLEAN, &req->wb_flags)) {
struct inode *inode = req->wb_context->dentry->d_inode;
+ struct nfs_commit_info cinfo;

- if (!pnfs_clear_request_commit(req)) {
- spin_lock(&inode->i_lock);
- nfs_request_remove_commit_list(req);
- spin_unlock(&inode->i_lock);
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+ if (!pnfs_clear_request_commit(req, &cinfo)) {
+ spin_lock(cinfo.lock);
+ nfs_request_remove_commit_list(req, &cinfo);
+ spin_unlock(cinfo.lock);
}
nfs_clear_page_commit(req->wb_page);
}
@@ -545,7 +561,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)

#else
static void
-nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
+nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
}

@@ -564,10 +581,12 @@ int nfs_write_need_commit(struct nfs_write_data *data)

static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
+ struct nfs_commit_info cinfo;
unsigned long bytes = 0;

if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
+ nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
@@ -585,7 +604,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
goto next;
}
if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
- nfs_mark_request_commit(req, hdr->lseg);
+ nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next;
}
remove_req:
@@ -599,16 +618,16 @@ out:
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-static int
-nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long
+nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
- return nfsi->ncommit > 0;
+ return cinfo->mds->ncommit;
}

-/* i_lock held by caller */
+/* cinfo->lock held by caller */
static int
-nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
- spinlock_t *lock)
+nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
+ struct nfs_commit_info *cinfo, int max)
{
struct nfs_page *req, *tmp;
int ret = 0;
@@ -616,9 +635,9 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
list_for_each_entry_safe(req, tmp, src, wb_list) {
if (!nfs_lock_request(req))
continue;
- if (cond_resched_lock(lock))
+ if (cond_resched_lock(cinfo->lock))
list_safe_reset_next(req, tmp, wb_list);
- nfs_request_remove_commit_list(req);
+ nfs_request_remove_commit_list(req, cinfo);
nfs_list_add_request(req, dst);
ret++;
if (ret == max)
@@ -630,37 +649,38 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
/*
* nfs_scan_commit - Scan an inode for commit requests
* @inode: NFS inode to scan
- * @dst: destination list
+ * @dst: mds destination list
+ * @cinfo: mds and ds lists of reqs ready to commit
*
* Moves requests from the inode's 'commit' request list.
* The requests are *not* checked to ensure that they form a contiguous set.
*/
static int
-nfs_scan_commit(struct inode *inode, struct list_head *dst)
+nfs_scan_commit(struct inode *inode, struct list_head *dst,
+ struct nfs_commit_info *cinfo)
{
- struct nfs_inode *nfsi = NFS_I(inode);
int ret = 0;

- spin_lock(&inode->i_lock);
- if (nfsi->ncommit > 0) {
+ spin_lock(cinfo->lock);
+ if (cinfo->mds->ncommit > 0) {
const int max = INT_MAX;

- ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
- &inode->i_lock);
- ret += pnfs_scan_commit_lists(inode, max - ret,
- &inode->i_lock);
+ ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
+ cinfo, max);
+ ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(cinfo->lock);
return ret;
}

#else
-static inline int nfs_need_commit(struct nfs_inode *nfsi)
+static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return 0;
}

-static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst,
+ struct nfs_commit_info *cinfo)
{
return 0;
}
@@ -929,7 +949,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
*/
static void nfs_write_rpcsetup(struct nfs_write_data *data,
unsigned int count, unsigned int offset,
- int how)
+ int how, struct nfs_commit_info *cinfo)
{
struct nfs_page *req = data->header->req;

@@ -950,7 +970,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data,
case 0:
break;
case FLUSH_COND_STABLE:
- if (nfs_need_commit(NFS_I(data->header->inode)))
+ if (nfs_reqs_to_commit(cinfo))
break;
default:
data->args.stable = NFS_FILE_SYNC;
@@ -1034,12 +1054,14 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
unsigned int offset;
int requests = 0;
int ret = 0;
+ struct nfs_commit_info cinfo;

+ nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
nfs_list_remove_request(req);
nfs_list_add_request(req, &hdr->pages);

if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
+ (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
desc->pg_count > wsize))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;

@@ -1053,7 +1075,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
if (!data)
goto out_bad;
data->pages.pagevec[0] = page;
- nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags);
+ nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
list_add(&data->list, &hdr->rpc_list);
requests++;
nbytes -= len;
@@ -1088,6 +1110,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
int ret = 0;
+ struct nfs_commit_info cinfo;

data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
desc->pg_count));
@@ -1097,6 +1120,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
goto out;
}

+ nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
pages = data->pages.pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
@@ -1106,11 +1130,11 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
}

if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
+ (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
desc->pg_ioflags &= ~FLUSH_COND_STABLE;

/* Set up the argument struct */
- nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags);
+ nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
list_add(&data->list, &hdr->rpc_list);
desc->pg_rpc_callops = &nfs_write_common_ops;
out:
@@ -1417,14 +1441,15 @@ void nfs_init_commit(struct nfs_commit_data *data,
EXPORT_SYMBOL_GPL(nfs_init_commit);

void nfs_retry_commit(struct list_head *page_list,
- struct pnfs_layout_segment *lseg)
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct nfs_page *req;

while (!list_empty(page_list)) {
req = nfs_list_entry(page_list->next);
nfs_list_remove_request(req);
- nfs_mark_request_commit(req, lseg);
+ nfs_mark_request_commit(req, lseg, cinfo);
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
BDI_RECLAIMABLE);
@@ -1437,7 +1462,8 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
* Commit dirty pages
*/
static int
-nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how,
+ struct nfs_commit_info *cinfo)
{
struct nfs_commit_data *data;

@@ -1450,7 +1476,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
nfs_init_commit(data, head, NULL);
return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
out_bad:
- nfs_retry_commit(head, NULL);
+ nfs_retry_commit(head, NULL, cinfo);
nfs_commit_clear_lock(NFS_I(inode));
return -ENOMEM;
}
@@ -1524,30 +1550,32 @@ static const struct rpc_call_ops nfs_commit_ops = {
};

static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
- int how)
+ int how, struct nfs_commit_info *cinfo)
{
int status;

- status = pnfs_commit_list(inode, head, how);
+ status = pnfs_commit_list(inode, head, how, cinfo);
if (status == PNFS_NOT_ATTEMPTED)
- status = nfs_commit_list(inode, head, how);
+ status = nfs_commit_list(inode, head, how, cinfo);
return status;
}

int nfs_commit_inode(struct inode *inode, int how)
{
LIST_HEAD(head);
+ struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
int res;

res = nfs_commit_set_lock(NFS_I(inode), may_wait);
if (res <= 0)
goto out_mark_dirty;
- res = nfs_scan_commit(inode, &head);
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+ res = nfs_scan_commit(inode, &head, &cinfo);
if (res) {
int error;

- error = nfs_generic_commit_list(inode, &head, how);
+ error = nfs_generic_commit_list(inode, &head, how, &cinfo);
if (error < 0)
return error;
if (!may_wait)
@@ -1578,14 +1606,14 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
int ret = 0;

/* no commits means nothing needs to be done */
- if (!nfsi->ncommit)
+ if (!nfsi->commit_info.ncommit)
return ret;

if (wbc->sync_mode == WB_SYNC_NONE) {
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
- if (nfsi->ncommit <= (nfsi->npages >> 1))
+ if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
goto out_mark_dirty;

/* don't wait for the COMMIT response */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8d3a2b8..8a88c16 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -179,8 +179,7 @@ struct nfs_inode {
__be32 cookieverf[2];

unsigned long npages;
- unsigned long ncommit;
- struct list_head commit_list;
+ struct nfs_mds_commit_info commit_info;

/* Open contexts for shared mmap writes */
struct list_head open_files;
@@ -201,7 +200,6 @@ struct nfs_inode {

/* pNFS layout information */
struct pnfs_layout_hdr *layout;
- atomic_t commits_outstanding;
#endif /* CONFIG_NFS_V4*/
#ifdef CONFIG_NFS_FSCACHE
struct fscache_cookie *fscache;
@@ -230,7 +228,6 @@ struct nfs_inode {
#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
-#define NFS_INO_PNFS_COMMIT (8) /* use pnfs code for commit */
#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 938d30a..2884ced 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1074,6 +1074,21 @@ struct nfstime4 {
};

#ifdef CONFIG_NFS_V4_1
+
+struct pnfs_commit_bucket {
+ struct list_head written;
+ struct list_head committing;
+ struct pnfs_layout_segment *wlseg;
+ struct pnfs_layout_segment *clseg;
+};
+
+struct pnfs_ds_commit_info {
+ int nwritten;
+ int ncommitting;
+ int nbuckets;
+ struct pnfs_commit_bucket *buckets;
+};
+
#define NFS4_EXCHANGE_ID_LEN (48)
struct nfs41_exchange_id_args {
struct nfs_client *client;
@@ -1237,6 +1252,18 @@ struct nfs_write_header {
struct nfs_write_data rpc_data;
};

+struct nfs_mds_commit_info {
+ atomic_t rpcs_out;
+ unsigned long ncommit;
+ struct list_head list;
+};
+
+struct nfs_commit_info {
+ spinlock_t *lock;
+ struct nfs_mds_commit_info *mds;
+ struct pnfs_ds_commit_info *ds;
+};
+
struct nfs_commit_data {
struct rpc_task task;
struct inode *inode;
--
1.7.2.1


2012-04-20 18:36:52

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 20/28] NFS: rewrite directio read to use async coalesce code

This also has the advantage that it allows directio to use pnfs.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 248 +++++++++++++++++++++------------------------
fs/nfs/internal.h | 5 +-
fs/nfs/pagelist.c | 7 +-
fs/nfs/read.c | 10 +-
include/linux/nfs_page.h | 1 +
include/linux/nfs_xdr.h | 4 +-
6 files changed, 131 insertions(+), 144 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 22a40c4..d713234 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -124,22 +124,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
return -EINVAL;
}

-static void nfs_direct_dirty_pages(struct page **pages, unsigned int pgbase, size_t count)
-{
- unsigned int npages;
- unsigned int i;
-
- if (count == 0)
- return;
- pages += (pgbase >> PAGE_SHIFT);
- npages = (count + (pgbase & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- for (i = 0; i < npages; i++) {
- struct page *page = pages[i];
- if (!PageCompound(page))
- set_page_dirty(page);
- }
-}
-
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
unsigned int i;
@@ -226,58 +210,85 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
nfs_direct_req_release(dreq);
}

-/*
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete. This could be long *after* we are woken up in
- * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
+void nfs_direct_readpage_release(struct nfs_page *req)
{
- struct nfs_read_data *data = calldata;
-
- nfs_readpage_result(task, data);
+ dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+ req->wb_context->dentry->d_inode->i_sb->s_id,
+ (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ req->wb_bytes,
+ (long long)req_offset(req));
+ nfs_release_request(req);
}

-static void nfs_direct_read_release(void *calldata)
+static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
{
+ unsigned long pos = req_offset(hdr->req);
+ struct nfs_direct_req *dreq = hdr->dreq;

- struct nfs_read_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req;
- int status = data->task.tk_status;
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out_put;

spin_lock(&dreq->lock);
- if (unlikely(status < 0)) {
- dreq->error = status;
- spin_unlock(&dreq->lock);
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
+ (hdr->first_error == pos))
+ dreq->error = hdr->error;
+ else
+ dreq->count += (hdr->first_error - pos);
+ spin_unlock(&dreq->lock);
+
+ if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct page *page = req->wb_page;
+
+ nfs_list_remove_request(req);
+ nfs_direct_readpage_release(req);
+ if (!PageCompound(page))
+ set_page_dirty(page);
+ page_cache_release(page);
+ }
} else {
- dreq->count += data->res.count;
- spin_unlock(&dreq->lock);
- nfs_direct_dirty_pages(data->pages.pagevec,
- data->args.pgbase,
- data->res.count);
+ pos &= PAGE_MASK;
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+ if (pos < (hdr->first_error & PAGE_MASK))
+ if (!PageCompound(req->wb_page))
+ set_page_dirty(req->wb_page);
+ page_cache_release(req->wb_page);
+ nfs_list_remove_request(req);
+ nfs_direct_readpage_release(req);
+ pos += PAGE_SIZE;
+ }
}
- nfs_direct_release_pages(data->pages.pagevec, data->pages.npages);
-
+out_put:
if (put_dreq(dreq))
nfs_direct_complete(dreq);
- nfs_readdata_release(data);
+ hdr->release(hdr);
}

-static const struct rpc_call_ops nfs_read_direct_ops = {
- .rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_direct_read_result,
- .rpc_release = nfs_direct_read_release,
-};
-
-static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
+static void nfs_sync_pgio_error(struct list_head *head)
{
- struct nfs_read_data *data = &rhdr->rpc_data;
+ struct nfs_page *req;

- if (data->pages.pagevec != data->pages.page_array)
- kfree(data->pages.pagevec);
- nfs_readhdr_free(&rhdr->header);
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_release_request(req);
+ }
}

+static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
+{
+ get_dreq(hdr->dreq);
+}
+
+static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
+ .error_cleanup = nfs_sync_pgio_error,
+ .init_hdr = nfs_direct_pgio_init,
+ .completion = nfs_direct_read_completion,
+};
+
/*
* For each rsize'd chunk of the user's buffer, dispatch an NFS READ
* operation. If nfs_readdata_alloc() or get_user_pages() fails,
@@ -285,118 +296,85 @@ static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
* handled automatically by nfs_direct_read_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
+static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
loff_t pos)
{
+ struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
struct inode *inode = ctx->dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
size_t rsize = NFS_SERVER(inode)->rsize;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_cred = ctx->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
- .rpc_message = &msg,
- .callback_ops = &nfs_read_direct_ops,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC,
- };
unsigned int pgbase;
int result;
ssize_t started = 0;
+ struct page **pagevec = NULL;
+ unsigned int npages;

do {
- struct nfs_read_header *rhdr;
- struct nfs_read_data *data;
- struct nfs_page_array *pages;
size_t bytes;
+ int i;

pgbase = user_addr & ~PAGE_MASK;
- bytes = min(rsize,count);
+ bytes = min(max(rsize, PAGE_SIZE), count);

result = -ENOMEM;
- rhdr = nfs_readhdr_alloc();
- if (unlikely(!rhdr))
- break;
- data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
- if (!data) {
- nfs_readhdr_free(&rhdr->header);
+ npages = nfs_page_array_len(pgbase, bytes);
+ if (!pagevec)
+ pagevec = kmalloc(npages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!pagevec)
break;
- }
- data->header = &rhdr->header;
- atomic_inc(&data->header->refcnt);
- pages = &data->pages;
-
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
- pages->npages, 1, 0, pages->pagevec, NULL);
+ npages, 1, 0, pagevec, NULL);
up_read(&current->mm->mmap_sem);
- if (result < 0) {
- nfs_direct_readhdr_release(rhdr);
+ if (result < 0)
break;
- }
- if ((unsigned)result < pages->npages) {
+ if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
- nfs_direct_release_pages(pages->pagevec, result);
- nfs_direct_readhdr_release(rhdr);
+ nfs_direct_release_pages(pagevec, result);
break;
}
bytes -= pgbase;
- pages->npages = result;
+ npages = result;
}

- get_dreq(dreq);
-
- rhdr->header.req = (struct nfs_page *) dreq;
- rhdr->header.inode = inode;
- rhdr->header.cred = msg.rpc_cred;
- data->args.fh = NFS_FH(inode);
- data->args.context = get_nfs_open_context(ctx);
- data->args.lock_context = dreq->l_ctx;
- data->args.offset = pos;
- data->args.pgbase = pgbase;
- data->args.pages = pages->pagevec;
- data->args.count = bytes;
- data->res.fattr = &data->fattr;
- data->res.eof = 0;
- data->res.count = bytes;
- nfs_fattr_init(&data->fattr);
- msg.rpc_argp = &data->args;
- msg.rpc_resp = &data->res;
-
- task_setup_data.task = &data->task;
- task_setup_data.callback_data = data;
- NFS_PROTO(inode)->read_setup(data, &msg);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- break;
-
- dprintk("NFS: %5u initiated direct read call "
- "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
- task->tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- bytes,
- (unsigned long long)data->args.offset);
- rpc_put_task(task);
-
- started += bytes;
- user_addr += bytes;
- pos += bytes;
- /* FIXME: Remove this unnecessary math from final patch */
- pgbase += bytes;
- pgbase &= ~PAGE_MASK;
- BUG_ON(pgbase != (user_addr & ~PAGE_MASK));
-
- count -= bytes;
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min(bytes, PAGE_SIZE - pgbase);
+ /* XXX do we need to do the eof zeroing found in async_filler? */
+ req = nfs_create_request(dreq->ctx, dreq->inode,
+ pagevec[i],
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ nfs_direct_release_pages(pagevec + i,
+ npages - i);
+ result = PTR_ERR(req);
+ break;
+ }
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(desc, req)) {
+ result = desc->pg_error;
+ nfs_release_request(req);
+ nfs_direct_release_pages(pagevec + i,
+ npages - i);
+ break;
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ started += req_len;
+ user_addr += req_len;
+ pos += req_len;
+ count -= req_len;
+ }
} while (count != 0);

+ kfree(pagevec);
+
if (started)
return started;
return result < 0 ? (ssize_t) result : -EFAULT;
@@ -407,15 +385,19 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
unsigned long nr_segs,
loff_t pos)
{
+ struct nfs_pageio_descriptor desc;
ssize_t result = -EINVAL;
size_t requested_bytes = 0;
unsigned long seg;

+ nfs_pageio_init_read(&desc, dreq->inode,
+ &nfs_direct_read_completion_ops);
get_dreq(dreq);
+ desc.pg_dreq = dreq;

for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_read_schedule_segment(dreq, vec, pos);
+ result = nfs_direct_read_schedule_segment(&desc, vec, pos);
if (result < 0)
break;
requested_bytes += result;
@@ -424,6 +406,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
pos += vec->iov_len;
}

+ nfs_pageio_complete(&desc);
+
/*
* If no bytes were started, return the error, and let the
* generic layer handle the completion.
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f431e26..50d85e5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -304,8 +304,9 @@ struct nfs_pgio_completion_ops;
/* read.c */
extern struct nfs_read_header *nfs_readhdr_alloc(void);
extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
-extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount);
+extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops);
extern int nfs_initiate_read(struct rpc_clnt *clnt,
struct nfs_read_data *data,
const struct rpc_call_ops *call_ops);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index b344946..d6aa8368 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -48,8 +48,11 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->cred = hdr->req->wb_context->cred;
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = desc->pg_count;
+ hdr->dreq = desc->pg_dreq;
hdr->release = release;
hdr->completion_ops = desc->pg_completion_ops;
+ if (hdr->completion_ops->init_hdr)
+ hdr->completion_ops->init_hdr(hdr);
}

void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
@@ -116,9 +119,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
req->wb_page = page;
req->wb_index = page->index;
page_cache_get(page);
- BUG_ON(PagePrivate(page));
- BUG_ON(!PageLocked(page));
- BUG_ON(page->mapping->host != inode);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
@@ -257,6 +257,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
+ desc->pg_dreq = NULL;
}

/**
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 5e78af1..35e2dce 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -51,8 +51,8 @@ struct nfs_read_header *nfs_readhdr_alloc()
return rhdr;
}

-struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
+static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
{
struct nfs_read_data *data, *prealloc;

@@ -123,9 +123,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

-static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode,
- const struct nfs_pgio_completion_ops *compl_ops)
+void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
nfs_pageio_init_read_mds(pgio, inode, compl_ops);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 0a5b63f..f9ee9eb 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -68,6 +68,7 @@ struct nfs_pageio_descriptor {
const struct rpc_call_ops *pg_rpc_callops;
const struct nfs_pgio_completion_ops *pg_completion_ops;
struct pnfs_layout_segment *pg_lseg;
+ struct nfs_direct_req *pg_dreq;
};

#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2597f90..938d30a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1203,6 +1203,7 @@ struct nfs_pgio_header {
const struct rpc_call_ops *mds_ops;
void (*release) (struct nfs_pgio_header *hdr);
const struct nfs_pgio_completion_ops *completion_ops;
+ struct nfs_direct_req *dreq;
spinlock_t lock;
/* fields protected by lock */
int pnfs_error;
@@ -1216,8 +1217,6 @@ struct nfs_read_header {
struct nfs_read_data rpc_data;
};

-struct nfs_direct_req;
-
struct nfs_write_data {
struct nfs_pgio_header *header;
struct list_head list;
@@ -1259,6 +1258,7 @@ struct nfs_commit_data {

struct nfs_pgio_completion_ops {
void (*error_cleanup)(struct list_head *head);
+ void (*init_hdr)(struct nfs_pgio_header *hdr);
void (*completion)(struct nfs_pgio_header *hdr);
};

--
1.7.2.1


2012-04-20 18:36:51

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 17/28] NFS: create completion structure to pass into page_init functions

Factors out the code that will need to change when directio
starts using these code paths. This will allow directio to use
the generic pagein and flush routines.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/internal.h | 11 +++++------
fs/nfs/pagelist.c | 3 +++
fs/nfs/pnfs.c | 39 +++++++++++++++++++++++++--------------
fs/nfs/pnfs.h | 6 ++++--
fs/nfs/read.c | 36 ++++++++++++++++++++++--------------
fs/nfs/write.c | 41 ++++++++++++++++++++++++++---------------
include/linux/nfs_page.h | 2 ++
include/linux/nfs_xdr.h | 6 ++++++
8 files changed, 93 insertions(+), 51 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d05e352..f431e26 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -300,11 +300,10 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif

+struct nfs_pgio_completion_ops;
/* read.c */
-extern void nfs_async_read_error(struct list_head *head);
extern struct nfs_read_header *nfs_readhdr_alloc(void);
extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
-extern void nfs_read_completion(struct nfs_pgio_header *hdr);
extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
unsigned int pagecount);
extern int nfs_initiate_read(struct rpc_clnt *clnt,
@@ -314,21 +313,21 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode);
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);

/* write.c */
-extern void nfs_async_write_error(struct list_head *head);
extern struct nfs_write_header *nfs_writehdr_alloc(void);
extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
extern struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
unsigned int pagecount);
-extern void nfs_write_completion(struct nfs_pgio_header *hdr);
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags);
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_commit_data *p);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index cd4c038..4cf2a68 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
hdr->io_start = req_offset(hdr->req);
hdr->good_bytes = desc->pg_count;
hdr->release = release;
+ hdr->completion_ops = desc->pg_completion_ops;
}

void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
@@ -240,6 +241,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
+ const struct nfs_pgio_completion_ops *compl_ops,
size_t bsize,
int io_flags)
{
@@ -252,6 +254,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_recoalesce = 0;
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
+ desc->pg_completion_ops = compl_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2b89b54..4fa43e0 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1113,26 +1113,31 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);

bool
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

if (ld == NULL)
return false;
- nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+ nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
+ server->rsize, 0);
return true;
}

bool
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+ int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;

if (ld == NULL)
return false;
- nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+ nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
+ server->wsize, ioflags);
return true;
}

@@ -1162,13 +1167,15 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);

-static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head)
+static int pnfs_write_done_resend_to_mds(struct inode *inode,
+ struct list_head *head,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_pageio_descriptor pgio;
LIST_HEAD(failed);

/* Resend all requests through the MDS */
- nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE);
+ nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);

@@ -1201,7 +1208,8 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode,
- &hdr->pages);
+ &hdr->pages,
+ hdr->completion_ops);
}

/*
@@ -1292,7 +1300,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)

whdr = nfs_writehdr_alloc();
if (!whdr) {
- nfs_async_write_error(&desc->pg_list);
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return -ENOMEM;
@@ -1309,18 +1317,20 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
} else
pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
if (atomic_dec_and_test(&hdr->refcnt))
- nfs_write_completion(hdr);
+ hdr->completion_ops->completion(hdr);
return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);

-static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head)
+static int pnfs_read_done_resend_to_mds(struct inode *inode,
+ struct list_head *head,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
struct nfs_pageio_descriptor pgio;
LIST_HEAD(failed);

/* Resend all requests through the MDS */
- nfs_pageio_init_read_mds(&pgio, inode);
+ nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);

@@ -1349,7 +1359,8 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
}
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
- &hdr->pages);
+ &hdr->pages,
+ hdr->completion_ops);
}

/*
@@ -1443,7 +1454,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)

rhdr = nfs_readhdr_alloc();
if (!rhdr) {
- nfs_async_read_error(&desc->pg_list);
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
ret = -ENOMEM;
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
@@ -1461,7 +1472,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
} else
pnfs_do_multiple_reads(desc, &hdr->rpc_list);
if (atomic_dec_and_test(&hdr->refcnt))
- nfs_read_completion(hdr);
+ hdr->completion_ops->completion(hdr);
return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 442ebf6..734e4ef 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -168,8 +168,10 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);

-bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
-bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
+ const struct nfs_pgio_completion_ops *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
+ int, const struct nfs_pgio_completion_ops *);

void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c9633b2..5e78af1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,6 +31,7 @@

static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_common_ops;
+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;

static struct kmem_cache *nfs_rdata_cachep;

@@ -95,7 +96,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata)
else
rdata->header = NULL;
if (atomic_dec_and_test(&hdr->refcnt))
- nfs_read_completion(hdr);
+ hdr->completion_ops->completion(hdr);
}

static
@@ -108,9 +109,10 @@ int nfs_return_empty_page(struct page *page)
}

void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+ nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
NFS_SERVER(inode)->rsize, 0);
}

@@ -122,10 +124,11 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode)
+ struct inode *inode,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
- if (!pnfs_pageio_init_read(pgio, inode))
- nfs_pageio_init_read_mds(pgio, inode);
+ if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
+ nfs_pageio_init_read_mds(pgio, inode, compl_ops);
}

int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
@@ -146,7 +149,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);

- nfs_pageio_init_read(&pgio, inode);
+ nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
return 0;
@@ -170,7 +173,7 @@ static void nfs_readpage_release(struct nfs_page *req)
}

/* Note io was page aligned */
-void nfs_read_completion(struct nfs_pgio_header *hdr)
+static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
unsigned long bytes = 0;

@@ -300,7 +303,7 @@ nfs_do_multiple_reads(struct list_head *head,
return ret;
}

-void
+static void
nfs_async_read_error(struct list_head *head)
{
struct nfs_page *req;
@@ -312,6 +315,11 @@ nfs_async_read_error(struct list_head *head)
}
}

+static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
+ .error_cleanup = nfs_async_read_error,
+ .completion = nfs_read_completion,
+};
+
/*
* Generate multiple requests to fill a single page.
*
@@ -362,7 +370,7 @@ out_bad:
list_del(&data->list);
nfs_readdata_release(data);
}
- nfs_async_read_error(&hdr->pages);
+ desc->pg_completion_ops->error_cleanup(&hdr->pages);
return -ENOMEM;
}

@@ -378,7 +386,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
desc->pg_count));
if (!data) {
- nfs_async_read_error(head);
+ desc->pg_completion_ops->error_cleanup(head);
ret = -ENOMEM;
goto out;
}
@@ -414,7 +422,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)

rhdr = nfs_readhdr_alloc();
if (!rhdr) {
- nfs_async_read_error(&desc->pg_list);
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM;
}
hdr = &rhdr->header;
@@ -427,7 +435,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
else
set_bit(NFS_IOHDR_REDO, &hdr->flags);
if (atomic_dec_and_test(&hdr->refcnt))
- nfs_read_completion(hdr);
+ hdr->completion_ops->completion(hdr);
return ret;
}

@@ -652,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */

- nfs_pageio_init_read(&pgio, inode);
+ nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);

ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 076075e..1503972 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -40,10 +40,12 @@
* Local function declarations
*/
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
- struct inode *inode, int ioflags);
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
+static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -128,7 +130,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata)
else
wdata->header = NULL;
if (atomic_dec_and_test(&hdr->refcnt))
- nfs_write_completion(hdr);
+ hdr->completion_ops->completion(hdr);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -337,7 +339,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
struct nfs_pageio_descriptor pgio;
int err;

- nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
+ nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+ &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
nfs_pageio_complete(&pgio);
if (err < 0)
@@ -380,7 +383,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)

nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

- nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
+ nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
+ &nfs_async_write_completion_ops);
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);

@@ -558,7 +562,7 @@ int nfs_write_need_commit(struct nfs_write_data *data)

#endif

-void nfs_write_completion(struct nfs_pgio_header *hdr)
+static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
unsigned long bytes = 0;

@@ -1000,7 +1004,7 @@ static void nfs_redirty_request(struct nfs_page *req)
nfs_end_page_writeback(page);
}

-void nfs_async_write_error(struct list_head *head)
+static void nfs_async_write_error(struct list_head *head)
{
struct nfs_page *req;

@@ -1011,6 +1015,11 @@ void nfs_async_write_error(struct list_head *head)
}
}

+static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
+ .error_cleanup = nfs_async_write_error,
+ .completion = nfs_write_completion,
+};
+
/*
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
@@ -1060,7 +1069,7 @@ out_bad:
list_del(&data->list);
nfs_writedata_release(data);
}
- nfs_async_write_error(&hdr->pages);
+ desc->pg_completion_ops->error_cleanup(&hdr->pages);
return -ENOMEM;
}

@@ -1084,7 +1093,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
desc->pg_count));
if (!data) {
- nfs_async_write_error(head);
+ desc->pg_completion_ops->error_cleanup(head);
ret = -ENOMEM;
goto out;
}
@@ -1125,7 +1134,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)

whdr = nfs_writehdr_alloc();
if (!whdr) {
- nfs_async_write_error(&desc->pg_list);
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM;
}
hdr = &whdr->header;
@@ -1139,7 +1148,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
else
set_bit(NFS_IOHDR_REDO, &hdr->flags);
if (atomic_dec_and_test(&hdr->refcnt))
- nfs_write_completion(hdr);
+ hdr->completion_ops->completion(hdr);
return ret;
}

@@ -1149,9 +1158,10 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
};

void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags)
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+ nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
NFS_SERVER(inode)->wsize, ioflags);
}

@@ -1163,10 +1173,11 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);

static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags)
+ struct inode *inode, int ioflags,
+ const struct nfs_pgio_completion_ops *compl_ops)
{
- if (!pnfs_pageio_init_write(pgio, inode, ioflags))
- nfs_pageio_init_write_mds(pgio, inode, ioflags);
+ if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
+ nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
}

void nfs_write_prepare(struct rpc_task *task, void *calldata)
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 5c52034..bc5b7a5 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -67,6 +67,7 @@ struct nfs_pageio_descriptor {
int pg_ioflags;
int pg_error;
const struct rpc_call_ops *pg_rpc_callops;
+ const struct nfs_pgio_completion_ops *pg_completion_ops;
struct pnfs_layout_segment *pg_lseg;
};

@@ -83,6 +84,7 @@ extern void nfs_release_request(struct nfs_page *req);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
+ const struct nfs_pgio_completion_ops *compl_ops,
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 694cfa4..2597f90 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1202,6 +1202,7 @@ struct nfs_pgio_header {
loff_t io_start;
const struct rpc_call_ops *mds_ops;
void (*release) (struct nfs_pgio_header *hdr);
+ const struct nfs_pgio_completion_ops *completion_ops;
spinlock_t lock;
/* fields protected by lock */
int pnfs_error;
@@ -1256,6 +1257,11 @@ struct nfs_commit_data {
int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
};

+struct nfs_pgio_completion_ops {
+ void (*error_cleanup)(struct list_head *head);
+ void (*completion)(struct nfs_pgio_header *hdr);
+};
+
struct nfs_unlinkdata {
struct hlist_node list;
struct nfs_removeargs args;
--
1.7.2.1


2012-04-20 18:36:53

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 24/28] NFS: create nfs_commit_completion_ops

Factor out the commit completion and error-cleanup code that will
need to change when directio starts using these code paths.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/internal.h | 5 ++---
fs/nfs/nfs4filelayout.c | 12 ++++--------
fs/nfs/write.c | 31 +++++++++++++++++++++----------
include/linux/nfs_xdr.h | 9 +++++++++
4 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 53d148b..3d45213 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -344,13 +344,12 @@ extern int nfs_initiate_commit(struct rpc_clnt *clnt,
int how);
extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
- struct pnfs_layout_segment *lseg);
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo);
-void nfs_commit_clear_lock(struct nfs_inode *nfsi);
void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_commit_release_pages(struct nfs_commit_data *data);
void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
struct nfs_commit_info *cinfo);
void nfs_request_remove_commit_list(struct nfs_page *req,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index fe2cb55..26d1da4 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -347,12 +347,8 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
static void filelayout_commit_release(void *calldata)
{
struct nfs_commit_data *data = calldata;
- struct nfs_commit_info cinfo;

- nfs_commit_release_pages(data);
- nfs_init_cinfo(&cinfo, data->inode, data->dreq);
- if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
- nfs_commit_clear_lock(NFS_I(data->inode));
+ data->completion_ops->completion(data);
put_lseg(data->lseg);
nfs_commitdata_release(data);
}
@@ -1108,7 +1104,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
nreq += alloc_ds_commits(cinfo, &list);

if (nreq == 0) {
- nfs_commit_clear_lock(NFS_I(inode));
+ cinfo->completion_ops->error_cleanup(NFS_I(inode));
goto out;
}

@@ -1117,14 +1113,14 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
list_for_each_entry_safe(data, tmp, &list, pages) {
list_del_init(&data->pages);
if (!data->lseg) {
- nfs_init_commit(data, mds_pages, NULL);
+ nfs_init_commit(data, mds_pages, NULL, cinfo);
nfs_initiate_commit(NFS_CLIENT(inode), data,
data->mds_ops, how);
} else {
struct pnfs_commit_bucket *buckets;

buckets = cinfo->ds->buckets;
- nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg);
+ nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo);
filelayout_initiate_commit(data, how);
}
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 18bf700..333d01d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
+static const struct nfs_commit_completion_ops nfs_commit_completion_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -505,6 +506,7 @@ static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
cinfo->lock = &inode->i_lock;
cinfo->mds = &NFS_I(inode)->commit_info;
cinfo->ds = pnfs_get_ds_info(inode);
+ cinfo->completion_ops = &nfs_commit_completion_ops;
}

void nfs_init_cinfo(struct nfs_commit_info *cinfo,
@@ -1358,13 +1360,12 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
return (ret < 0) ? ret : 1;
}

-void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
{
clear_bit(NFS_INO_COMMIT, &nfsi->flags);
smp_mb__after_clear_bit();
wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}
-EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);

void nfs_commitdata_release(struct nfs_commit_data *data)
{
@@ -1413,8 +1414,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
* Set up the argument/result storage required for the RPC call.
*/
void nfs_init_commit(struct nfs_commit_data *data,
- struct list_head *head,
- struct pnfs_layout_segment *lseg)
+ struct list_head *head,
+ struct pnfs_layout_segment *lseg,
+ struct nfs_commit_info *cinfo)
{
struct nfs_page *first = nfs_list_entry(head->next);
struct inode *inode = first->wb_context->dentry->d_inode;
@@ -1428,6 +1430,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
data->cred = first->wb_context->cred;
data->lseg = lseg; /* reference transferred */
data->mds_ops = &nfs_commit_ops;
+ data->completion_ops = cinfo->completion_ops;

data->args.fh = NFS_FH(data->inode);
/* Note: we always request a commit of the entire inode */
@@ -1473,11 +1476,12 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
goto out_bad;

/* Set up the argument struct */
- nfs_init_commit(data, head, NULL);
+ nfs_init_commit(data, head, NULL, cinfo);
+ atomic_inc(&cinfo->mds->rpcs_out);
return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
out_bad:
nfs_retry_commit(head, NULL, cinfo);
- nfs_commit_clear_lock(NFS_I(inode));
+ cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}

@@ -1495,10 +1499,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_done(task, data);
}

-void nfs_commit_release_pages(struct nfs_commit_data *data)
+static void nfs_commit_release_pages(struct nfs_commit_data *data)
{
struct nfs_page *req;
int status = data->task.tk_status;
+ struct nfs_commit_info cinfo;

while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
@@ -1531,15 +1536,16 @@ void nfs_commit_release_pages(struct nfs_commit_data *data)
next:
nfs_unlock_request(req);
}
+ nfs_init_cinfo(&cinfo, data->inode, data->dreq);
+ if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+ nfs_commit_clear_lock(NFS_I(data->inode));
}
-EXPORT_SYMBOL_GPL(nfs_commit_release_pages);

static void nfs_commit_release(void *calldata)
{
struct nfs_commit_data *data = calldata;

- nfs_commit_release_pages(data);
- nfs_commit_clear_lock(NFS_I(data->inode));
+ data->completion_ops->completion(data);
nfs_commitdata_release(calldata);
}

@@ -1549,6 +1555,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
.rpc_release = nfs_commit_release,
};

+static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
+ .completion = nfs_commit_release_pages,
+ .error_cleanup = nfs_commit_clear_lock,
+};
+
static int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo)
{
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2884ced..756152f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1258,10 +1258,18 @@ struct nfs_mds_commit_info {
struct list_head list;
};

+struct nfs_commit_data;
+struct nfs_inode;
+struct nfs_commit_completion_ops {
+ void (*error_cleanup) (struct nfs_inode *nfsi);
+ void (*completion) (struct nfs_commit_data *data);
+};
+
struct nfs_commit_info {
spinlock_t *lock;
struct nfs_mds_commit_info *mds;
struct pnfs_ds_commit_info *ds;
+ const struct nfs_commit_completion_ops *completion_ops;
};

struct nfs_commit_data {
@@ -1280,6 +1288,7 @@ struct nfs_commit_data {
struct nfs_client *ds_clp; /* pNFS data server */
int ds_commit_index;
const struct rpc_call_ops *mds_ops;
+ const struct nfs_commit_completion_ops *completion_ops;
int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data);
};

--
1.7.2.1


2012-04-20 18:36:50

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 12/28] NFS: create common nfs_pgio_header for both read and write

In order to avoid duplicating all the data in nfs_read_data whenever we
split it up into multiple RPC calls (either due to a short read result
or due to rsize < PAGE_SIZE), we split out the bits that are the same
per RPC call into a separate "header" structure.

The goal this patch moves towards is to have a single header
refcounted by several rpc_data structures. Thus, we want to always refer
from rpc_data to the header, and not the other way around. This patch
comes close to that ideal, but the directio code currently needs some
special casing, isolated in the nfs_direct_[read|write]hdr_release()
functions. This will be dealt with in a future patch.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/blocklayout/blocklayout.c | 79 ++++++++++++++++-------------
fs/nfs/direct.c | 73 ++++++++++++++++++--------
fs/nfs/internal.h | 4 ++
fs/nfs/nfs3proc.c | 14 +++--
fs/nfs/nfs4filelayout.c | 40 ++++++++------
fs/nfs/nfs4proc.c | 44 +++++++++-------
fs/nfs/objlayout/objio_osd.c | 16 +++---
fs/nfs/objlayout/objlayout.c | 19 ++++---
fs/nfs/pnfs.c | 102 +++++++++++++++++++++----------------
fs/nfs/proc.c | 10 +++-
fs/nfs/read.c | 89 ++++++++++++++++++--------------
fs/nfs/write.c | 104 +++++++++++++++++++++-----------------
include/linux/nfs_fs.h | 12 ----
include/linux/nfs_xdr.h | 48 ++++++++++--------
14 files changed, 376 insertions(+), 278 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 9c94297..192e16a 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -185,7 +185,6 @@ static void bl_end_io_read(struct bio *bio, int err)
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct nfs_read_data *rdata = (struct nfs_read_data *)par->data;

do {
struct page *page = bvec->bv_page;
@@ -196,9 +195,12 @@ static void bl_end_io_read(struct bio *bio, int err)
SetPageUptodate(page);
} while (bvec >= bio->bi_io_vec);
if (!uptodate) {
- if (!rdata->pnfs_error)
- rdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(rdata->lseg);
+ struct nfs_read_data *rdata = par->data;
+ struct nfs_pgio_header *header = rdata->header;
+
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -219,7 +221,7 @@ bl_end_par_io_read(void *data, int unused)
{
struct nfs_read_data *rdata = data;

- rdata->task.tk_status = rdata->pnfs_error;
+ rdata->task.tk_status = rdata->header->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work);
}
@@ -227,6 +229,7 @@ bl_end_par_io_read(void *data, int unused)
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *header = rdata->header;
int i, hole;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -254,10 +257,10 @@ bl_read_pagelist(struct nfs_read_data *rdata)
bl_put_extent(cow_read);
bio = bl_submit_bio(READ, bio);
/* Get the next one */
- be = bl_find_get_extent(BLK_LSEG2EXT(rdata->lseg),
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
isect, &cow_read);
if (!be) {
- rdata->pnfs_error = -EIO;
+ header->pnfs_error = -EIO;
goto out;
}
extent_length = be->be_length -
@@ -284,7 +287,7 @@ bl_read_pagelist(struct nfs_read_data *rdata)
isect, pages[i], be_read,
bl_end_io_read, par);
if (IS_ERR(bio)) {
- rdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
@@ -292,9 +295,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS;
}
- if ((isect << SECTOR_SHIFT) >= rdata->inode->i_size) {
+ if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
rdata->res.eof = 1;
- rdata->res.count = rdata->inode->i_size - f_offset;
+ rdata->res.count = header->inode->i_size - f_offset;
} else {
rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
}
@@ -343,7 +346,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;

do {
struct page *page = bvec->bv_page;
@@ -356,9 +358,12 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
} while (bvec >= bio->bi_io_vec);

if (unlikely(!uptodate)) {
- if (!wdata->pnfs_error)
- wdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(wdata->lseg);
+ struct nfs_write_data *data = par->data;
+ struct nfs_pgio_header *header = data->header;
+
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -368,12 +373,13 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_write_data *wdata = (struct nfs_write_data *)par->data;
+ struct nfs_write_data *data = par->data;
+ struct nfs_pgio_header *header = data->header;

if (!uptodate) {
- if (!wdata->pnfs_error)
- wdata->pnfs_error = -EIO;
- pnfs_set_lo_fail(wdata->lseg);
+ if (!header->pnfs_error)
+ header->pnfs_error = -EIO;
+ pnfs_set_lo_fail(header->lseg);
}
bio_put(bio);
put_parallel(par);
@@ -389,9 +395,9 @@ static void bl_write_cleanup(struct work_struct *work)
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_write_data, task);
- if (likely(!wdata->pnfs_error)) {
+ if (likely(!wdata->header->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+ mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
wdata->args.offset, wdata->args.count);
}
pnfs_ld_write_done(wdata);
@@ -402,12 +408,12 @@ static void bl_end_par_io_write(void *data, int num_se)
{
struct nfs_write_data *wdata = data;

- if (unlikely(wdata->pnfs_error)) {
- bl_free_short_extents(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+ if (unlikely(wdata->header->pnfs_error)) {
+ bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
num_se);
}

- wdata->task.tk_status = wdata->pnfs_error;
+ wdata->task.tk_status = wdata->header->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work);
@@ -538,6 +544,7 @@ check_page:
static enum pnfs_try_status
bl_write_pagelist(struct nfs_write_data *wdata, int sync)
{
+ struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
@@ -550,7 +557,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
pgoff_t index;
u64 temp;
int npg_per_block =
- NFS_SERVER(wdata->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
+ NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;

dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
/* At this point, wdata->pages is a (sequential) list of nfs_pages.
@@ -564,7 +571,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
/* At this point, have to be more careful with error handling */

isect = (sector_t) ((offset & (long)PAGE_CACHE_MASK) >> SECTOR_SHIFT);
- be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), isect, &cow_read);
if (!be || !is_writable(be, isect)) {
dprintk("%s no matching extents!\n", __func__);
goto out_mds;
@@ -595,10 +602,10 @@ fill_invalid_ext:
dprintk("%s zero %dth page: index %lu isect %llu\n",
__func__, npg_zero, index,
(unsigned long long)isect);
- page = bl_find_get_zeroing_page(wdata->inode, index,
+ page = bl_find_get_zeroing_page(header->inode, index,
cow_read);
if (unlikely(IS_ERR(page))) {
- wdata->pnfs_error = PTR_ERR(page);
+ header->pnfs_error = PTR_ERR(page);
goto out;
} else if (page == NULL)
goto next_page;
@@ -610,7 +617,7 @@ fill_invalid_ext:
__func__, ret);
end_page_writeback(page);
page_cache_release(page);
- wdata->pnfs_error = ret;
+ header->pnfs_error = ret;
goto out;
}
if (likely(!bl_push_one_short_extent(be->be_inval)))
@@ -618,11 +625,11 @@ fill_invalid_ext:
else {
end_page_writeback(page);
page_cache_release(page);
- wdata->pnfs_error = -ENOMEM;
+ header->pnfs_error = -ENOMEM;
goto out;
}
/* FIXME: This should be done in bi_end_io */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+ mark_extents_written(BLK_LSEG2EXT(header->lseg),
page->index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE);

@@ -630,7 +637,7 @@ fill_invalid_ext:
isect, page, be,
bl_end_io_write_zero, par);
if (IS_ERR(bio)) {
- wdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
@@ -651,10 +658,10 @@ next_page:
bl_put_extent(be);
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
- be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg),
+ be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
isect, NULL);
if (!be || !is_writable(be, isect)) {
- wdata->pnfs_error = -EINVAL;
+ header->pnfs_error = -EINVAL;
goto out;
}
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
@@ -662,7 +669,7 @@ next_page:
be->be_inval)))
par->bse_count++;
else {
- wdata->pnfs_error = -ENOMEM;
+ header->pnfs_error = -ENOMEM;
goto out;
}
}
@@ -675,7 +682,7 @@ next_page:
if (unlikely(ret)) {
dprintk("%s bl_mark_sectors_init fail %d\n",
__func__, ret);
- wdata->pnfs_error = ret;
+ header->pnfs_error = ret;
goto out;
}
}
@@ -683,7 +690,7 @@ next_page:
isect, pages[i], be,
bl_end_io_write, par);
if (IS_ERR(bio)) {
- wdata->pnfs_error = PTR_ERR(bio);
+ header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index fb7fbaa..56176af 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -242,7 +242,7 @@ static void nfs_direct_read_release(void *calldata)
{

struct nfs_read_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *)data->header->req;
int status = data->task.tk_status;

spin_lock(&dreq->lock);
@@ -269,6 +269,15 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
.rpc_release = nfs_direct_read_release,
};

+static void nfs_direct_readhdr_release(struct nfs_read_header *rhdr)
+{
+ struct nfs_read_data *data = &rhdr->rpc_data;
+
+ if (data->pagevec != data->page_array)
+ kfree(data->pagevec);
+ nfs_readhdr_free(&rhdr->header);
+}
+
/*
* For each rsize'd chunk of the user's buffer, dispatch an NFS READ
* operation. If nfs_readdata_alloc() or get_user_pages() fails,
@@ -301,6 +310,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
ssize_t started = 0;

do {
+ struct nfs_read_header *rhdr;
struct nfs_read_data *data;
size_t bytes;

@@ -308,23 +318,24 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
bytes = min(rsize,count);

result = -ENOMEM;
- data = nfs_readdata_alloc(nfs_page_array_len(pgbase, bytes));
- if (unlikely(!data))
+ rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes));
+ if (unlikely(!rhdr))
break;
+ data = &rhdr->rpc_data;

down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
data->npages, 1, 0, data->pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0) {
- nfs_readdata_free(data);
+ nfs_direct_readhdr_release(rhdr);
break;
}
if ((unsigned)result < data->npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
nfs_direct_release_pages(data->pagevec, result);
- nfs_readdata_free(data);
+ nfs_direct_readhdr_release(rhdr);
break;
}
bytes -= pgbase;
@@ -333,9 +344,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,

get_dreq(dreq);

- data->req = (struct nfs_page *) dreq;
- data->inode = inode;
- data->cred = msg.rpc_cred;
+ rhdr->header.req = (struct nfs_page *) dreq;
+ rhdr->header.inode = inode;
+ rhdr->header.cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = get_nfs_open_context(ctx);
data->args.lock_context = dreq->l_ctx;
@@ -447,13 +458,23 @@ out:
return result;
}

+static void nfs_direct_writehdr_release(struct nfs_write_header *whdr)
+{
+ struct nfs_write_data *data = &whdr->rpc_data;
+
+ if (data->pagevec != data->page_array)
+ kfree(data->pagevec);
+ nfs_writehdr_free(&whdr->header);
+}
+
static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
{
while (!list_empty(&dreq->rewrite_list)) {
- struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
- list_del(&data->pages);
- nfs_direct_release_pages(data->pagevec, data->npages);
- nfs_writedata_free(data);
+ struct nfs_pgio_header *hdr = list_entry(dreq->rewrite_list.next, struct nfs_pgio_header, pages);
+ struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+ list_del(&hdr->pages);
+ nfs_direct_release_pages(whdr->rpc_data.pagevec, whdr->rpc_data.npages);
+ nfs_direct_writehdr_release(whdr);
}
}

@@ -463,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
struct inode *inode = dreq->inode;
struct list_head *p;
struct nfs_write_data *data;
+ struct nfs_pgio_header *hdr;
struct rpc_task *task;
struct rpc_message msg = {
.rpc_cred = dreq->ctx->cred,
@@ -479,7 +501,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
get_dreq(dreq);

list_for_each(p, &dreq->rewrite_list) {
- data = list_entry(p, struct nfs_write_data, pages);
+ hdr = list_entry(p, struct nfs_pgio_header, pages);
+ data = &(container_of(hdr, struct nfs_write_header, header))->rpc_data;

get_dreq(dreq);

@@ -652,7 +675,8 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
static void nfs_direct_write_release(void *calldata)
{
struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ struct nfs_pgio_header *hdr = data->header;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) hdr->req;
int status = data->task.tk_status;

spin_lock(&dreq->lock);
@@ -684,7 +708,7 @@ out_unlock:
spin_unlock(&dreq->lock);

if (put_dreq(dreq))
- nfs_direct_write_complete(dreq, data->inode);
+ nfs_direct_write_complete(dreq, hdr->inode);
}

static const struct rpc_call_ops nfs_write_direct_ops = {
@@ -725,6 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
ssize_t started = 0;

do {
+ struct nfs_write_header *whdr;
struct nfs_write_data *data;
size_t bytes;

@@ -732,23 +757,25 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
bytes = min(wsize,count);

result = -ENOMEM;
- data = nfs_writedata_alloc(nfs_page_array_len(pgbase, bytes));
- if (unlikely(!data))
+ whdr = nfs_writehdr_alloc(nfs_page_array_len(pgbase, bytes));
+ if (unlikely(!whdr))
break;

+ data = &whdr->rpc_data;
+
down_read(&current->mm->mmap_sem);
result = get_user_pages(current, current->mm, user_addr,
data->npages, 0, 0, data->pagevec, NULL);
up_read(&current->mm->mmap_sem);
if (result < 0) {
- nfs_writedata_free(data);
+ nfs_direct_writehdr_release(whdr);
break;
}
if ((unsigned)result < data->npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
nfs_direct_release_pages(data->pagevec, result);
- nfs_writedata_free(data);
+ nfs_direct_writehdr_release(whdr);
break;
}
bytes -= pgbase;
@@ -757,11 +784,11 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,

get_dreq(dreq);

- list_move_tail(&data->pages, &dreq->rewrite_list);
+ list_move_tail(&whdr->header.pages, &dreq->rewrite_list);

- data->req = (struct nfs_page *) dreq;
- data->inode = inode;
- data->cred = msg.rpc_cred;
+ whdr->header.req = (struct nfs_page *) dreq;
+ whdr->header.inode = inode;
+ whdr->header.cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.lock_context = dreq->l_ctx;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index abdf40c..9b2b8bf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -296,6 +296,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);

struct nfs_pageio_descriptor;
/* read.c */
+extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages);
+extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
extern int nfs_initiate_read(struct rpc_clnt *clnt,
struct nfs_read_data *data,
const struct rpc_call_ops *call_ops);
@@ -309,6 +311,8 @@ extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);

/* write.c */
+extern struct nfs_write_header *nfs_writehdr_alloc(unsigned int npages);
+extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
struct list_head *head);
extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index b1daca7..56dcefc 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -811,11 +811,13 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,

static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
- if (nfs3_async_handle_jukebox(task, data->inode))
+ struct inode *inode = data->header->inode;
+
+ if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;

- nfs_invalidate_atime(data->inode);
- nfs_refresh_inode(data->inode, &data->fattr);
+ nfs_invalidate_atime(inode);
+ nfs_refresh_inode(inode, &data->fattr);
return 0;
}

@@ -831,10 +833,12 @@ static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da

static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
- if (nfs3_async_handle_jukebox(task, data->inode))
+ struct inode *inode = data->header->inode;
+
+ if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
return 0;
}

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index c536328..ad1d680 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -148,6 +148,7 @@ wait_on_recovery:
static int filelayout_read_done_cb(struct rpc_task *task,
struct nfs_read_data *data)
{
+ struct nfs_pgio_header *hdr = data->header;
int reset = 0;

dprintk("%s DS read\n", __func__);
@@ -157,7 +158,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
- pnfs_set_lo_fail(data->lseg);
+ pnfs_set_lo_fail(hdr->lseg);
nfs4_reset_read(task, data);
}
rpc_restart_call_prepare(task);
@@ -175,13 +176,15 @@ static int filelayout_read_done_cb(struct rpc_task *task,
static void
filelayout_set_layoutcommit(struct nfs_write_data *wdata)
{
- if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
+ struct nfs_pgio_header *hdr = wdata->header;
+
+ if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
wdata->res.verf->committed == NFS_FILE_SYNC)
return;

pnfs_set_layoutcommit(wdata);
- dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
- (unsigned long) NFS_I(wdata->inode)->layout->plh_lwb);
+ dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+ (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}

/*
@@ -210,27 +213,28 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

/* Note this may cause RPC to be resent */
- rdata->mds_ops->rpc_call_done(task, data);
+ rdata->header->mds_ops->rpc_call_done(task, data);
}

static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
struct nfs_read_data *rdata = data;

- rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics);
+ rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
}

static void filelayout_read_release(void *data)
{
struct nfs_read_data *rdata = data;

- put_lseg(rdata->lseg);
- rdata->mds_ops->rpc_release(data);
+ put_lseg(rdata->header->lseg);
+ rdata->header->mds_ops->rpc_release(data);
}

static int filelayout_write_done_cb(struct rpc_task *task,
struct nfs_write_data *data)
{
+ struct nfs_pgio_header *hdr = data->header;
int reset = 0;

if (filelayout_async_handle_error(task, data->args.context->state,
@@ -238,7 +242,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
- pnfs_set_lo_fail(data->lseg);
+ pnfs_set_lo_fail(hdr->lseg);
nfs4_reset_write(task, data);
}
rpc_restart_call_prepare(task);
@@ -297,22 +301,22 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
struct nfs_write_data *wdata = data;

/* Note this may cause RPC to be resent */
- wdata->mds_ops->rpc_call_done(task, data);
+ wdata->header->mds_ops->rpc_call_done(task, data);
}

static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
struct nfs_write_data *wdata = data;

- rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics);
+ rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
}

static void filelayout_write_release(void *data)
{
struct nfs_write_data *wdata = data;

- put_lseg(wdata->lseg);
- wdata->mds_ops->rpc_release(data);
+ put_lseg(wdata->header->lseg);
+ wdata->header->mds_ops->rpc_release(data);
}

static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@@ -377,7 +381,8 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
static enum pnfs_try_status
filelayout_read_pagelist(struct nfs_read_data *data)
{
- struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs_pgio_header *hdr = data->header;
+ struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
loff_t offset = data->args.offset;
u32 j, idx;
@@ -385,7 +390,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
int status;

dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
- __func__, data->inode->i_ino,
+ __func__, hdr->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset);

if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
@@ -423,7 +428,8 @@ filelayout_read_pagelist(struct nfs_read_data *data)
static enum pnfs_try_status
filelayout_write_pagelist(struct nfs_write_data *data, int sync)
{
- struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs_pgio_header *hdr = data->header;
+ struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds;
loff_t offset = data->args.offset;
u32 j, idx;
@@ -445,7 +451,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
return PNFS_NOT_ATTEMPTED;
}
dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
- data->inode->i_ino, sync, (size_t) data->args.count, offset,
+ hdr->inode->i_ino, sync, (size_t) data->args.count, offset,
ds->ds_remotestr);

data->write_done_cb = filelayout_write_done_cb;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 54f6268..75d5959 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3281,12 +3281,12 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,

void __nfs4_read_done_cb(struct nfs_read_data *data)
{
- nfs_invalidate_atime(data->inode);
+ nfs_invalidate_atime(data->header->inode);
}

static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
{
- struct nfs_server *server = NFS_SERVER(data->inode);
+ struct nfs_server *server = NFS_SERVER(data->header->inode);

if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -3321,7 +3321,7 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message

static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
{
- if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+ if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
&data->args.seq_args,
&data->res.seq_res,
task))
@@ -3332,22 +3332,25 @@ static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_da
/* Reset the the nfs_read_data to send the read to the MDS. */
void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
{
+ struct nfs_pgio_header *hdr = data->header;
+ struct inode *inode = hdr->inode;
+
dprintk("%s Reset task for i/o through\n", __func__);
- put_lseg(data->lseg);
- data->lseg = NULL;
+ put_lseg(hdr->lseg);
+ hdr->lseg = NULL;
+ data->ds_clp = NULL;
/* offsets will differ in the dense stripe case */
data->args.offset = data->mds_offset;
- data->ds_clp = NULL;
- data->args.fh = NFS_FH(data->inode);
+ data->args.fh = NFS_FH(inode);
data->read_done_cb = nfs4_read_done_cb;
- task->tk_ops = data->mds_ops;
- rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+ task->tk_ops = hdr->mds_ops;
+ rpc_task_reset_client(task, NFS_CLIENT(inode));
}
EXPORT_SYMBOL_GPL(nfs4_reset_read);

static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
{
- struct inode *inode = data->inode;
+ struct inode *inode = data->header->inode;

if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task);
@@ -3371,25 +3374,28 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
/* Reset the the nfs_write_data to send the write to the MDS. */
void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
{
+ struct nfs_pgio_header *hdr = data->header;
+ struct inode *inode = hdr->inode;
+
dprintk("%s Reset task for i/o through\n", __func__);
- put_lseg(data->lseg);
- data->lseg = NULL;
- data->ds_clp = NULL;
+ put_lseg(hdr->lseg);
+ hdr->lseg = NULL;
+ data->ds_clp = NULL;
data->write_done_cb = nfs4_write_done_cb;
- data->args.fh = NFS_FH(data->inode);
+ data->args.fh = NFS_FH(inode);
data->args.bitmask = data->res.server->cache_consistency_bitmask;
data->args.offset = data->mds_offset;
data->res.fattr = &data->fattr;
- task->tk_ops = data->mds_ops;
- rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+ task->tk_ops = hdr->mds_ops;
+ rpc_task_reset_client(task, NFS_CLIENT(inode));
}
EXPORT_SYMBOL_GPL(nfs4_reset_write);

static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct nfs_server *server = NFS_SERVER(data->inode);
+ struct nfs_server *server = NFS_SERVER(data->header->inode);

- if (data->lseg) {
+ if (data->header->lseg) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
} else
@@ -3405,7 +3411,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag

static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
{
- if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+ if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
&data->args.seq_args,
&data->res.seq_res,
task))
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4bff4a3..fbf4874 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -440,11 +440,12 @@ static void _read_done(struct ore_io_state *ios, void *private)

int objio_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios;
int ret;

- ret = objio_alloc_io_state(NFS_I(rdata->inode)->layout, true,
- rdata->lseg, rdata->args.pages, rdata->args.pgbase,
+ ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
+ hdr->lseg, rdata->args.pages, rdata->args.pgbase,
rdata->args.offset, rdata->args.count, rdata,
GFP_KERNEL, &objios);
if (unlikely(ret))
@@ -483,12 +484,12 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
struct nfs_write_data *wdata = objios->oir.rpcdata;
+ struct address_space *mapping = wdata->header->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
- struct page *page = find_get_page(wdata->inode->i_mapping, index);
+ struct page *page = find_get_page(mapping, index);

if (!page) {
- page = find_or_create_page(wdata->inode->i_mapping,
- index, GFP_NOFS);
+ page = find_or_create_page(mapping, index, GFP_NOFS);
if (unlikely(!page)) {
dprintk("%s: grab_cache_page Failed index=0x%lx\n",
__func__, index);
@@ -518,11 +519,12 @@ static const struct _ore_r4w_op _r4w_op = {

int objio_write_pagelist(struct nfs_write_data *wdata, int how)
{
+ struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios;
int ret;

- ret = objio_alloc_io_state(NFS_I(wdata->inode)->layout, false,
- wdata->lseg, wdata->args.pages, wdata->args.pgbase,
+ ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
+ hdr->lseg, wdata->args.pages, wdata->args.pgbase,
wdata->args.offset, wdata->args.count, wdata, GFP_NOFS,
&objios);
if (unlikely(ret))
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 8d45f1c..3f83fc2 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -258,7 +258,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
if (status >= 0)
rdata->res.count = status;
else
- rdata->pnfs_error = status;
+ rdata->header->pnfs_error = status;
objlayout_iodone(oir);
/* must not use oir after this point */

@@ -279,12 +279,14 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
enum pnfs_try_status
objlayout_read_pagelist(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *hdr = rdata->header;
+ struct inode *inode = hdr->inode;
loff_t offset = rdata->args.offset;
size_t count = rdata->args.count;
int err;
loff_t eof;

- eof = i_size_read(rdata->inode);
+ eof = i_size_read(inode);
if (unlikely(offset + count > eof)) {
if (offset >= eof) {
err = 0;
@@ -297,17 +299,17 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
}

rdata->res.eof = (offset + count) >= eof;
- _fix_verify_io_params(rdata->lseg, &rdata->args.pages,
+ _fix_verify_io_params(hdr->lseg, &rdata->args.pages,
&rdata->args.pgbase,
rdata->args.offset, rdata->args.count);

dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
- __func__, rdata->inode->i_ino, offset, count, rdata->res.eof);
+ __func__, inode->i_ino, offset, count, rdata->res.eof);

err = objio_read_pagelist(rdata);
out:
if (unlikely(err)) {
- rdata->pnfs_error = err;
+ hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
return PNFS_NOT_ATTEMPTED;
}
@@ -340,7 +342,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
wdata->res.count = status;
wdata->verf.committed = oir->committed;
} else {
- wdata->pnfs_error = status;
+ wdata->header->pnfs_error = status;
}
objlayout_iodone(oir);
/* must not use oir after this point */
@@ -363,15 +365,16 @@ enum pnfs_try_status
objlayout_write_pagelist(struct nfs_write_data *wdata,
int how)
{
+ struct nfs_pgio_header *hdr = wdata->header;
int err;

- _fix_verify_io_params(wdata->lseg, &wdata->args.pages,
+ _fix_verify_io_params(hdr->lseg, &wdata->args.pages,
&wdata->args.pgbase,
wdata->args.offset, wdata->args.count);

err = objio_write_pagelist(wdata, how);
if (unlikely(err)) {
- wdata->pnfs_error = err;
+ hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err);
return PNFS_NOT_ATTEMPTED;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e4aee9d..b00170a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1191,13 +1191,15 @@ static int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *

static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
{
- dprintk("pnfs write error = %d\n", data->pnfs_error);
- if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+ struct nfs_pgio_header *hdr = data->header;
+
+ dprintk("pnfs write error = %d\n", hdr->pnfs_error);
+ if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
- pnfs_return_layout(data->inode);
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+ pnfs_return_layout(hdr->inode);
}
- data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages);
+ data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, &hdr->pages);
}

/*
@@ -1205,13 +1207,15 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
*/
void pnfs_ld_write_done(struct nfs_write_data *data)
{
- if (likely(!data->pnfs_error)) {
+ struct nfs_pgio_header *hdr = data->header;
+
+ if (!hdr->pnfs_error) {
pnfs_set_layoutcommit(data);
- data->mds_ops->rpc_call_done(&data->task, data);
+ hdr->mds_ops->rpc_call_done(&data->task, data);
} else
pnfs_ld_handle_write_error(data);
- put_lseg(data->lseg);
- data->mds_ops->rpc_release(data);
+ put_lseg(hdr->lseg);
+ hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_write_done);

@@ -1219,12 +1223,14 @@ static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_write_data *data)
{
- list_splice_tail_init(&data->pages, &desc->pg_list);
- if (data->req && list_empty(&data->req->wb_list))
- nfs_list_add_request(data->req, &desc->pg_list);
+ struct nfs_pgio_header *hdr = data->header;
+
+ list_splice_tail_init(&hdr->pages, &desc->pg_list);
+ if (hdr->req && list_empty(&hdr->req->wb_list))
+ nfs_list_add_request(hdr->req, &desc->pg_list);
nfs_pageio_reset_write_mds(desc);
desc->pg_recoalesce = 1;
- put_lseg(data->lseg);
+ put_lseg(hdr->lseg);
nfs_writedata_release(data);
}

@@ -1234,20 +1240,21 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
struct pnfs_layout_segment *lseg,
int how)
{
- struct inode *inode = wdata->inode;
+ struct nfs_pgio_header *hdr = wdata->header;
+ struct inode *inode = hdr->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);

- wdata->mds_ops = call_ops;
- wdata->lseg = get_lseg(lseg);
+ hdr->mds_ops = call_ops;
+ hdr->lseg = get_lseg(lseg);

dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);

trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
if (trypnfs == PNFS_NOT_ATTEMPTED) {
- put_lseg(wdata->lseg);
- wdata->lseg = NULL;
+ put_lseg(hdr->lseg);
+ hdr->lseg = NULL;
} else
nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);

@@ -1318,13 +1325,15 @@ static int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *h

static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
{
- dprintk("pnfs read error = %d\n", data->pnfs_error);
- if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+ struct nfs_pgio_header *hdr = data->header;
+
+ dprintk("pnfs read error = %d\n", hdr->pnfs_error);
+ if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) {
- clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
- pnfs_return_layout(data->inode);
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
+ pnfs_return_layout(hdr->inode);
}
- data->task.tk_status = pnfs_read_done_resend_to_mds(data->inode, &data->pages);
+ data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages);
}

/*
@@ -1332,13 +1341,15 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
*/
void pnfs_ld_read_done(struct nfs_read_data *data)
{
- if (likely(!data->pnfs_error)) {
+ struct nfs_pgio_header *hdr = data->header;
+
+ if (likely(!hdr->pnfs_error)) {
__nfs4_read_done_cb(data);
- data->mds_ops->rpc_call_done(&data->task, data);
+ hdr->mds_ops->rpc_call_done(&data->task, data);
} else
pnfs_ld_handle_read_error(data);
- put_lseg(data->lseg);
- data->mds_ops->rpc_release(data);
+ put_lseg(hdr->lseg);
+ hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);

@@ -1346,9 +1357,11 @@ static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_read_data *data)
{
- list_splice_tail_init(&data->pages, &desc->pg_list);
- if (data->req && list_empty(&data->req->wb_list))
- nfs_list_add_request(data->req, &desc->pg_list);
+ struct nfs_pgio_header *hdr = data->header;
+
+ list_splice_tail_init(&hdr->pages, &desc->pg_list);
+ if (hdr->req && list_empty(&hdr->req->wb_list))
+ nfs_list_add_request(hdr->req, &desc->pg_list);
nfs_pageio_reset_read_mds(desc);
desc->pg_recoalesce = 1;
nfs_readdata_release(data);
@@ -1362,20 +1375,21 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg)
{
- struct inode *inode = rdata->inode;
+ struct nfs_pgio_header *hdr = rdata->header;
+ struct inode *inode = hdr->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;

- rdata->mds_ops = call_ops;
- rdata->lseg = get_lseg(lseg);
+ hdr->mds_ops = call_ops;
+ hdr->lseg = get_lseg(lseg);

dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);

trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
if (trypnfs == PNFS_NOT_ATTEMPTED) {
- put_lseg(rdata->lseg);
- rdata->lseg = NULL;
+ put_lseg(hdr->lseg);
+ hdr->lseg = NULL;
} else {
nfs_inc_stats(inode, NFSIOS_PNFS_READ);
}
@@ -1450,30 +1464,32 @@ EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
void
pnfs_set_layoutcommit(struct nfs_write_data *wdata)
{
- struct nfs_inode *nfsi = NFS_I(wdata->inode);
+ struct nfs_pgio_header *hdr = wdata->header;
+ struct inode *inode = hdr->inode;
+ struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos = wdata->mds_offset + wdata->res.count;
bool mark_as_dirty = false;

- spin_lock(&nfsi->vfs_inode.i_lock);
+ spin_lock(&inode->i_lock);
if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
mark_as_dirty = true;
dprintk("%s: Set layoutcommit for inode %lu ",
- __func__, wdata->inode->i_ino);
+ __func__, inode->i_ino);
}
- if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) {
+ if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
/* references matched in nfs4_layoutcommit_release */
- get_lseg(wdata->lseg);
+ get_lseg(hdr->lseg);
}
if (end_pos > nfsi->layout->plh_lwb)
nfsi->layout->plh_lwb = end_pos;
- spin_unlock(&nfsi->vfs_inode.i_lock);
+ spin_unlock(&inode->i_lock);
dprintk("%s: lseg %p end_pos %llu\n",
- __func__, wdata->lseg, nfsi->layout->plh_lwb);
+ __func__, hdr->lseg, nfsi->layout->plh_lwb);

/* if pnfs_layoutcommit_inode() runs between inode locks, the next one
* will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
if (mark_as_dirty)
- mark_inode_dirty_sync(wdata->inode);
+ mark_inode_dirty_sync(inode);
}
EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);

diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index bf80503..22ee705 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -641,12 +641,14 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,

static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
+ struct inode *inode = data->header->inode;
+
if (nfs_async_handle_expired_key(task))
return -EAGAIN;

- nfs_invalidate_atime(data->inode);
+ nfs_invalidate_atime(inode);
if (task->tk_status >= 0) {
- nfs_refresh_inode(data->inode, data->res.fattr);
+ nfs_refresh_inode(inode, data->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes
*/
@@ -668,11 +670,13 @@ static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat

static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
+ struct inode *inode = data->header->inode;
+
if (nfs_async_handle_expired_key(task))
return -EAGAIN;

if (task->tk_status >= 0)
- nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr);
+ nfs_post_op_update_inode_force_wcc(inode, data->res.fattr);
return 0;
}

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4ddba67..d6d4682 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,19 +35,24 @@ static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;

-struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)
{
- struct nfs_read_data *p;
+ struct nfs_read_header *p;

p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
if (p) {
- INIT_LIST_HEAD(&p->pages);
- p->npages = pagecount;
- if (pagecount <= ARRAY_SIZE(p->page_array))
- p->pagevec = p->page_array;
+ struct nfs_pgio_header *hdr = &p->header;
+ struct nfs_read_data *data = &p->rpc_data;
+
+ INIT_LIST_HEAD(&hdr->pages);
+ INIT_LIST_HEAD(&data->list);
+ data->npages = pagecount;
+ data->header = hdr;
+ if (pagecount <= ARRAY_SIZE(data->page_array))
+ data->pagevec = data->page_array;
else {
- p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
- if (!p->pagevec) {
+ data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
+ if (!data->pagevec) {
kmem_cache_free(nfs_rdata_cachep, p);
p = NULL;
}
@@ -56,17 +61,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
return p;
}

-void nfs_readdata_free(struct nfs_read_data *p)
+void nfs_readhdr_free(struct nfs_pgio_header *hdr)
{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
- kmem_cache_free(nfs_rdata_cachep, p);
+ struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
+
+ kmem_cache_free(nfs_rdata_cachep, rhdr);
}

void nfs_readdata_release(struct nfs_read_data *rdata)
{
put_nfs_open_context(rdata->args.context);
- nfs_readdata_free(rdata);
+ if (rdata->pagevec != rdata->page_array)
+ kfree(rdata->pagevec);
+ nfs_readhdr_free(rdata->header);
}

static
@@ -173,13 +180,13 @@ int nfs_initiate_read(struct rpc_clnt *clnt,
struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
{
- struct inode *inode = data->inode;
+ struct inode *inode = data->header->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
- .rpc_cred = data->cred,
+ .rpc_cred = data->header->cred,
};
struct rpc_task_setup task_setup_data = {
.task = &data->task,
@@ -216,11 +223,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
unsigned int count, unsigned int offset)
{
- struct inode *inode = req->wb_context->dentry->d_inode;
+ struct inode *inode = data->header->inode;

- data->req = req;
- data->inode = inode;
- data->cred = req->wb_context->cred;
+ data->header->req = req;
+ data->header->inode = inode;
+ data->header->cred = req->wb_context->cred;

data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -239,7 +246,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
static int nfs_do_read(struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
{
- struct inode *inode = data->args.context->dentry->d_inode;
+ struct inode *inode = data->header->inode;

return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops);
}
@@ -293,6 +300,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
+ struct nfs_read_header *rhdr;
struct nfs_read_data *data;
size_t rsize = desc->pg_bsize, nbytes;
unsigned int offset;
@@ -306,9 +314,10 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
do {
size_t len = min(nbytes,rsize);

- data = nfs_readdata_alloc(1);
- if (!data)
+ rhdr = nfs_readhdr_alloc(1);
+ if (!rhdr)
goto out_bad;
+ data = &rhdr->rpc_data;
data->pagevec[0] = page;
nfs_read_rpcsetup(req, data, len, offset);
list_add(&data->list, res);
@@ -333,26 +342,28 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *
{
struct nfs_page *req;
struct page **pages;
+ struct nfs_read_header *rhdr;
struct nfs_read_data *data;
struct list_head *head = &desc->pg_list;
int ret = 0;

- data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data) {
+ rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!rhdr) {
nfs_async_read_error(head);
ret = -ENOMEM;
goto out;
}

+ data = &rhdr->rpc_data;
pages = data->pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- nfs_list_add_request(req, &data->pages);
+ nfs_list_add_request(req, &rhdr->header.pages);
*pages++ = req->wb_page;
}
- req = nfs_list_entry(data->pages.next);
+ req = nfs_list_entry(rhdr->header.pages.next);

nfs_read_rpcsetup(req, data, desc->pg_count, 0);
list_add(&data->list, res);
@@ -390,20 +401,21 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
*/
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
+ struct inode *inode = data->header->inode;
int status;

dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
task->tk_status);

- status = NFS_PROTO(data->inode)->read_done(task, data);
+ status = NFS_PROTO(inode)->read_done(task, data);
if (status != 0)
return status;

- nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+ nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);

if (task->tk_status == -ESTALE) {
- set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
- nfs_mark_for_revalidate(data->inode);
+ set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+ nfs_mark_for_revalidate(inode);
}
return 0;
}
@@ -417,7 +429,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
return;

/* This is a short read! */
- nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+ nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count == 0)
return;
@@ -449,7 +461,7 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
static void nfs_readpage_release_partial(void *calldata)
{
struct nfs_read_data *data = calldata;
- struct nfs_page *req = data->req;
+ struct nfs_page *req = data->header->req;
struct page *page = req->wb_page;
int status = data->task.tk_status;

@@ -461,13 +473,13 @@ static void nfs_readpage_release_partial(void *calldata)
SetPageUptodate(page);
nfs_readpage_release(req);
}
- nfs_readdata_release(calldata);
+ nfs_readdata_release(data);
}

void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_read_data *data = calldata;
- NFS_PROTO(data->inode)->read_rpc_prepare(task, data);
+ NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
}

static const struct rpc_call_ops nfs_read_partial_ops = {
@@ -524,9 +536,10 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
static void nfs_readpage_release_full(void *calldata)
{
struct nfs_read_data *data = calldata;
+ struct nfs_pgio_header *hdr = data->header;

- while (!list_empty(&data->pages)) {
- struct nfs_page *req = nfs_list_entry(data->pages.next);
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);

nfs_list_remove_request(req);
nfs_readpage_release(req);
@@ -685,7 +698,7 @@ out:
int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
- sizeof(struct nfs_read_data),
+ sizeof(struct nfs_read_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 76735dd..dbb5c0a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -69,19 +69,24 @@ void nfs_commit_free(struct nfs_commit_data *p)
}
EXPORT_SYMBOL_GPL(nfs_commit_free);

-struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+struct nfs_write_header *nfs_writehdr_alloc(unsigned int pagecount)
{
- struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
+ struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

if (p) {
+ struct nfs_pgio_header *hdr = &p->header;
+ struct nfs_write_data *data = &p->rpc_data;
+
memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&p->pages);
- p->npages = pagecount;
- if (pagecount <= ARRAY_SIZE(p->page_array))
- p->pagevec = p->page_array;
+ INIT_LIST_HEAD(&hdr->pages);
+ INIT_LIST_HEAD(&data->list);
+ data->npages = pagecount;
+ data->header = hdr;
+ if (pagecount <= ARRAY_SIZE(data->page_array))
+ data->pagevec = data->page_array;
else {
- p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
- if (!p->pagevec) {
+ data->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
+ if (!data->pagevec) {
mempool_free(p, nfs_wdata_mempool);
p = NULL;
}
@@ -90,17 +95,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
return p;
}

-void nfs_writedata_free(struct nfs_write_data *p)
+void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
- mempool_free(p, nfs_wdata_mempool);
+ struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+ mempool_free(whdr, nfs_wdata_mempool);
}

void nfs_writedata_release(struct nfs_write_data *wdata)
{
put_nfs_open_context(wdata->args.context);
- nfs_writedata_free(wdata);
+ if (wdata->pagevec != wdata->page_array)
+ kfree(wdata->pagevec);
+ nfs_writehdr_free(wdata->header);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@@ -507,9 +513,8 @@ static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
if (data->verf.committed == NFS_DATA_SYNC)
- return data->lseg == NULL;
- else
- return data->verf.committed != NFS_FILE_SYNC;
+ return data->header->lseg == NULL;
+ return data->verf.committed != NFS_FILE_SYNC;
}

static inline
@@ -517,7 +522,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req,
struct nfs_write_data *data)
{
if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
- nfs_mark_request_commit(req, data->lseg);
+ nfs_mark_request_commit(req, data->header->lseg);
return 1;
}
if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -841,13 +846,13 @@ int nfs_initiate_write(struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops,
int how)
{
- struct inode *inode = data->inode;
+ struct inode *inode = data->header->inode;
int priority = flush_task_priority(how);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
- .rpc_cred = data->cred,
+ .rpc_cred = data->header->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = clnt,
@@ -896,14 +901,15 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
unsigned int count, unsigned int offset,
int how)
{
+ struct nfs_pgio_header *hdr = data->header;
struct inode *inode = req->wb_context->dentry->d_inode;

/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */

- data->req = req;
- data->inode = inode = req->wb_context->dentry->d_inode;
- data->cred = req->wb_context->cred;
+ hdr->req = req;
+ hdr->inode = inode = req->wb_context->dentry->d_inode;
+ hdr->cred = req->wb_context->cred;

data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -935,7 +941,7 @@ static int nfs_do_write(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
int how)
{
- struct inode *inode = data->args.context->dentry->d_inode;
+ struct inode *inode = data->header->inode;

return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how);
}
@@ -981,6 +987,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
{
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
+ struct nfs_write_header *whdr;
struct nfs_write_data *data;
size_t wsize = desc->pg_bsize, nbytes;
unsigned int offset;
@@ -1000,9 +1007,10 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head
do {
size_t len = min(nbytes, wsize);

- data = nfs_writedata_alloc(1);
- if (!data)
+ whdr = nfs_writehdr_alloc(1);
+ if (!whdr)
goto out_bad;
+ data = &whdr->rpc_data;
data->pagevec[0] = page;
nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
list_add(&data->list, res);
@@ -1036,13 +1044,14 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
{
struct nfs_page *req;
struct page **pages;
+ struct nfs_write_header *whdr;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
int ret = 0;

- data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data) {
+ whdr = nfs_writehdr_alloc(nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!whdr) {
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
@@ -1051,14 +1060,15 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r
ret = -ENOMEM;
goto out;
}
+ data = &whdr->rpc_data;
pages = data->pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- nfs_list_add_request(req, &data->pages);
+ nfs_list_add_request(req, &whdr->header.pages);
*pages++ = req->wb_page;
}
- req = nfs_list_entry(data->pages.next);
+ req = nfs_list_entry(whdr->header.pages.next);

if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
@@ -1126,10 +1136,11 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)

dprintk("NFS: %5u write(%s/%lld %d@%lld)",
task->tk_pid,
- data->req->wb_context->dentry->d_inode->i_sb->s_id,
+ data->header->inode->i_sb->s_id,
(long long)
- NFS_FILEID(data->req->wb_context->dentry->d_inode),
- data->req->wb_bytes, (long long)req_offset(data->req));
+ NFS_FILEID(data->header->inode),
+ data->header->req->wb_bytes,
+ (long long)req_offset(data->header->req));

nfs_writeback_done(task, data);
}
@@ -1137,7 +1148,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
static void nfs_writeback_release_partial(void *calldata)
{
struct nfs_write_data *data = calldata;
- struct nfs_page *req = data->req;
+ struct nfs_page *req = data->header->req;
struct page *page = req->wb_page;
int status = data->task.tk_status;

@@ -1169,13 +1180,13 @@ static void nfs_writeback_release_partial(void *calldata)
out:
if (atomic_dec_and_test(&req->wb_complete))
nfs_writepage_release(req, data);
- nfs_writedata_release(calldata);
+ nfs_writedata_release(data);
}

void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
- NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
+ NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
}

void nfs_commit_prepare(struct rpc_task *task, void *calldata)
@@ -1208,11 +1219,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
static void nfs_writeback_release_full(void *calldata)
{
struct nfs_write_data *data = calldata;
+ struct nfs_pgio_header *hdr = data->header;
int status = data->task.tk_status;

/* Update attributes as result of writeback. */
- while (!list_empty(&data->pages)) {
- struct nfs_page *req = nfs_list_entry(data->pages.next);
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;

nfs_list_remove_request(req);
@@ -1233,7 +1245,7 @@ static void nfs_writeback_release_full(void *calldata)

if (nfs_write_need_commit(data)) {
memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
- nfs_mark_request_commit(req, data->lseg);
+ nfs_mark_request_commit(req, hdr->lseg);
dprintk(" marked for commit\n");
goto next;
}
@@ -1244,7 +1256,7 @@ remove_request:
nfs_unlock_request(req);
nfs_end_page_writeback(page);
}
- nfs_writedata_release(calldata);
+ nfs_writedata_release(data);
}

static const struct rpc_call_ops nfs_write_full_ops = {
@@ -1261,6 +1273,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
+ struct inode *inode = data->header->inode;
int status;

dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
@@ -1273,10 +1286,10 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
* another writer had changed the file, but some applications
* depend on tighter cache coherency when writing.
*/
- status = NFS_PROTO(data->inode)->write_done(task, data);
+ status = NFS_PROTO(inode)->write_done(task, data);
if (status != 0)
return;
- nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
@@ -1294,7 +1307,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
- NFS_SERVER(data->inode)->nfs_client->cl_hostname,
+ NFS_SERVER(inode)->nfs_client->cl_hostname,
resp->verf->committed, argp->stable);
complain = jiffies + 300 * HZ;
}
@@ -1304,7 +1317,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
if (task->tk_status >= 0 && resp->count < argp->count) {
static unsigned long complain;

- nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+ nfs_inc_stats(inode, NFSIOS_SHORTWRITE);

/* Has the server at least made some progress? */
if (resp->count != 0) {
@@ -1333,7 +1346,6 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
/* Can't do anything about it except throw an error. */
task->tk_status = -EIO;
}
- return;
}


@@ -1745,7 +1757,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
- sizeof(struct nfs_write_data),
+ sizeof(struct nfs_write_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_wdata_cachep == NULL)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d5d68f3..8d3a2b8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -569,12 +569,6 @@ nfs_have_writebacks(struct inode *inode)
}

/*
- * Allocate nfs_write_data structures
- */
-extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
-extern void nfs_writedata_free(struct nfs_write_data *);
-
-/*
* linux/fs/nfs/read.c
*/
extern int nfs_readpage(struct file *, struct page *);
@@ -585,12 +579,6 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
struct page *);

/*
- * Allocate nfs_read_data structures
- */
-extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
-extern void nfs_readdata_free(struct nfs_read_data *);
-
-/*
* linux/fs/nfs3proc.c
*/
#ifdef CONFIG_NFS_V3_ACL
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2aff66d..0e31c44 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1163,52 +1163,58 @@ struct nfs_page;
#define NFS_PAGEVEC_SIZE (8U)

struct nfs_read_data {
+ struct nfs_pgio_header *header;
+ struct list_head list;
struct rpc_task task;
- struct inode *inode;
- struct rpc_cred *cred;
struct nfs_fattr fattr; /* fattr storage */
- struct list_head pages; /* Coalesced read requests */
- struct list_head list; /* lists of struct nfs_read_data */
- struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
struct nfs_readargs args;
struct nfs_readres res;
unsigned long timestamp; /* For lease renewal */
- struct pnfs_layout_segment *lseg;
- struct nfs_client *ds_clp; /* pNFS data server */
- const struct rpc_call_ops *mds_ops;
int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
__u64 mds_offset;
- int pnfs_error;
struct page *page_array[NFS_PAGEVEC_SIZE];
+ struct nfs_client *ds_clp; /* pNFS data server */
+};
+
+struct nfs_pgio_header {
+ struct inode *inode;
+ struct rpc_cred *cred;
+ struct list_head pages;
+ struct nfs_page *req;
+ struct pnfs_layout_segment *lseg;
+ const struct rpc_call_ops *mds_ops;
+ int pnfs_error;
+};
+
+struct nfs_read_header {
+ struct nfs_pgio_header header;
+ struct nfs_read_data rpc_data;
};

struct nfs_direct_req;

struct nfs_write_data {
+ struct nfs_pgio_header *header;
+ struct list_head list;
struct rpc_task task;
- struct inode *inode;
- struct rpc_cred *cred;
struct nfs_fattr fattr;
struct nfs_writeverf verf;
- struct list_head pages; /* Coalesced requests we wish to flush */
- struct list_head list; /* lists of struct nfs_write_data */
- struct nfs_page *req; /* multi ops per nfs_page */
struct page **pagevec;
unsigned int npages; /* Max length of pagevec */
struct nfs_writeargs args; /* argument struct */
struct nfs_writeres res; /* result struct */
- struct pnfs_layout_segment *lseg;
- struct nfs_client *ds_clp; /* pNFS data server */
- const struct rpc_call_ops *mds_ops;
- int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
-#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
-#endif
+ int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
__u64 mds_offset; /* Filelayout dense stripe */
- int pnfs_error;
struct page *page_array[NFS_PAGEVEC_SIZE];
+ struct nfs_client *ds_clp; /* pNFS data server */
+};
+
+struct nfs_write_header {
+ struct nfs_pgio_header header;
+ struct nfs_write_data rpc_data;
};

struct nfs_commit_data {
--
1.7.2.1


2012-04-20 18:36:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 02/28] NFS: put open context on error in nfs_pagein_multi

Cc: <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/read.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9a0e8ef..0a4be28 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -322,7 +322,7 @@ out_bad:
while (!list_empty(res)) {
data = list_entry(res->next, struct nfs_read_data, list);
list_del(&data->list);
- nfs_readdata_free(data);
+ nfs_readdata_release(data);
}
nfs_readpage_release(req);
return -ENOMEM;
--
1.7.2.1


2012-04-20 18:36:52

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 18/28] NFS: remove unused wb_complete field from struct nfs_page

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pagelist.c | 1 -
fs/nfs/write.c | 1 -
include/linux/nfs_page.h | 1 -
3 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4cf2a68..5d01a16 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -114,7 +114,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
- atomic_set(&req->wb_complete, 0);
req->wb_index = page->index;
page_cache_get(page);
BUG_ON(PagePrivate(page));
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1503972..705bf01 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1059,7 +1059,6 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
nbytes -= len;
offset += len;
} while (nbytes != 0);
- atomic_set(&req->wb_complete, requests);
desc->pg_rpc_callops = &nfs_write_common_ops;
return ret;

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index bc5b7a5..0a5b63f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -36,7 +36,6 @@ struct nfs_page {
struct page *wb_page; /* page to read in/write out */
struct nfs_open_context *wb_context; /* File state context info */
struct nfs_lock_context *wb_lock_context; /* lock context info */
- atomic_t wb_complete; /* i/os we're waiting for */
pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */
unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */
wb_pgbase, /* Start of page data */
--
1.7.2.1


2012-04-20 18:36:47

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 03/28] NFS: put open context on error in nfs_flush_multi

Cc: <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/write.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9b8d4d4..c074623 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1019,7 +1019,7 @@ out_bad:
while (!list_empty(res)) {
data = list_entry(res->next, struct nfs_write_data, list);
list_del(&data->list);
- nfs_writedata_free(data);
+ nfs_writedata_release(data);
}
nfs_redirty_request(req);
return -ENOMEM;
--
1.7.2.1


2012-04-20 18:36:47

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 01/28] NFS: check for req==NULL in nfs_try_to_update_request cleanup

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/write.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2c68818..9b8d4d4 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -682,7 +682,8 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
req->wb_bytes = rqend - req->wb_offset;
out_unlock:
spin_unlock(&inode->i_lock);
- nfs_clear_request_commit(req);
+ if (req)
+ nfs_clear_request_commit(req);
return req;
out_flushme:
spin_unlock(&inode->i_lock);
--
1.7.2.1


2012-04-20 18:36:50

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 14/28] NFS: merge _full and _partial read rpc_ops

Decouple nfs_pgio_header and nfs_read_data, and have (possibly
multiple) nfs_read_datas each take a refcount on nfs_pgio_header.

For the moment, this keeps nfs_read_header as a way to preallocate a
single nfs_read_data together with the nfs_pgio_header. The code doesn't
need this, and would be prettier without it, but given the amount of churn
I am already introducing I didn't want to play with tuning new mempools.

This also fixes a bug in pnfs_ld_handle_read_error. In the case of
desc->pg_bsize < PAGE_CACHE_SIZE, the pages list was empty, causing
the replay attempt to do nothing.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 10 +-
fs/nfs/internal.h | 15 ++-
fs/nfs/nfs4filelayout.c | 1 -
fs/nfs/nfs4proc.c | 2 -
fs/nfs/pagelist.c | 24 ++++
fs/nfs/pnfs.c | 55 +++++---
fs/nfs/read.c | 338 +++++++++++++++++++++-------------------------
include/linux/nfs_page.h | 1 -
include/linux/nfs_xdr.h | 16 +++
9 files changed, 252 insertions(+), 210 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0faba4c..90b00ce 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -319,10 +319,16 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
bytes = min(rsize,count);

result = -ENOMEM;
- rhdr = nfs_readhdr_alloc(nfs_page_array_len(pgbase, bytes));
+ rhdr = nfs_readhdr_alloc();
if (unlikely(!rhdr))
break;
- data = &rhdr->rpc_data;
+ data = nfs_readdata_alloc(&rhdr->header, nfs_page_array_len(pgbase, bytes));
+ if (!data) {
+ nfs_readhdr_free(&rhdr->header);
+ break;
+ }
+ data->header = &rhdr->header;
+ atomic_inc(&data->header->refcnt);
pages = &data->pages;

down_read(&current->mm->mmap_sem);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0818e66..ae550f5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -199,6 +199,7 @@ struct vfsmount *nfs_do_refmount(struct dentry *dentry)
extern struct svc_version nfs4_callback_version1;
extern struct svc_version nfs4_callback_version4;

+struct nfs_pageio_descriptor;
/* pagelist.c */
extern int __init nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void);
@@ -210,6 +211,10 @@ extern void nfs_destroy_writepagecache(void);
extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
+extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr,
+ void (*release)(struct nfs_pgio_header *hdr));
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);

/* nfs2xdr.c */
extern int nfs_stat_to_errno(enum nfs_stat);
@@ -295,17 +300,19 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif

-struct nfs_pageio_descriptor;
/* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(unsigned int npages);
+extern void nfs_async_read_error(struct list_head *head);
+extern struct nfs_read_header *nfs_readhdr_alloc(void);
extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
+extern void nfs_read_completion(struct nfs_pgio_header *hdr);
+extern struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount);
extern int nfs_initiate_read(struct rpc_clnt *clnt,
struct nfs_read_data *data,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct list_head *head);
-
+ struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index ad1d680..333e765 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -227,7 +227,6 @@ static void filelayout_read_release(void *data)
{
struct nfs_read_data *rdata = data;

- put_lseg(rdata->header->lseg);
rdata->header->mds_ops->rpc_release(data);
}

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 75d5959..16a3877 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3336,8 +3336,6 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
struct inode *inode = hdr->inode;

dprintk("%s Reset task for i/o through\n", __func__);
- put_lseg(hdr->lseg);
- hdr->lseg = NULL;
data->ds_clp = NULL;
/* offsets will differ in the dense stripe case */
data->args.offset = data->mds_offset;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d349bd4..cd4c038 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -39,6 +39,30 @@ bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
return p->pagevec != NULL;
}

+void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr,
+ void (*release)(struct nfs_pgio_header *hdr))
+{
+ hdr->req = nfs_list_entry(desc->pg_list.next);
+ hdr->inode = desc->pg_inode;
+ hdr->cred = hdr->req->wb_context->cred;
+ hdr->io_start = req_offset(hdr->req);
+ hdr->good_bytes = desc->pg_count;
+ hdr->release = release;
+}
+
+void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
+{
+ spin_lock(&hdr->lock);
+ if (pos < hdr->io_start + hdr->good_bytes) {
+ set_bit(NFS_IOHDR_ERROR, &hdr->flags);
+ clear_bit(NFS_IOHDR_EOF, &hdr->flags);
+ hdr->good_bytes = pos - hdr->io_start;
+ hdr->error = error;
+ }
+ spin_unlock(&hdr->lock);
+}
+
static inline struct nfs_page *
nfs_page_alloc(void)
{
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b00170a..5947a90 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1333,7 +1333,9 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(hdr->inode)->flags);
pnfs_return_layout(hdr->inode);
}
- data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, &hdr->pages);
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
+ data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode,
+ &hdr->pages);
}

/*
@@ -1348,7 +1350,6 @@ void pnfs_ld_read_done(struct nfs_read_data *data)
hdr->mds_ops->rpc_call_done(&data->task, data);
} else
pnfs_ld_handle_read_error(data);
- put_lseg(hdr->lseg);
hdr->mds_ops->rpc_release(data);
}
EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
@@ -1359,11 +1360,11 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
{
struct nfs_pgio_header *hdr = data->header;

- list_splice_tail_init(&hdr->pages, &desc->pg_list);
- if (hdr->req && list_empty(&hdr->req->wb_list))
- nfs_list_add_request(hdr->req, &desc->pg_list);
- nfs_pageio_reset_read_mds(desc);
- desc->pg_recoalesce = 1;
+ if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
+ list_splice_tail_init(&hdr->pages, &desc->pg_list);
+ nfs_pageio_reset_read_mds(desc);
+ desc->pg_recoalesce = 1;
+ }
nfs_readdata_release(data);
}

@@ -1381,18 +1382,13 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
enum pnfs_try_status trypnfs;

hdr->mds_ops = call_ops;
- hdr->lseg = get_lseg(lseg);

dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);

trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
- if (trypnfs == PNFS_NOT_ATTEMPTED) {
- put_lseg(hdr->lseg);
- hdr->lseg = NULL;
- } else {
+ if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_READ);
- }
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
return trypnfs;
}
@@ -1408,7 +1404,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
while (!list_empty(head)) {
enum pnfs_try_status trypnfs;

- data = list_entry(head->next, struct nfs_read_data, list);
+ data = list_first_entry(head, struct nfs_read_data, list);
list_del_init(&data->list);

trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1418,20 +1414,41 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
put_lseg(lseg);
}

+static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
+{
+ put_lseg(hdr->lseg);
+ nfs_readhdr_free(hdr);
+}
+
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- LIST_HEAD(head);
+ struct nfs_read_header *rhdr;
+ struct nfs_pgio_header *hdr;
int ret;

- ret = nfs_generic_pagein(desc, &head);
- if (ret != 0) {
+ rhdr = nfs_readhdr_alloc();
+ if (!rhdr) {
+ nfs_async_read_error(&desc->pg_list);
+ ret = -ENOMEM;
put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
}
- pnfs_do_multiple_reads(desc, &head);
- return 0;
+ hdr = &rhdr->header;
+ nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
+ hdr->lseg = get_lseg(desc->pg_lseg);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_pagein(desc, hdr);
+ if (ret != 0) {
+ put_lseg(desc->pg_lseg);
+ desc->pg_lseg = NULL;
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ } else
+ pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ nfs_read_completion(hdr);
+ return ret;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f6ab30b..c9633b2 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,29 +30,49 @@
#define NFSDBG_FACILITY NFSDBG_PAGECACHE

static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_partial_ops;
-static const struct rpc_call_ops nfs_read_full_ops;
+static const struct rpc_call_ops nfs_read_common_ops;

static struct kmem_cache *nfs_rdata_cachep;

-struct nfs_read_header *nfs_readhdr_alloc(unsigned int pagecount)
+struct nfs_read_header *nfs_readhdr_alloc()
{
- struct nfs_read_header *p;
+ struct nfs_read_header *rhdr;

- p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
- if (p) {
- struct nfs_pgio_header *hdr = &p->header;
- struct nfs_read_data *data = &p->rpc_data;
+ rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
+ if (rhdr) {
+ struct nfs_pgio_header *hdr = &rhdr->header;

INIT_LIST_HEAD(&hdr->pages);
- INIT_LIST_HEAD(&data->list);
+ INIT_LIST_HEAD(&hdr->rpc_list);
+ spin_lock_init(&hdr->lock);
+ atomic_set(&hdr->refcnt, 0);
+ }
+ return rhdr;
+}
+
+struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
+{
+ struct nfs_read_data *data, *prealloc;
+
+ prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
+ if (prealloc->header == NULL)
+ data = prealloc;
+ else
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out;
+
+ if (nfs_pgarray_set(&data->pages, pagecount)) {
data->header = hdr;
- if (!nfs_pgarray_set(&data->pages, pagecount)) {
- kmem_cache_free(nfs_rdata_cachep, p);
- p = NULL;
- }
+ atomic_inc(&hdr->refcnt);
+ } else {
+ if (data != prealloc)
+ kfree(data);
+ data = NULL;
}
- return p;
+out:
+ return data;
}

void nfs_readhdr_free(struct nfs_pgio_header *hdr)
@@ -64,10 +84,18 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)

void nfs_readdata_release(struct nfs_read_data *rdata)
{
+ struct nfs_pgio_header *hdr = rdata->header;
+ struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
+
put_nfs_open_context(rdata->args.context);
if (rdata->pages.pagevec != rdata->pages.page_array)
kfree(rdata->pages.pagevec);
- nfs_readhdr_free(rdata->header);
+ if (rdata != &read_header->rpc_data)
+ kfree(rdata);
+ else
+ rdata->header = NULL;
+ if (atomic_dec_and_test(&hdr->refcnt))
+ nfs_read_completion(hdr);
}

static
@@ -79,35 +107,6 @@ int nfs_return_empty_page(struct page *page)
return 0;
}

-static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
-{
- unsigned int remainder = data->args.count - data->res.count;
- unsigned int base = data->args.pgbase + data->res.count;
- unsigned int pglen;
- struct page **pages;
-
- if (data->res.eof == 0 || remainder == 0)
- return;
- /*
- * Note: "remainder" can never be negative, since we check for
- * this in the XDR code.
- */
- pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
- base &= ~PAGE_CACHE_MASK;
- pglen = PAGE_CACHE_SIZE - base;
- for (;;) {
- if (remainder <= pglen) {
- zero_user(*pages, base, remainder);
- break;
- }
- zero_user(*pages, base, pglen);
- pages++;
- remainder -= pglen;
- pglen = PAGE_CACHE_SIZE;
- base = 0;
- }
-}
-
void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode)
{
@@ -170,6 +169,46 @@ static void nfs_readpage_release(struct nfs_page *req)
nfs_release_request(req);
}

+/* Note io was page aligned */
+void nfs_read_completion(struct nfs_pgio_header *hdr)
+{
+ unsigned long bytes = 0;
+
+ if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
+ goto out;
+ if (!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+ struct page *page = req->wb_page;
+
+ if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
+ if (bytes > hdr->good_bytes)
+ zero_user(page, 0, PAGE_SIZE);
+ else if (hdr->good_bytes - bytes < PAGE_SIZE)
+ zero_user_segment(page,
+ hdr->good_bytes & ~PAGE_MASK,
+ PAGE_SIZE);
+ }
+ SetPageUptodate(page);
+ nfs_list_remove_request(req);
+ nfs_readpage_release(req);
+ bytes += PAGE_SIZE;
+ }
+ } else {
+ while (!list_empty(&hdr->pages)) {
+ struct nfs_page *req = nfs_list_entry(hdr->pages.next);
+
+ bytes += req->wb_bytes;
+ if (bytes <= hdr->good_bytes)
+ SetPageUptodate(req->wb_page);
+ nfs_list_remove_request(req);
+ nfs_readpage_release(req);
+ }
+ }
+out:
+ hdr->release(hdr);
+}
+
int nfs_initiate_read(struct rpc_clnt *clnt,
struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
@@ -214,16 +253,12 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
/*
* Set up the NFS read request struct
*/
-static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+static void nfs_read_rpcsetup(struct nfs_read_data *data,
unsigned int count, unsigned int offset)
{
- struct inode *inode = data->header->inode;
-
- data->header->req = req;
- data->header->inode = inode;
- data->header->cred = req->wb_context->cred;
+ struct nfs_page *req = data->header->req;

- data->args.fh = NFS_FH(inode);
+ data->args.fh = NFS_FH(data->header->inode);
data->args.offset = req_offset(req) + offset;
data->args.pgbase = req->wb_pgbase + offset;
data->args.pages = data->pages.pagevec;
@@ -255,7 +290,7 @@ nfs_do_multiple_reads(struct list_head *head,
while (!list_empty(head)) {
int ret2;

- data = list_entry(head->next, struct nfs_read_data, list);
+ data = list_first_entry(head, struct nfs_read_data, list);
list_del_init(&data->list);

ret2 = nfs_do_read(data, call_ops);
@@ -265,7 +300,7 @@ nfs_do_multiple_reads(struct list_head *head,
return ret;
}

-static void
+void
nfs_async_read_error(struct list_head *head)
{
struct nfs_page *req;
@@ -290,11 +325,11 @@ nfs_async_read_error(struct list_head *head)
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
- struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
+ struct nfs_page *req = hdr->req;
struct page *page = req->wb_page;
- struct nfs_read_header *rhdr;
struct nfs_read_data *data;
size_t rsize = desc->pg_bsize, nbytes;
unsigned int offset;
@@ -302,85 +337,97 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head
int ret = 0;

nfs_list_remove_request(req);
+ nfs_list_add_request(req, &hdr->pages);

offset = 0;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);

- rhdr = nfs_readhdr_alloc(1);
- if (!rhdr)
+ data = nfs_readdata_alloc(hdr, 1);
+ if (!data)
goto out_bad;
- data = &rhdr->rpc_data;
data->pages.pagevec[0] = page;
- nfs_read_rpcsetup(req, data, len, offset);
- list_add(&data->list, res);
+ nfs_read_rpcsetup(data, len, offset);
+ list_add(&data->list, &hdr->rpc_list);
requests++;
nbytes -= len;
offset += len;
} while(nbytes != 0);
- atomic_set(&req->wb_complete, requests);
- desc->pg_rpc_callops = &nfs_read_partial_ops;
+ desc->pg_rpc_callops = &nfs_read_common_ops;
return ret;
out_bad:
- while (!list_empty(res)) {
- data = list_entry(res->next, struct nfs_read_data, list);
+ while (!list_empty(&hdr->rpc_list)) {
+ data = list_first_entry(&hdr->rpc_list, struct nfs_read_data, list);
list_del(&data->list);
nfs_readdata_release(data);
}
- nfs_readpage_release(req);
+ nfs_async_read_error(&hdr->pages);
return -ENOMEM;
}

-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
struct nfs_page *req;
struct page **pages;
- struct nfs_read_header *rhdr;
- struct nfs_read_data *data;
+ struct nfs_read_data *data;
struct list_head *head = &desc->pg_list;
int ret = 0;

- rhdr = nfs_readhdr_alloc(nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!rhdr) {
+ data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!data) {
nfs_async_read_error(head);
ret = -ENOMEM;
goto out;
}

- data = &rhdr->rpc_data;
pages = data->pages.pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
- nfs_list_add_request(req, &rhdr->header.pages);
+ nfs_list_add_request(req, &hdr->pages);
*pages++ = req->wb_page;
}
- req = nfs_list_entry(rhdr->header.pages.next);

- nfs_read_rpcsetup(req, data, desc->pg_count, 0);
- list_add(&data->list, res);
- desc->pg_rpc_callops = &nfs_read_full_ops;
+ nfs_read_rpcsetup(data, desc->pg_count, 0);
+ list_add(&data->list, &hdr->rpc_list);
+ desc->pg_rpc_callops = &nfs_read_common_ops;
out:
return ret;
}

-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
{
if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_pagein_multi(desc, head);
- return nfs_pagein_one(desc, head);
+ return nfs_pagein_multi(desc, hdr);
+ return nfs_pagein_one(desc, hdr);
}

static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- LIST_HEAD(head);
+ struct nfs_read_header *rhdr;
+ struct nfs_pgio_header *hdr;
int ret;

- ret = nfs_generic_pagein(desc, &head);
+ rhdr = nfs_readhdr_alloc();
+ if (!rhdr) {
+ nfs_async_read_error(&desc->pg_list);
+ return -ENOMEM;
+ }
+ hdr = &rhdr->header;
+ nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_pagein(desc, hdr);
if (ret == 0)
- ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+ ret = nfs_do_multiple_reads(&hdr->rpc_list,
+ desc->pg_rpc_callops);
+ else
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ nfs_read_completion(hdr);
return ret;
}

@@ -419,15 +466,13 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
struct nfs_readargs *argp = &data->args;
struct nfs_readres *resp = &data->res;

- if (resp->eof || resp->count == argp->count)
- return;
-
/* This is a short read! */
nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
- if (resp->count == 0)
+ if (resp->count == 0) {
+ nfs_set_pgio_error(data->header, -EIO, argp->offset);
return;
-
+ }
/* Yes, so retry the read at the end of the data */
data->mds_offset += resp->count;
argp->offset += resp->count;
@@ -436,38 +481,34 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
rpc_restart_call_prepare(task);
}

-/*
- * Handle a read reply that fills part of a page.
- */
-static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
{
struct nfs_read_data *data = calldata;
-
+ struct nfs_pgio_header *hdr = data->header;
+
+ /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
if (nfs_readpage_result(task, data) != 0)
return;
if (task->tk_status < 0)
- return;
-
- nfs_readpage_truncate_uninitialised_page(data);
- nfs_readpage_retry(task, data);
+ nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
+ else if (data->res.eof) {
+ loff_t bound;
+
+ bound = data->args.offset + data->res.count;
+ spin_lock(&hdr->lock);
+ if (bound < hdr->io_start + hdr->good_bytes) {
+ set_bit(NFS_IOHDR_EOF, &hdr->flags);
+ clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
+ hdr->good_bytes = bound - hdr->io_start;
+ }
+ spin_unlock(&hdr->lock);
+ } else if (data->res.count != data->args.count)
+ nfs_readpage_retry(task, data);
}

-static void nfs_readpage_release_partial(void *calldata)
+static void nfs_readpage_release_common(void *calldata)
{
- struct nfs_read_data *data = calldata;
- struct nfs_page *req = data->header->req;
- struct page *page = req->wb_page;
- int status = data->task.tk_status;
-
- if (status < 0)
- set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags);
-
- if (atomic_dec_and_test(&req->wb_complete)) {
- if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags))
- SetPageUptodate(page);
- nfs_readpage_release(req);
- }
- nfs_readdata_release(data);
+ nfs_readdata_release(calldata);
}

void nfs_read_prepare(struct rpc_task *task, void *calldata)
@@ -476,75 +517,10 @@ void nfs_read_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
}

-static const struct rpc_call_ops nfs_read_partial_ops = {
- .rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_readpage_result_partial,
- .rpc_release = nfs_readpage_release_partial,
-};
-
-static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
-{
- unsigned int count = data->res.count;
- unsigned int base = data->args.pgbase;
- struct page **pages;
-
- if (data->res.eof)
- count = data->args.count;
- if (unlikely(count == 0))
- return;
- pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
- base &= ~PAGE_CACHE_MASK;
- count += base;
- for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
- SetPageUptodate(*pages);
- if (count == 0)
- return;
- /* Was this a short read? */
- if (data->res.eof || data->res.count == data->args.count)
- SetPageUptodate(*pages);
-}
-
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
-{
- struct nfs_read_data *data = calldata;
-
- if (nfs_readpage_result(task, data) != 0)
- return;
- if (task->tk_status < 0)
- return;
- /*
- * Note: nfs_readpage_retry may change the values of
- * data->args. In the multi-page case, we therefore need
- * to ensure that we call nfs_readpage_set_pages_uptodate()
- * first.
- */
- nfs_readpage_truncate_uninitialised_page(data);
- nfs_readpage_set_pages_uptodate(data);
- nfs_readpage_retry(task, data);
-}
-
-static void nfs_readpage_release_full(void *calldata)
-{
- struct nfs_read_data *data = calldata;
- struct nfs_pgio_header *hdr = data->header;
-
- while (!list_empty(&hdr->pages)) {
- struct nfs_page *req = nfs_list_entry(hdr->pages.next);
-
- nfs_list_remove_request(req);
- nfs_readpage_release(req);
- }
- nfs_readdata_release(calldata);
-}
-
-static const struct rpc_call_ops nfs_read_full_ops = {
+static const struct rpc_call_ops nfs_read_common_ops = {
.rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_readpage_result_full,
- .rpc_release = nfs_readpage_release_full,
+ .rpc_call_done = nfs_readpage_result_common,
+ .rpc_release = nfs_readpage_release_common,
};

/*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index eac30d6..5c52034 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -27,7 +27,6 @@ enum {
PG_CLEAN,
PG_NEED_COMMIT,
PG_NEED_RESCHED,
- PG_PARTIAL_READ_FAILED,
PG_COMMIT_TO_DS,
};

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 060896f..bf8fc99 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1182,14 +1182,30 @@ struct nfs_read_data {
struct nfs_client *ds_clp; /* pNFS data server */
};

+/* used as flag bits in nfs_pgio_header */
+enum {
+ NFS_IOHDR_ERROR = 0,
+ NFS_IOHDR_EOF,
+ NFS_IOHDR_REDO,
+};
+
struct nfs_pgio_header {
struct inode *inode;
struct rpc_cred *cred;
struct list_head pages;
+ struct list_head rpc_list;
+ atomic_t refcnt;
struct nfs_page *req;
struct pnfs_layout_segment *lseg;
+ loff_t io_start;
const struct rpc_call_ops *mds_ops;
+ void (*release) (struct nfs_pgio_header *hdr);
+ spinlock_t lock;
+ /* fields protected by lock */
int pnfs_error;
+ int error; /* merge with pnfs_error */
+ unsigned long good_bytes; /* boundary of good data */
+ unsigned long flags;
};

struct nfs_read_header {
--
1.7.2.1


2012-04-20 18:36:48

by Fred Isaman

[permalink] [raw]
Subject: [PATCH v2 07/28] NFS: add a struct nfs_commit_data to replace nfs_write_data in commits

Commits don't need the vectors of pages, etc. that writes do. Split out
a separate structure for the commit operation.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/direct.c | 17 +++++-------
fs/nfs/internal.h | 13 +++++----
fs/nfs/nfs3proc.c | 10 ++++++-
fs/nfs/nfs3xdr.c | 6 ++--
fs/nfs/nfs4filelayout.c | 65 +++++++++++++++++++++++++++++++++-------------
fs/nfs/nfs4proc.c | 23 ++++++++++++----
fs/nfs/nfs4xdr.c | 8 +++---
fs/nfs/proc.c | 8 +++++-
fs/nfs/write.c | 50 +++++++++++++++++++++--------------
include/linux/nfs_fs.h | 4 +-
include/linux/nfs_xdr.h | 45 ++++++++++++++++++++++++++++++--
11 files changed, 173 insertions(+), 76 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8a89423..5897dfe 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -82,7 +82,7 @@ struct nfs_direct_req {

/* commit state */
struct list_head rewrite_list; /* saved nfs_write_data structs */
- struct nfs_write_data * commit_data; /* special write_data for commits */
+ struct nfs_commit_data *commit_data; /* nfs_commit_data used for commits */
int flags;
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
@@ -524,7 +524,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)

static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
{
- struct nfs_write_data *data = calldata;
+ struct nfs_commit_data *data = calldata;

/* Call the NFS version-specific code */
NFS_PROTO(data->inode)->commit_done(task, data);
@@ -532,8 +532,8 @@ static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)

static void nfs_direct_commit_release(void *calldata)
{
- struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ struct nfs_commit_data *data = calldata;
+ struct nfs_direct_req *dreq = data->dreq;
int status = data->task.tk_status;

if (status < 0) {
@@ -551,14 +551,14 @@ static void nfs_direct_commit_release(void *calldata)
}

static const struct rpc_call_ops nfs_commit_direct_ops = {
- .rpc_call_prepare = nfs_write_prepare,
+ .rpc_call_prepare = nfs_commit_prepare,
.rpc_call_done = nfs_direct_commit_result,
.rpc_release = nfs_direct_commit_release,
};

static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
- struct nfs_write_data *data = dreq->commit_data;
+ struct nfs_commit_data *data = dreq->commit_data;
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
@@ -581,9 +581,6 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
data->args.fh = NFS_FH(data->inode);
data->args.offset = 0;
data->args.count = 0;
- data->args.context = dreq->ctx;
- data->args.lock_context = dreq->l_ctx;
- data->res.count = 0;
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
@@ -625,7 +622,7 @@ static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
{
dreq->commit_data = nfs_commitdata_alloc();
if (dreq->commit_data != NULL)
- dreq->commit_data->req = (struct nfs_page *) dreq;
+ dreq->commit_data->dreq = dreq;
}
#else
static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2476dc6..87e899d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -314,24 +314,25 @@ extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_writedata_release(struct nfs_write_data *wdata);
-extern void nfs_commit_free(struct nfs_write_data *p);
+extern void nfs_commit_free(struct nfs_commit_data *p);
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops,
int how);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_initiate_commit(struct nfs_write_data *data,
- struct rpc_clnt *clnt,
+extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_initiate_commit(struct rpc_clnt *clnt,
+ struct nfs_commit_data *data,
const struct rpc_call_ops *call_ops,
int how);
-extern void nfs_init_commit(struct nfs_write_data *data,
+extern void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg);
void nfs_retry_commit(struct list_head *page_list,
struct pnfs_layout_segment *lseg);
void nfs_commit_clear_lock(struct nfs_inode *nfsi);
-void nfs_commitdata_release(void *data);
-void nfs_commit_release_pages(struct nfs_write_data *data);
+void nfs_commitdata_release(struct nfs_commit_data *data);
+void nfs_commit_release_pages(struct nfs_commit_data *data);
void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head);
void nfs_request_remove_commit_list(struct nfs_page *req);

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 5242eae..b1daca7 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -848,7 +848,12 @@ static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
rpc_call_start(task);
}

-static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+ rpc_call_start(task);
+}
+
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
{
if (nfs3_async_handle_jukebox(task, data->inode))
return -EAGAIN;
@@ -856,7 +861,7 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}

-static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
}
@@ -907,6 +912,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
.write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
+ .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
.commit_done = nfs3_commit_done,
.lock = nfs3_proc_lock,
.clear_acl_cache = nfs3_forget_cached_acls,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a77cc9a..01e53e9 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1287,7 +1287,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
* };
*/
static void encode_commit3args(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_commitargs *args)
{
__be32 *p;

@@ -1300,7 +1300,7 @@ static void encode_commit3args(struct xdr_stream *xdr,

static void nfs3_xdr_enc_commit3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_commitargs *args)
{
encode_commit3args(xdr, args);
}
@@ -2319,7 +2319,7 @@ out_status:
*/
static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_commitres *result)
{
enum nfs_stat status;
int error;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 15aeba2..675ce3b 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -250,7 +250,7 @@ static int filelayout_write_done_cb(struct rpc_task *task,
}

/* Fake up some data that will cause nfs_commit_release to retry the writes. */
-static void prepare_to_resend_writes(struct nfs_write_data *data)
+static void prepare_to_resend_writes(struct nfs_commit_data *data)
{
struct nfs_page *first = nfs_list_entry(data->pages.next);

@@ -261,11 +261,11 @@ static void prepare_to_resend_writes(struct nfs_write_data *data)
}

static int filelayout_commit_done_cb(struct rpc_task *task,
- struct nfs_write_data *data)
+ struct nfs_commit_data *data)
{
int reset = 0;

- if (filelayout_async_handle_error(task, data->args.context->state,
+ if (filelayout_async_handle_error(task, data->context->state,
data->ds_clp, &reset) == -EAGAIN) {
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
@@ -315,15 +315,42 @@ static void filelayout_write_release(void *data)
wdata->mds_ops->rpc_release(data);
}

-static void filelayout_commit_release(void *data)
+static void filelayout_commit_prepare(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ struct nfs_commit_data *wdata = data;

- nfs_commit_release_pages(wdata);
- if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
- nfs_commit_clear_lock(NFS_I(wdata->inode));
- put_lseg(wdata->lseg);
- nfs_commitdata_release(wdata);
+ if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+ &wdata->args.seq_args, &wdata->res.seq_res,
+ task))
+ return;
+
+ rpc_call_start(task);
+}
+
+static void filelayout_write_commit_done(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *wdata = data;
+
+ /* Note this may cause RPC to be resent */
+ wdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_commit_count_stats(struct rpc_task *task, void *data)
+{
+ struct nfs_commit_data *cdata = data;
+
+ rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics);
+}
+
+static void filelayout_commit_release(void *calldata)
+{
+ struct nfs_commit_data *data = calldata;
+
+ nfs_commit_release_pages(data);
+ if (atomic_dec_and_test(&NFS_I(data->inode)->commits_outstanding))
+ nfs_commit_clear_lock(NFS_I(data->inode));
+ put_lseg(data->lseg);
+ nfs_commitdata_release(data);
}

static const struct rpc_call_ops filelayout_read_call_ops = {
@@ -341,9 +368,9 @@ static const struct rpc_call_ops filelayout_write_call_ops = {
};

static const struct rpc_call_ops filelayout_commit_call_ops = {
- .rpc_call_prepare = filelayout_write_prepare,
- .rpc_call_done = filelayout_write_call_done,
- .rpc_count_stats = filelayout_write_count_stats,
+ .rpc_call_prepare = filelayout_commit_prepare,
+ .rpc_call_done = filelayout_write_commit_done,
+ .rpc_count_stats = filelayout_commit_count_stats,
.rpc_release = filelayout_commit_release,
};

@@ -922,7 +949,7 @@ select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
return flseg->fh_array[i];
}

-static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
+static int filelayout_initiate_commit(struct nfs_commit_data *data, int how)
{
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
@@ -941,12 +968,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
return -EAGAIN;
}
dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
- data->write_done_cb = filelayout_commit_done_cb;
+ data->commit_done_cb = filelayout_commit_done_cb;
data->ds_clp = ds->ds_clp;
fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
if (fh)
data->args.fh = fh;
- return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
+ return nfs_initiate_commit(ds->ds_clp->cl_rpcclient, data,
&filelayout_commit_call_ops, how);
}

@@ -1008,7 +1035,7 @@ alloc_ds_commits(struct inode *inode, struct list_head *list)
{
struct nfs4_fl_commit_info *fl_cinfo;
struct nfs4_fl_commit_bucket *bucket;
- struct nfs_write_data *data;
+ struct nfs_commit_data *data;
int i, j;
unsigned int nreq = 0;

@@ -1044,7 +1071,7 @@ static int
filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
int how)
{
- struct nfs_write_data *data, *tmp;
+ struct nfs_commit_data *data, *tmp;
LIST_HEAD(list);
unsigned int nreq = 0;

@@ -1071,7 +1098,7 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
list_del_init(&data->pages);
if (!data->lseg) {
nfs_init_commit(data, mds_pages, NULL);
- nfs_initiate_commit(data, NFS_CLIENT(inode),
+ nfs_initiate_commit(NFS_CLIENT(inode), data,
data->mds_ops, how);
} else {
struct nfs4_fl_commit_info *fl_cinfo;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f82bde0..54f6268 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3413,7 +3413,17 @@ static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_
rpc_call_start(task);
}

-static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{
+ if (nfs4_setup_sequence(NFS_SERVER(data->inode),
+ &data->args.seq_args,
+ &data->res.seq_res,
+ task))
+ return;
+ rpc_call_start(task);
+}
+
+static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data)
{
struct inode *inode = data->inode;

@@ -3425,14 +3435,14 @@ static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *dat
return 0;
}

-static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_commit_data *data)
{
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
- return data->write_done_cb(task, data);
+ return data->commit_done_cb(task, data);
}

-static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->inode);

@@ -3441,8 +3451,8 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
data->res.fattr = NULL;
} else
data->args.bitmask = server->cache_consistency_bitmask;
- if (!data->write_done_cb)
- data->write_done_cb = nfs4_commit_done_cb;
+ if (data->commit_done_cb == NULL)
+ data->commit_done_cb = nfs4_commit_done_cb;
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
@@ -6498,6 +6508,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
.write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
+ .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
.commit_done = nfs4_commit_done,
.lock = nfs4_proc_lock,
.clear_acl_cache = nfs4_zap_acl_attr,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c74fdb1..c7ed9cb 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1103,7 +1103,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
encode_nfs4_stateid(xdr, arg->stateid);
}

-static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr)
{
__be32 *p;

@@ -2447,7 +2447,7 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
* a COMMIT request
*/
static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeargs *args)
+ struct nfs_commitargs *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -4101,7 +4101,7 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
}

-static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
{
int status;

@@ -6336,7 +6336,7 @@ out:
* Decode COMMIT response
*/
static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_writeres *res)
+ struct nfs_commitres *res)
{
struct compound_hdr hdr;
int status;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b63b6f4..bf80503 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -688,8 +688,13 @@ static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_d
rpc_call_start(task);
}

+static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
+{ /* commit_rpc_prepare op installed in nfs_v2_clientops */
+ BUG(); /* mirrors nfs_proc_commit_setup(); presumably unreachable because NFSv2 never issues COMMIT -- TODO confirm */
+}
+
static void
-nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
+nfs_proc_commit_setup(struct nfs_commit_data *data, struct rpc_message *msg)
{
BUG();
}
@@ -764,6 +769,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.write_rpc_prepare = nfs_proc_write_rpc_prepare,
.write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
+ .commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
.lock = nfs_proc_lock,
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c074623..54f7c0f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -48,11 +48,12 @@ static const struct rpc_call_ops nfs_commit_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
+static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;

-struct nfs_write_data *nfs_commitdata_alloc(void)
+struct nfs_commit_data *nfs_commitdata_alloc(void)
{
- struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
+ struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);

if (p) {
memset(p, 0, sizeof(*p));
@@ -62,10 +63,8 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);

-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_free(struct nfs_commit_data *p) /* struct nfs_commit_data has no pagevec, so there is nothing to kfree before returning p to the pool */
{
- if (p && (p->pagevec != &p->page_array[0]))
- kfree(p->pagevec);
mempool_free(p, nfs_commit_mempool);
}
EXPORT_SYMBOL_GPL(nfs_commit_free);
@@ -1179,6 +1178,13 @@ void nfs_write_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
}

+void nfs_commit_prepare(struct rpc_task *task, void *calldata)
+{ /* .rpc_call_prepare hook of nfs_commit_ops; calldata is the struct nfs_commit_data for this RPC */
+ struct nfs_commit_data *data = calldata;
+
+ NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); /* dispatch to the commit_rpc_prepare op reached through NFS_PROTO() */
+}
+
static const struct rpc_call_ops nfs_write_partial_ops = {
.rpc_call_prepare = nfs_write_prepare,
.rpc_call_done = nfs_writeback_done_partial,
@@ -1355,16 +1361,14 @@ void nfs_commit_clear_lock(struct nfs_inode *nfsi)
}
EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);

-void nfs_commitdata_release(void *data)
+void nfs_commitdata_release(struct nfs_commit_data *data)
{
- struct nfs_write_data *wdata = data;
-
- put_nfs_open_context(wdata->args.context);
- nfs_commit_free(wdata);
+ put_nfs_open_context(data->context); /* drops the reference nfs_init_commit() took with get_nfs_open_context() */
+ nfs_commit_free(data); /* returns the object to nfs_commit_mempool */
}
EXPORT_SYMBOL_GPL(nfs_commitdata_release);

-int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
+int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
const struct rpc_call_ops *call_ops,
int how)
{
@@ -1403,7 +1407,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_commit);
/*
* Set up the argument/result storage required for the RPC call.
*/
-void nfs_init_commit(struct nfs_write_data *data,
+void nfs_init_commit(struct nfs_commit_data *data,
struct list_head *head,
struct pnfs_layout_segment *lseg)
{
@@ -1424,8 +1428,7 @@ void nfs_init_commit(struct nfs_write_data *data,
/* Note: we always request a commit of the entire inode */
data->args.offset = 0;
data->args.count = 0;
- data->args.context = get_nfs_open_context(first->wb_context);
- data->res.count = 0;
+ data->context = get_nfs_open_context(first->wb_context);
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
@@ -1455,7 +1458,7 @@ EXPORT_SYMBOL_GPL(nfs_retry_commit);
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
- struct nfs_write_data *data;
+ struct nfs_commit_data *data;

data = nfs_commitdata_alloc();

@@ -1464,7 +1467,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)

/* Set up the argument struct */
nfs_init_commit(data, head, NULL);
- return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
+ return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, how);
out_bad:
nfs_retry_commit(head, NULL);
nfs_commit_clear_lock(NFS_I(inode));
@@ -1476,7 +1479,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
*/
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
- struct nfs_write_data *data = calldata;
+ struct nfs_commit_data *data = calldata;

dprintk("NFS: %5u nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status);
@@ -1485,7 +1488,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_done(task, data);
}

-void nfs_commit_release_pages(struct nfs_write_data *data)
+void nfs_commit_release_pages(struct nfs_commit_data *data)
{
struct nfs_page *req;
int status = data->task.tk_status;
@@ -1526,7 +1529,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_release_pages);

static void nfs_commit_release(void *calldata)
{
- struct nfs_write_data *data = calldata;
+ struct nfs_commit_data *data = calldata;

nfs_commit_release_pages(data);
nfs_commit_clear_lock(NFS_I(data->inode));
@@ -1534,7 +1537,7 @@ static void nfs_commit_release(void *calldata)
}

static const struct rpc_call_ops nfs_commit_ops = {
- .rpc_call_prepare = nfs_write_prepare,
+ .rpc_call_prepare = nfs_commit_prepare,
.rpc_call_done = nfs_commit_done,
.rpc_release = nfs_commit_release,
};
@@ -1753,6 +1756,13 @@ int __init nfs_init_writepagecache(void)
if (nfs_wdata_mempool == NULL)
return -ENOMEM;

+ nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
+ sizeof(struct nfs_commit_data),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (nfs_cdata_cachep == NULL)
+ return -ENOMEM;
+
nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
nfs_wdata_cachep);
if (nfs_commit_mempool == NULL)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 52a1bdb..d5d68f3 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -552,8 +552,8 @@ extern int nfs_wb_page(struct inode *inode, struct page* page);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
extern int nfs_commit_inode(struct inode *, int);
-extern struct nfs_write_data *nfs_commitdata_alloc(void);
-extern void nfs_commit_free(struct nfs_write_data *wdata);
+extern struct nfs_commit_data *nfs_commitdata_alloc(void);
+extern void nfs_commit_free(struct nfs_commit_data *data);
#else
static inline int
nfs_commit_inode(struct inode *inode, int how)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bfd0d1b..2aff66d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -514,6 +514,24 @@ struct nfs_writeres {
};

/*
+ * Arguments to the commit call.
+ */
+struct nfs_commitargs {
+ struct nfs_fh *fh;
+ __u64 offset; /* nfs_init_commit() sets this to 0 */
+ __u32 count; /* nfs_init_commit() sets this to 0 together with offset: "always request a commit of the entire inode" */
+ const u32 *bitmask; /* nfs4_proc_commit_setup() may point this at server->cache_consistency_bitmask */
+ struct nfs4_sequence_args seq_args; /* handed to nfs41_init_sequence() and nfs4_setup_sequence() */
+};
+
+struct nfs_commitres {
+ struct nfs_fattr *fattr; /* nfs_init_commit() points this at the owning nfs_commit_data.fattr; nfs4_proc_commit_setup() may reset it to NULL */
+ struct nfs_writeverf *verf; /* nfs_init_commit() points this at the owning nfs_commit_data.verf */
+ const struct nfs_server *server; /* filled in by nfs4_proc_commit_setup() */
+ struct nfs4_sequence_res seq_res; /* checked by nfs4_sequence_done() in nfs4_commit_done() */
+};
+
+/*
* Common arguments to the unlink call
*/
struct nfs_removeargs {
@@ -1166,6 +1184,8 @@ struct nfs_read_data {
struct page *page_array[NFS_PAGEVEC_SIZE];
};

+struct nfs_direct_req;
+
struct nfs_write_data {
struct rpc_task task;
struct inode *inode;
@@ -1181,7 +1201,6 @@ struct nfs_write_data {
struct nfs_writeres res; /* result struct */
struct pnfs_layout_segment *lseg;
struct nfs_client *ds_clp; /* pNFS data server */
- int ds_commit_index;
const struct rpc_call_ops *mds_ops;
int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
#ifdef CONFIG_NFS_V4
@@ -1192,6 +1211,25 @@ struct nfs_write_data {
struct page *page_array[NFS_PAGEVEC_SIZE];
};

+struct nfs_commit_data { /* per-COMMIT RPC state; replaces struct nfs_write_data on the commit path */
+ struct rpc_task task;
+ struct inode *inode;
+ struct rpc_cred *cred;
+ struct nfs_fattr fattr; /* storage that res.fattr points at (nfs_init_commit) */
+ struct nfs_writeverf verf; /* storage that res.verf points at (nfs_init_commit) */
+ struct list_head pages; /* Coalesced requests we wish to flush */
+ struct list_head list; /* lists of struct nfs_write_data -- NOTE(review): names nfs_write_data although this is nfs_commit_data; confirm what is linked here */
+ struct nfs_direct_req *dreq; /* O_DIRECT request */
+ struct nfs_commitargs args; /* argument struct */
+ struct nfs_commitres res; /* result struct */
+ struct nfs_open_context *context; /* reference taken in nfs_init_commit(), dropped in nfs_commitdata_release() */
+ struct pnfs_layout_segment *lseg;
+ struct nfs_client *ds_clp; /* pNFS data server */
+ int ds_commit_index;
+ const struct rpc_call_ops *mds_ops; /* call ops nfs_commit_list() passes to nfs_initiate_commit() */
+ int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); /* invoked from nfs4_commit_done(); nfs4_proc_commit_setup() defaults it to nfs4_commit_done_cb when NULL */
+};
+
struct nfs_unlinkdata {
struct hlist_node list;
struct nfs_removeargs args;
@@ -1272,8 +1310,9 @@ struct nfs_rpc_ops {
void (*write_setup) (struct nfs_write_data *, struct rpc_message *);
void (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
int (*write_done) (struct rpc_task *, struct nfs_write_data *);
- void (*commit_setup) (struct nfs_write_data *, struct rpc_message *);
- int (*commit_done) (struct rpc_task *, struct nfs_write_data *);
+ void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
+ void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
+ int (*commit_done) (struct rpc_task *, struct nfs_commit_data *);
int (*lock)(struct file *, int, struct file_lock *);
int (*lock_check_bounds)(const struct file_lock *);
void (*clear_acl_cache)(struct inode *);
--
1.7.2.1