2011-03-03 15:14:45

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 0/9] NFSv4.1: pnfs wave4 submission v2

These apply on top of Andy's most recent wave3 patches.

They can also be found at the branch wave4-submit-2 at
git://linux-nfs.org/~iisaman/linux-pnfs.git

These include bugfixes and cleanups from the recent connectathon.
The final two patches should probably be at the start of the wave 3
submission, but were not part of the connectathon testing, so I've
put them last.

These patches give write functionality to the pnfs file layout driver.
The primary limitation is that we have not yet modified the commit
code, so we cannot yet handle COMMITs to the data server. We get
around this for the moment by sending any WRITE to a data server that
does not have commit-through-MDS set as an NFS_FILE_SYNC write. The
next patchset will address this limitation.

Fred



2011-03-03 15:14:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 7/9] NFSv4.1: pnfs filelayout driver write

Allows the pnfs filelayout driver to write to the data servers.

Note that COMMIT to data servers will be implemented in a future
patch. To avoid improper behavior, for the moment any WRITE to a data
server that would also require a COMMIT to the data server is sent
NFS_FILE_SYNC.

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Dean Hildebrand <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
Signed-off-by: Mingyang Guo <[email protected]>
Signed-off-by: Oleg Drokin <[email protected]>
Signed-off-by: Ricardo Labiaga <[email protected]>
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/internal.h | 5 ++
fs/nfs/nfs4filelayout.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfs/nfs4proc.c | 17 ++++++++
fs/nfs/write.c | 5 ++-
include/linux/nfs_xdr.h | 2 +
5 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1a3228e..d1ddc23 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -276,6 +276,10 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);

/* write.c */
+extern int nfs_initiate_write(struct nfs_write_data *data,
+ struct rpc_clnt *clnt,
+ const struct rpc_call_ops *call_ops,
+ int how);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
@@ -291,6 +295,7 @@ extern int nfs4_init_client(struct nfs_client *clp,
const char *ip_addr,
rpc_authflavor_t authflavour,
int noresvport);
+extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
extern int _nfs4_call_sync(struct nfs_server *server,
struct rpc_message *msg,
struct nfs4_sequence_args *args,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 35f58eb..8237982 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -189,12 +189,69 @@ static void filelayout_read_release(void *data)
rdata->mds_ops->rpc_release(data);
}

+static int filelayout_write_done_cb(struct rpc_task *task,
+ struct nfs_write_data *data)
+{
+ int reset = 0;
+
+ if (filelayout_async_handle_error(task, data->args.context->state,
+ data->ds_clp, &reset) == -EAGAIN) {
+ struct nfs_client *clp;
+
+ dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
+ __func__, data->ds_clp, data->ds_clp->cl_session);
+ if (reset) {
+ filelayout_set_lo_fail(data->lseg);
+ nfs4_reset_write(task, data);
+ clp = NFS_SERVER(data->inode)->nfs_client;
+ } else
+ clp = data->ds_clp;
+ nfs_restart_rpc(task, clp);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static void filelayout_write_prepare(struct rpc_task *task, void *data)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+ if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
+ &wdata->args.seq_args, &wdata->res.seq_res,
+ 0, task))
+ return;
+
+ rpc_call_start(task);
+}
+
+static void filelayout_write_call_done(struct rpc_task *task, void *data)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+ /* Note this may cause RPC to be resent */
+ wdata->mds_ops->rpc_call_done(task, data);
+}
+
+static void filelayout_write_release(void *data)
+{
+ struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+
+ wdata->mds_ops->rpc_release(data);
+}
+
struct rpc_call_ops filelayout_read_call_ops = {
.rpc_call_prepare = filelayout_read_prepare,
.rpc_call_done = filelayout_read_call_done,
.rpc_release = filelayout_read_release,
};

+struct rpc_call_ops filelayout_write_call_ops = {
+ .rpc_call_prepare = filelayout_write_prepare,
+ .rpc_call_done = filelayout_write_call_done,
+ .rpc_release = filelayout_write_release,
+};
+
static enum pnfs_try_status
filelayout_read_pagelist(struct nfs_read_data *data)
{
@@ -238,10 +295,52 @@ filelayout_read_pagelist(struct nfs_read_data *data)
return PNFS_ATTEMPTED;
}

+/* Perform async writes. */
static enum pnfs_try_status
filelayout_write_pagelist(struct nfs_write_data *data, int sync)
{
- return PNFS_NOT_ATTEMPTED;
+ struct pnfs_layout_segment *lseg = data->lseg;
+ struct nfs4_pnfs_ds *ds;
+ loff_t offset = data->args.offset;
+ u32 j, idx;
+ struct nfs_fh *fh;
+ int status;
+
+ /* Retrieve the correct rpc_client for the byte range */
+ j = nfs4_fl_calc_j_index(lseg, offset);
+ idx = nfs4_fl_calc_ds_index(lseg, j);
+ ds = nfs4_fl_prepare_ds(lseg, idx);
+ if (!ds) {
+ printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
+ set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
+ set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
+ return PNFS_NOT_ATTEMPTED;
+ }
+ dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+ data->inode->i_ino, sync, (size_t) data->args.count, offset,
+ ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+
+ /* We can't handle commit to ds yet */
+ if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
+ data->args.stable = NFS_FILE_SYNC;
+
+ data->write_done_cb = filelayout_write_done_cb;
+ data->ds_clp = ds->ds_clp;
+ fh = nfs4_fl_select_ds_fh(lseg, j);
+ if (fh)
+ data->args.fh = fh;
+ /*
+ * Get the file offset on the dserver. Set the write offset to
+ * this offset and save the original offset.
+ */
+ data->args.offset = filelayout_get_dserver_offset(lseg, offset);
+ data->mds_offset = offset;
+
+ /* Perform an asynchronous write */
+ status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
+ &filelayout_write_call_ops, sync);
+ BUG_ON(status != 0);
+ return PNFS_ATTEMPTED;
}

/*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e9f8a41..7d80e78 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3146,6 +3146,23 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
return data->write_done_cb(task, data);
}

+/* Reset the nfs_write_data to send the write to the MDS. */
+void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
+{
+ dprintk("%s Reset task for i/o through\n", __func__);
+ put_lseg(data->lseg);
+ data->lseg = NULL;
+ data->ds_clp = NULL;
+ data->write_done_cb = nfs4_write_done_cb;
+ data->args.fh = NFS_FH(data->inode);
+ data->args.bitmask = data->res.server->cache_consistency_bitmask;
+ data->args.offset = data->mds_offset;
+ data->res.fattr = &data->fattr;
+ task->tk_ops = data->mds_ops;
+ rpc_task_reset_client(task, NFS_CLIENT(data->inode));
+}
+EXPORT_SYMBOL_GPL(nfs4_reset_write);
+
static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->inode);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1852a54..06a1f3f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -783,7 +783,7 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}

-static int nfs_initiate_write(struct nfs_write_data *data,
+int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops,
int how)
@@ -833,6 +833,7 @@ static int nfs_initiate_write(struct nfs_write_data *data,
out:
return ret;
}
+EXPORT_SYMBOL_GPL(nfs_initiate_write);

/*
* Set up the argument/result storage required for the RPC call.
@@ -1194,6 +1195,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
static unsigned long complain;

+ /* Note this will print the MDS for a DS write */
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
@@ -1214,6 +1216,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
/* Was this an NFSv2 write or an NFSv3 stable write? */
if (resp->verf->committed != NFS_UNSTABLE) {
/* Resend from where the server left off */
+ data->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index c82ad33..3440f5a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1039,11 +1039,13 @@ struct nfs_write_data {
struct nfs_writeargs args; /* argument struct */
struct nfs_writeres res; /* result struct */
struct pnfs_layout_segment *lseg;
+ struct nfs_client *ds_clp; /* pNFS data server */
const struct rpc_call_ops *mds_ops;
int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
#endif
+ __u64 mds_offset; /* Filelayout dense stripe */
struct page *page_array[NFS_PAGEVEC_SIZE];
};

--
1.7.2.1


2011-03-03 15:14:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 4/9] NFSv4.1: trigger LAYOUTGET for writes

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pnfs.c | 22 ++++++++++++++++++++++
fs/nfs/pnfs.h | 7 +++++++
fs/nfs/write.c | 32 ++++++++++++++++++++------------
3 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3e54514..5f205d3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -873,6 +873,28 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
}

+static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *prev,
+ struct nfs_page *req)
+{
+ if (pgio->pg_count == prev->wb_bytes) {
+ /* This is the first coalesce call for a series of nfs_pages */
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ prev->wb_context,
+ IOMODE_RW);
+ }
+ return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
+}
+
+void
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
+{
+ struct pnfs_layoutdriver_type *ld;
+
+ ld = NFS_SERVER(inode)->pnfs_curr_ld;
+ pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
+}
+
/*
* Call the appropriate parallel I/O subsystem read function.
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index acbb778..1d4e631 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -123,6 +123,7 @@ void unset_pnfs_layoutdriver(struct nfs_server *);
enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
const struct rpc_call_ops *);
void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
+void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
@@ -235,6 +236,12 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
pgio->pg_test = NULL;
}

+static inline void
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
+{
+ pgio->pg_test = NULL;
+}
+
#endif /* CONFIG_NFS_V4_1 */

#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0df18ae..c618d41 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -919,6 +919,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);

+ BUG_ON(lseg);
+ lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
ClearPageError(page);
offset = 0;
nbytes = count;
@@ -940,6 +942,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
nbytes -= wsize;
} while (nbytes != 0);

+ put_lseg(lseg);
return ret;

out_bad:
@@ -965,11 +968,18 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
+ int ret;

data = nfs_writedata_alloc(npages);
- if (!data)
- goto out_bad;
-
+ if (!data) {
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_redirty_request(req);
+ }
+ ret = -ENOMEM;
+ goto out;
+ }
pages = data->pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
@@ -979,16 +989,14 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
+ if ((!lseg) && list_is_singular(&data->pages))
+ lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);

/* Set up the argument struct */
- return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
- out_bad:
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_redirty_request(req);
- }
- return -ENOMEM;
+ ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
+out:
+ put_lseg(lseg); /* Cleans any gotten in ->pg_test */
+ return ret;
}

static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -996,7 +1004,7 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
{
size_t wsize = NFS_SERVER(inode)->wsize;

- pgio->pg_test = NULL;
+ pnfs_pageio_init_write(pgio, inode);

if (wsize < PAGE_CACHE_SIZE)
nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
--
1.7.2.1


2011-03-03 15:14:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 5/9] NFSv4.1: implement generic pnfs layer write switch

From: Andy Adamson <[email protected]>

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Boaz Harrosh <[email protected]>
Signed-off-by: Dean Hildebrand <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
Signed-off-by: J. Bruce Fields <[email protected]>
Signed-off-by: Mike Sager <[email protected]>
Signed-off-by: Ricardo Labiaga <[email protected]>
Signed-off-by: Tao Guo <[email protected]>
Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Benny Halevy <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4filelayout.c | 7 +++++++
fs/nfs/pnfs.c | 24 ++++++++++++++++++++++++
fs/nfs/pnfs.h | 10 ++++++++++
fs/nfs/write.c | 4 ++++
include/linux/nfs_iostat.h | 1 +
include/linux/nfs_xdr.h | 1 +
6 files changed, 47 insertions(+), 0 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index fea483a..35f58eb 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -238,6 +238,12 @@ filelayout_read_pagelist(struct nfs_read_data *data)
return PNFS_ATTEMPTED;
}

+static enum pnfs_try_status
+filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+{
+ return PNFS_NOT_ATTEMPTED;
+}
+
/*
* filelayout_check_layout()
*
@@ -455,6 +461,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.free_lseg = filelayout_free_lseg,
.pg_test = filelayout_pg_test,
.read_pagelist = filelayout_read_pagelist,
+ .write_pagelist = filelayout_write_pagelist,
};

static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 5f205d3..f38813a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -895,6 +895,30 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
}

+enum pnfs_try_status
+pnfs_try_to_write_data(struct nfs_write_data *wdata,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ struct inode *inode = wdata->inode;
+ enum pnfs_try_status trypnfs;
+ struct nfs_server *nfss = NFS_SERVER(inode);
+
+ wdata->mds_ops = call_ops;
+
+ dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
+ inode->i_ino, wdata->args.count, wdata->args.offset, how);
+
+ trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
+ if (trypnfs == PNFS_NOT_ATTEMPTED) {
+ put_lseg(wdata->lseg);
+ wdata->lseg = NULL;
+ } else
+ nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
+
+ dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
+ return trypnfs;
+}
+
/*
* Call the appropriate parallel I/O subsystem read function.
*/
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 1d4e631..6380b94 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -79,6 +79,7 @@ struct pnfs_layoutdriver_type {
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/
enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
+ enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
};

struct pnfs_layout_hdr {
@@ -120,6 +121,8 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
enum pnfs_iomode access_type);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
+enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
+ const struct rpc_call_ops *, int);
enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
const struct rpc_call_ops *);
void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
@@ -200,6 +203,13 @@ pnfs_try_to_read_data(struct nfs_read_data *data,
return PNFS_NOT_ATTEMPTED;
}

+static inline enum pnfs_try_status
+pnfs_try_to_write_data(struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops, int how)
+{
+ return PNFS_NOT_ATTEMPTED;
+}
+
static inline bool
pnfs_roc(struct inode *ino)
{
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c618d41..1852a54 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -873,6 +873,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);

+ if (data->lseg &&
+ (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
+ return 0;
+
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}

diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h
index 37a1437..8866bb3 100644
--- a/include/linux/nfs_iostat.h
+++ b/include/linux/nfs_iostat.h
@@ -114,6 +114,7 @@ enum nfs_stat_eventcounters {
NFSIOS_SHORTWRITE,
NFSIOS_DELAY,
NFSIOS_PNFS_READ,
+ NFSIOS_PNFS_WRITE,
__NFSIOS_COUNTSMAX,
};

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 09d9681..c82ad33 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1039,6 +1039,7 @@ struct nfs_write_data {
struct nfs_writeargs args; /* argument struct */
struct nfs_writeres res; /* result struct */
struct pnfs_layout_segment *lseg;
+ const struct rpc_call_ops *mds_ops;
int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
--
1.7.2.1


2011-03-03 15:14:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 3/9] NFSv4.1: Send lseg down into nfs_write_rpcsetup

We grab the lseg sent in from the doio function and attach it to
each struct nfs_write_data created. This is how the lseg will be
sent to the layout driver.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/write.c | 7 +++++--
include/linux/nfs_xdr.h | 1 +
2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5604854..0df18ae 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,6 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)

static void nfs_writedata_release(struct nfs_write_data *wdata)
{
+ put_lseg(wdata->lseg);
put_nfs_open_context(wdata->args.context);
nfs_writedata_free(wdata);
}
@@ -840,6 +841,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
+ struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;
@@ -850,6 +852,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->req = req;
data->inode = inode = req->wb_context->path.dentry->d_inode;
data->cred = req->wb_context->cred;
+ data->lseg = get_lseg(lseg);

data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -930,7 +933,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
if (nbytes < wsize)
wsize = nbytes;
ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
- wsize, offset, how);
+ wsize, offset, lseg, how);
if (ret == 0)
ret = ret2;
offset += wsize;
@@ -978,7 +981,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
req = nfs_list_entry(data->pages.next);

/* Set up the argument struct */
- return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
+ return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
out_bad:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 21cd41d..09d9681 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1038,6 +1038,7 @@ struct nfs_write_data {
unsigned int npages; /* Max length of pagevec */
struct nfs_writeargs args; /* argument struct */
struct nfs_writeres res; /* result struct */
+ struct pnfs_layout_segment *lseg;
int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
--
1.7.2.1


2011-03-03 15:14:46

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 6/9] NFSv4.1: remove GETATTR from ds writes

Any WRITE compound directed to a data server needs to have the
GETATTR calls suppressed.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4proc.c | 6 +++++-
fs/nfs/nfs4xdr.c | 8 +++++---
2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e79131b..e9f8a41 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3150,7 +3150,11 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
{
struct nfs_server *server = NFS_SERVER(data->inode);

- data->args.bitmask = server->cache_consistency_bitmask;
+ if (data->lseg) {
+ data->args.bitmask = NULL;
+ data->res.fattr = NULL;
+ } else
+ data->args.bitmask = server->cache_consistency_bitmask;
if (!data->write_done_cb)
data->write_done_cb = nfs4_write_done_cb;
data->res.server = server;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2380c45..c35880c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2275,7 +2275,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_putfh(xdr, args->fh, &hdr);
encode_write(xdr, args, &hdr);
req->rq_snd_buf.flags |= XDRBUF_WRITE;
- encode_getfattr(xdr, args->bitmask, &hdr);
+ if (args->bitmask)
+ encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}

@@ -5694,8 +5695,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_write(xdr, res);
if (status)
goto out;
- decode_getfattr(xdr, res->fattr, res->server,
- !RPC_IS_ASYNC(rqstp->rq_task));
+ if (res->fattr)
+ decode_getfattr(xdr, res->fattr, res->server,
+ !RPC_IS_ASYNC(rqstp->rq_task));
if (!status)
status = res->count;
out:
--
1.7.2.1


2011-03-03 15:14:45

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 1/9] NFSv4.1: rearrange nfs_write_rpcsetup

From: Andy Adamson <[email protected]>

Reorder nfs_write_rpcsetup, preparing for a pnfs entry point.

Signed-off-by: Andy Adamson <[email protected]>
Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/write.c | 82 +++++++++++++++++++++++++++++++------------------------
1 files changed, 46 insertions(+), 36 deletions(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index aca0268..5604854 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -782,25 +782,21 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}

-/*
- * Set up the argument/result storage required for the RPC call.
- */
-static int nfs_write_rpcsetup(struct nfs_page *req,
- struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- unsigned int count, unsigned int offset,
- int how)
+static int nfs_initiate_write(struct nfs_write_data *data,
+ struct rpc_clnt *clnt,
+ const struct rpc_call_ops *call_ops,
+ int how)
{
- struct inode *inode = req->wb_context->path.dentry->d_inode;
+ struct inode *inode = data->inode;
int priority = flush_task_priority(how);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
- .rpc_cred = req->wb_context->cred,
+ .rpc_cred = data->cred,
};
struct rpc_task_setup task_setup_data = {
- .rpc_client = NFS_CLIENT(inode),
+ .rpc_client = clnt,
.task = &data->task,
.rpc_message = &msg,
.callback_ops = call_ops,
@@ -811,12 +807,49 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
};
int ret = 0;

+ /* Set up the initial task struct. */
+ NFS_PROTO(inode)->write_setup(data, &msg);
+
+ dprintk("NFS: %5u initiated write call "
+ "(req %s/%lld, %u bytes @ offset %llu)\n",
+ data->task.tk_pid,
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto out;
+ }
+ if (how & FLUSH_SYNC) {
+ ret = rpc_wait_for_completion_task(task);
+ if (ret == 0)
+ ret = task->tk_status;
+ }
+ rpc_put_task(task);
+out:
+ return ret;
+}
+
+/*
+ * Set up the argument/result storage required for the RPC call.
+ */
+static int nfs_write_rpcsetup(struct nfs_page *req,
+ struct nfs_write_data *data,
+ const struct rpc_call_ops *call_ops,
+ unsigned int count, unsigned int offset,
+ int how)
+{
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
+
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */

data->req = req;
data->inode = inode = req->wb_context->path.dentry->d_inode;
- data->cred = msg.rpc_cred;
+ data->cred = req->wb_context->cred;

data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
@@ -837,30 +870,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);

- /* Set up the initial task struct. */
- NFS_PROTO(inode)->write_setup(data, &msg);
-
- dprintk("NFS: %5u initiated write call "
- "(req %s/%lld, %u bytes @ offset %llu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- count,
- (unsigned long long)data->args.offset);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- ret = PTR_ERR(task);
- goto out;
- }
- if (how & FLUSH_SYNC) {
- ret = rpc_wait_for_completion_task(task);
- if (ret == 0)
- ret = task->tk_status;
- }
- rpc_put_task(task);
-out:
- return ret;
+ return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}

/* If a nfs_flush_* function fails, it should remove reqs from @head and
--
1.7.2.1


2011-03-03 15:14:47

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 9/9] NFSv4.1: Clear lseg pointer in ->doio function

Now that we have access to the pointer, clear it immediately after
the put, instead of in caller.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pagelist.c | 1 -
fs/nfs/read.c | 2 ++
fs/nfs/write.c | 2 ++
3 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 9f62874..23e7944 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -312,7 +312,6 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
if (!list_empty(&desc->pg_list)) {
int error = desc->pg_doio(desc);
- desc->pg_lseg = NULL;
if (error < 0)
desc->pg_error = error;
else
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ab9c776..4b764c6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -311,6 +311,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
nbytes -= rsize;
} while (nbytes != 0);
put_lseg(lseg);
+ desc->pg_lseg = NULL;

return ret;

@@ -357,6 +358,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
0, lseg);
out:
put_lseg(lseg);
+ desc->pg_lseg = NULL;
return ret;
}

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ccc7c22..bdb72f3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -949,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
} while (nbytes != 0);

put_lseg(lseg);
+ desc->pg_lseg = NULL;
return ret;

out_bad:
@@ -1005,6 +1006,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
out:
put_lseg(lseg); /* Cleans any gotten in ->pg_test */
+ desc->pg_lseg = NULL;
return ret;
}

--
1.7.2.1


2011-03-10 07:58:12

by Benny Halevy

[permalink] [raw]
Subject: Re: [PATCH 8/9] NFSv4.1: rearrange ->doio args

On 2011-03-03 07:13, Fred Isaman wrote:
> This will make it possible to clear the lseg pointer in the same
> function where it is put, instead of in the caller, nfs_pageio_doio().

So much better this way :)
I'm glad we discussed this at Connectathon!

Benny

>
> Signed-off-by: Fred Isaman <[email protected]>
> ---
> fs/nfs/pagelist.c | 10 ++--------
> fs/nfs/read.c | 42 +++++++++++++++++++++++++-----------------
> fs/nfs/write.c | 28 ++++++++++++++++------------
> include/linux/nfs_page.h | 4 ++--
> 4 files changed, 45 insertions(+), 39 deletions(-)
>
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index 45b0fb8..9f62874 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -214,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req)
> */
> void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> struct inode *inode,
> - int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *),
> + int (*doio)(struct nfs_pageio_descriptor *),
> size_t bsize,
> int io_flags)
> {
> @@ -311,13 +311,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
> static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
> {
> if (!list_empty(&desc->pg_list)) {
> - int error = desc->pg_doio(desc->pg_inode,
> - &desc->pg_list,
> - nfs_page_array_len(desc->pg_base,
> - desc->pg_count),
> - desc->pg_count,
> - desc->pg_ioflags,
> - desc->pg_lseg);
> + int error = desc->pg_doio(desc);
> desc->pg_lseg = NULL;
> if (error < 0)
> desc->pg_error = error;
> diff --git a/fs/nfs/read.c b/fs/nfs/read.c
> index f40c7f4..ab9c776 100644
> --- a/fs/nfs/read.c
> +++ b/fs/nfs/read.c
> @@ -31,8 +31,8 @@
>
> #define NFSDBG_FACILITY NFSDBG_PAGECACHE
>
> -static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *);
> -static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *);
> +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
> +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
> static const struct rpc_call_ops nfs_read_partial_ops;
> static const struct rpc_call_ops nfs_read_full_ops;
>
> @@ -117,9 +117,9 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
> int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
> struct page *page)
> {
> - LIST_HEAD(one_request);
> struct nfs_page *new;
> unsigned int len;
> + struct nfs_pageio_descriptor pgio;
>
> len = nfs_page_length(page);
> if (len == 0)
> @@ -132,11 +132,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
> if (len < PAGE_CACHE_SIZE)
> zero_user_segment(page, len, PAGE_CACHE_SIZE);
>
> - nfs_list_add_request(new, &one_request);
> + nfs_pageio_init(&pgio, inode, NULL, 0, 0);
> + nfs_list_add_request(new, &pgio.pg_list);
> + pgio.pg_count = len;
> +
> if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
> - nfs_pagein_multi(inode, &one_request, 1, len, 0, NULL);
> + nfs_pagein_multi(&pgio);
> else
> - nfs_pagein_one(inode, &one_request, 1, len, 0, NULL);
> + nfs_pagein_one(&pgio);
> return 0;
> }
>
> @@ -258,20 +261,21 @@ nfs_async_read_error(struct list_head *head)
> * won't see the new data until our attribute cache is updated. This is more
> * or less conventional NFS client behavior.
> */
> -static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg)
> +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
> {
> - struct nfs_page *req = nfs_list_entry(head->next);
> + struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
> struct page *page = req->wb_page;
> struct nfs_read_data *data;
> - size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
> + size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
> unsigned int offset;
> int requests = 0;
> int ret = 0;
> + struct pnfs_layout_segment *lseg;
> LIST_HEAD(list);
>
> nfs_list_remove_request(req);
>
> - nbytes = count;
> + nbytes = desc->pg_count;
> do {
> size_t len = min(nbytes,rsize);
>
> @@ -284,11 +288,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
> } while(nbytes != 0);
> atomic_set(&req->wb_complete, requests);
>
> - /* We know lseg==NULL */
> - lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ);
> + BUG_ON(desc->pg_lseg != NULL);
> + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
> ClearPageError(page);
> offset = 0;
> - nbytes = count;
> + nbytes = desc->pg_count;
> do {
> int ret2;
>
> @@ -321,14 +325,17 @@ out_bad:
> return -ENOMEM;
> }
>
> -static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg)
> +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
> {
> struct nfs_page *req;
> struct page **pages;
> struct nfs_read_data *data;
> + struct list_head *head = &desc->pg_list;
> + struct pnfs_layout_segment *lseg = desc->pg_lseg;
> int ret = -ENOMEM;
>
> - data = nfs_readdata_alloc(npages);
> + data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
> + desc->pg_count));
> if (!data) {
> nfs_async_read_error(head);
> goto out;
> @@ -344,9 +351,10 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
> }
> req = nfs_list_entry(data->pages.next);
> if ((!lseg) && list_is_singular(&data->pages))
> - lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ);
> + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
>
> - ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0, lseg);
> + ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
> + 0, lseg);
> out:
> put_lseg(lseg);
> return ret;
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 06a1f3f..ccc7c22 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -898,20 +898,21 @@ static void nfs_redirty_request(struct nfs_page *req)
> * Generate multiple small requests to write out a single
> * contiguous dirty area on one page.
> */
> -static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg)
> +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
> {
> - struct nfs_page *req = nfs_list_entry(head->next);
> + struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
> struct page *page = req->wb_page;
> struct nfs_write_data *data;
> - size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
> + size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
> unsigned int offset;
> int requests = 0;
> int ret = 0;
> + struct pnfs_layout_segment *lseg;
> LIST_HEAD(list);
>
> nfs_list_remove_request(req);
>
> - nbytes = count;
> + nbytes = desc->pg_count;
> do {
> size_t len = min(nbytes, wsize);
>
> @@ -924,11 +925,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
> } while (nbytes != 0);
> atomic_set(&req->wb_complete, requests);
>
> - BUG_ON(lseg);
> - lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
> + BUG_ON(desc->pg_lseg);
> + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
> ClearPageError(page);
> offset = 0;
> - nbytes = count;
> + nbytes = desc->pg_count;
> do {
> int ret2;
>
> @@ -940,7 +941,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
> if (nbytes < wsize)
> wsize = nbytes;
> ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
> - wsize, offset, lseg, how);
> + wsize, offset, lseg, desc->pg_ioflags);
> if (ret == 0)
> ret = ret2;
> offset += wsize;
> @@ -968,14 +969,17 @@ out_bad:
> * This is the case if nfs_updatepage detects a conflicting request
> * that has been written but not committed.
> */
> -static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg)
> +static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
> {
> struct nfs_page *req;
> struct page **pages;
> struct nfs_write_data *data;
> + struct list_head *head = &desc->pg_list;
> + struct pnfs_layout_segment *lseg = desc->pg_lseg;
> int ret;
>
> - data = nfs_writedata_alloc(npages);
> + data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
> + desc->pg_count));
> if (!data) {
> while (!list_empty(head)) {
> req = nfs_list_entry(head->next);
> @@ -995,10 +999,10 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
> }
> req = nfs_list_entry(data->pages.next);
> if ((!lseg) && list_is_singular(&data->pages))
> - lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
> + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
>
> /* Set up the argument struct */
> - ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
> + ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
> out:
> put_lseg(lseg); /* Cleans any gotten in ->pg_test */
> return ret;
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index ba88ff4..90907ad 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -59,7 +59,7 @@ struct nfs_pageio_descriptor {
> unsigned int pg_base;
>
> struct inode *pg_inode;
> - int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *);
> + int (*pg_doio)(struct nfs_pageio_descriptor *);
> int pg_ioflags;
> int pg_error;
> struct pnfs_layout_segment *pg_lseg;
> @@ -81,7 +81,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
> pgoff_t idx_start, unsigned int npages, int tag);
> extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
> struct inode *inode,
> - int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *),
> + int (*doio)(struct nfs_pageio_descriptor *desc),
> size_t bsize,
> int how);
> extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,

2011-03-03 15:14:47

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 8/9] NFSv4.1: rearrange ->doio args

This will make it possible to clear the lseg pointer in the same
function where it is put, instead of in the caller, nfs_pageio_doio().

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/pagelist.c | 10 ++--------
fs/nfs/read.c | 42 +++++++++++++++++++++++++-----------------
fs/nfs/write.c | 28 ++++++++++++++++------------
include/linux/nfs_page.h | 4 ++--
4 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 45b0fb8..9f62874 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -214,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req)
*/
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *),
+ int (*doio)(struct nfs_pageio_descriptor *),
size_t bsize,
int io_flags)
{
@@ -311,13 +311,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
if (!list_empty(&desc->pg_list)) {
- int error = desc->pg_doio(desc->pg_inode,
- &desc->pg_list,
- nfs_page_array_len(desc->pg_base,
- desc->pg_count),
- desc->pg_count,
- desc->pg_ioflags,
- desc->pg_lseg);
+ int error = desc->pg_doio(desc);
desc->pg_lseg = NULL;
if (error < 0)
desc->pg_error = error;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f40c7f4..ab9c776 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,8 +31,8 @@

#define NFSDBG_FACILITY NFSDBG_PAGECACHE

-static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *);
-static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *);
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

@@ -117,9 +117,9 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
- LIST_HEAD(one_request);
struct nfs_page *new;
unsigned int len;
+ struct nfs_pageio_descriptor pgio;

len = nfs_page_length(page);
if (len == 0)
@@ -132,11 +132,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);

- nfs_list_add_request(new, &one_request);
+ nfs_pageio_init(&pgio, inode, NULL, 0, 0);
+ nfs_list_add_request(new, &pgio.pg_list);
+ pgio.pg_count = len;
+
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
- nfs_pagein_multi(inode, &one_request, 1, len, 0, NULL);
+ nfs_pagein_multi(&pgio);
else
- nfs_pagein_one(inode, &one_request, 1, len, 0, NULL);
+ nfs_pagein_one(&pgio);
return 0;
}

@@ -258,20 +261,21 @@ nfs_async_read_error(struct list_head *head)
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
-static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
{
- struct nfs_page *req = nfs_list_entry(head->next);
+ struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
- size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
+ size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
+ struct pnfs_layout_segment *lseg;
LIST_HEAD(list);

nfs_list_remove_request(req);

- nbytes = count;
+ nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);

@@ -284,11 +288,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);

- /* We know lseg==NULL */
- lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ);
+ BUG_ON(desc->pg_lseg != NULL);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
ClearPageError(page);
offset = 0;
- nbytes = count;
+ nbytes = desc->pg_count;
do {
int ret2;

@@ -321,14 +325,17 @@ out_bad:
return -ENOMEM;
}

-static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
{
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
+ struct list_head *head = &desc->pg_list;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret = -ENOMEM;

- data = nfs_readdata_alloc(npages);
+ data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
if (!data) {
nfs_async_read_error(head);
goto out;
@@ -344,9 +351,10 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
}
req = nfs_list_entry(data->pages.next);
if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);

- ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0, lseg);
+ ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
+ 0, lseg);
out:
put_lseg(lseg);
return ret;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 06a1f3f..ccc7c22 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -898,20 +898,21 @@ static void nfs_redirty_request(struct nfs_page *req)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
-static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
{
- struct nfs_page *req = nfs_list_entry(head->next);
+ struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_write_data *data;
- size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
+ size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
+ struct pnfs_layout_segment *lseg;
LIST_HEAD(list);

nfs_list_remove_request(req);

- nbytes = count;
+ nbytes = desc->pg_count;
do {
size_t len = min(nbytes, wsize);

@@ -924,11 +925,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);

- BUG_ON(lseg);
- lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
+ BUG_ON(desc->pg_lseg);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
ClearPageError(page);
offset = 0;
- nbytes = count;
+ nbytes = desc->pg_count;
do {
int ret2;

@@ -940,7 +941,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
if (nbytes < wsize)
wsize = nbytes;
ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
- wsize, offset, lseg, how);
+ wsize, offset, lseg, desc->pg_ioflags);
if (ret == 0)
ret = ret2;
offset += wsize;
@@ -968,14 +969,17 @@ out_bad:
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
-static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
+ struct list_head *head = &desc->pg_list;
+ struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret;

- data = nfs_writedata_alloc(npages);
+ data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
if (!data) {
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
@@ -995,10 +999,10 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
}
req = nfs_list_entry(data->pages.next);
if ((!lseg) && list_is_singular(&data->pages))
- lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW);
+ lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);

/* Set up the argument struct */
- ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how);
+ ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
out:
put_lseg(lseg); /* Cleans any gotten in ->pg_test */
return ret;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index ba88ff4..90907ad 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -59,7 +59,7 @@ struct nfs_pageio_descriptor {
unsigned int pg_base;

struct inode *pg_inode;
- int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *);
+ int (*pg_doio)(struct nfs_pageio_descriptor *);
int pg_ioflags;
int pg_error;
struct pnfs_layout_segment *pg_lseg;
@@ -81,7 +81,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
pgoff_t idx_start, unsigned int npages, int tag);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
- int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *),
+ int (*doio)(struct nfs_pageio_descriptor *desc),
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
--
1.7.2.1


2011-03-03 15:14:45

by Fred Isaman

[permalink] [raw]
Subject: [PATCH 2/9] NFSv4.1: add callback to nfs4_write_done

Add a callback that the pnfs layout driver can use to do its own
handling of the data server WRITE response.

Signed-off-by: Fred Isaman <[email protected]>
---
fs/nfs/nfs4proc.c | 14 ++++++++++----
include/linux/nfs_xdr.h | 1 +
2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index dbf8678..e79131b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3124,13 +3124,10 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
}
EXPORT_SYMBOL_GPL(nfs4_reset_read);

-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
{
struct inode *inode = data->inode;

- if (!nfs4_sequence_done(task, &data->res.seq_res))
- return -EAGAIN;
-
if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
return -EAGAIN;
@@ -3142,11 +3139,20 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}

+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+{
+ if (!nfs4_sequence_done(task, &data->res.seq_res))
+ return -EAGAIN;
+ return data->write_done_cb(task, data);
+}
+
static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->inode);

data->args.bitmask = server->cache_consistency_bitmask;
+ if (!data->write_done_cb)
+ data->write_done_cb = nfs4_write_done_cb;
data->res.server = server;
data->timestamp = jiffies;

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index eb0e870..21cd41d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1038,6 +1038,7 @@ struct nfs_write_data {
unsigned int npages; /* Max length of pagevec */
struct nfs_writeargs args; /* argument struct */
struct nfs_writeres res; /* result struct */
+ int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
#endif
--
1.7.2.1