2015-12-26 23:21:39

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 01/14] pNFS/flexfiles: Support server-supplied layoutstats sampling period

Some servers want to be able to control the frequency with which clients
report layoutstats, for instance, in order to monitor QoS for a particular
file or set of file. In order to support this, the flexfiles layout allows
the server to pass this info as a hint in the layout payload.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 16 +++++++++++++---
fs/nfs/flexfilelayout/flexfilelayout.h | 1 +
2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 03516c80855a..616817a46410 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -505,9 +505,17 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
}

p = xdr_inline_decode(&stream, 4);
- if (p)
- fls->flags = be32_to_cpup(p);
+ if (!p)
+ goto out_sort_mirrors;
+ fls->flags = be32_to_cpup(p);
+
+ p = xdr_inline_decode(&stream, 4);
+ if (!p)
+ goto out_sort_mirrors;
+ for (i=0; i < fls->mirror_array_cnt; i++)
+ fls->mirror_array[i]->report_interval = be32_to_cpup(p);

+out_sort_mirrors:
ff_layout_sort_mirrors(fls);
rc = ff_layout_check_layout(lgr);
if (rc)
@@ -603,7 +611,9 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror,
mirror->start_time = now;
if (ktime_equal(mirror->last_report_time, notime))
mirror->last_report_time = now;
- if (layoutstats_timer != 0)
+ if (mirror->report_interval != 0)
+ report_interval = (s64)mirror->report_interval * 1000LL;
+ else if (layoutstats_timer != 0)
report_interval = (s64)layoutstats_timer * 1000LL;
if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
report_interval) {
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 2bb08bc6aaf0..dd353bb7dc0a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -85,6 +85,7 @@ struct nfs4_ff_layout_mirror {
struct nfs4_ff_layoutstat write_stat;
ktime_t start_time;
ktime_t last_report_time;
+ u32 report_interval;
};

struct nfs4_ff_layout_segment {
--
2.5.0



2015-12-26 23:21:41

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 03/14] nfs: clean up rest of reqs when failing to add one

From: Peng Tao <[email protected]>

If we fail to set up things before sending anything over wire,
we need to clean up the reqs that are still attached to the
IO descriptor.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pagelist.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 871ba5df5ca9..f66021663645 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1120,6 +1120,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
+ struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
int ret;

do {
@@ -1147,7 +1148,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,

nfs_pageio_setup_mirroring(desc, req);
if (desc->pg_error < 0)
- return 0;
+ goto out_failed;

for (midx = 0; midx < desc->pg_mirror_count; midx++) {
if (midx) {
@@ -1164,7 +1165,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,

if (IS_ERR(dupreq)) {
nfs_page_group_unlock(req);
- return 0;
+ desc->pg_error = PTR_ERR(dupreq);
+ goto out_failed;
}

nfs_lock_request(dupreq);
@@ -1177,10 +1179,19 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (nfs_pgio_has_mirroring(desc))
desc->pg_mirror_idx = midx;
if (!nfs_pageio_add_request_mirror(desc, dupreq))
- return 0;
+ goto out_failed;
}

return 1;
+
+out_failed:
+ /*
+ * We might have failed before sending any reqs over wire.
+ * clean up rest of the reqs in mirror pg_list
+ */
+ if (desc->pg_error)
+ desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
+ return 0;
}

/*
--
2.5.0


2015-12-26 23:21:40

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 02/14] NFS41: pop some layoutget errors to application

From: Peng Tao <[email protected]>

For ERESTARTSYS/EIO/EROFS/ENOSPC/E2BIG in layoutget, we
should just bail out instead of hiding the error and
retrying inband IO.

Change all the call sites to pop the error all the way up.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/direct.c | 15 ++++++++++++++-
fs/nfs/filelayout/filelayout.c | 17 +++++++++++++++--
fs/nfs/flexfilelayout/flexfilelayout.c | 25 ++++++++++++++++++++++---
fs/nfs/pagelist.c | 9 ++++++++-
fs/nfs/pnfs.c | 24 ++++++++++++++++++------
fs/nfs/read.c | 2 +-
6 files changed, 78 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index da44aa17b77f..408345f7b558 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -670,6 +670,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)

req = nfs_list_entry(reqs.next);
nfs_direct_setup_mirroring(dreq, &desc, req);
+ if (desc.pg_error < 0) {
+ list_splice_init(&reqs, &failed);
+ goto out_failed;
+ }

list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
if (!nfs_pageio_add_request(&desc, req)) {
@@ -677,13 +681,17 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
nfs_list_add_request(req, &failed);
spin_lock(cinfo.lock);
dreq->flags = 0;
- dreq->error = -EIO;
+ if (desc.pg_error < 0)
+ dreq->error = desc.pg_error;
+ else
+ dreq->error = -EIO;
spin_unlock(cinfo.lock);
}
nfs_release_request(req);
}
nfs_pageio_complete(&desc);

+out_failed:
while (!list_empty(&failed)) {
req = nfs_list_entry(failed.next);
nfs_list_remove_request(req);
@@ -894,6 +902,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
}

nfs_direct_setup_mirroring(dreq, &desc, req);
+ if (desc.pg_error < 0) {
+ nfs_free_request(req);
+ result = desc.pg_error;
+ break;
+ }

nfs_lock_request(req);
req->wb_index = pos >> PAGE_SHIFT;
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 02ec07973bc4..ae07b0f56659 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -883,13 +883,19 @@ static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
nfs_pageio_reset_read_mds(pgio);
@@ -902,13 +908,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_commit_info cinfo;
int status;

- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
+
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 616817a46410..57e4010e3cde 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -795,13 +795,19 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
int ds_idx;

/* Use full layout for now */
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
@@ -835,13 +841,19 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
int i;
int status;

- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
goto out_mds;
@@ -877,18 +889,25 @@ static unsigned int
ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- if (!pgio->pg_lseg)
+ if (!pgio->pg_lseg) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ goto out;
+ }
+ }
if (pgio->pg_lseg)
return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg);

/* no lseg means that pnfs is not in use, so no mirroring here */
nfs_pageio_reset_write_mds(pgio);
+out:
return 1;
}

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index c3a78450a239..871ba5df5ca9 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -874,6 +874,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio,

mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);

+ if (pgio->pg_error < 0)
+ return pgio->pg_error;
+
if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX)
return -EINVAL;

@@ -976,6 +979,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
} else {
if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req);
+ if (desc->pg_error < 0)
+ return 0;
mirror->pg_base = req->wb_pgbase;
}
if (!nfs_can_coalesce_requests(prev, req, desc))
@@ -1141,6 +1146,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
bytes = req->wb_bytes;

nfs_pageio_setup_mirroring(desc, req);
+ if (desc->pg_error < 0)
+ return 0;

for (midx = 0; midx < desc->pg_mirror_count; midx++) {
if (midx) {
@@ -1226,7 +1233,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
nfs_pageio_complete(desc);
if (!list_empty(&failed)) {
list_move(&failed, &hdr->pages);
- return -EIO;
+ return desc->pg_error < 0 ? desc->pg_error : -EIO;
}
return 0;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6095a8d42766..b1acc4135c3c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -906,14 +906,15 @@ send_layoutget(struct pnfs_layout_hdr *lo,

if (IS_ERR(lseg)) {
switch (PTR_ERR(lseg)) {
- case -ENOMEM:
case -ERESTARTSYS:
+ case -EIO:
+ case -ENOSPC:
+ case -EROFS:
+ case -E2BIG:
break;
default:
- /* remember that LAYOUTGET failed and suspend trying */
- pnfs_layout_io_set_failed(lo, range->iomode);
+ return NULL;
}
- return NULL;
} else
pnfs_layout_clear_fail_bit(lo,
pnfs_iomode_to_fail_bit(range->iomode));
@@ -1649,7 +1650,7 @@ out:
"(%s, offset: %llu, length: %llu)\n",
__func__, ino->i_sb->s_id,
(unsigned long long)NFS_FILEID(ino),
- lseg == NULL ? "not found" : "found",
+ IS_ERR_OR_NULL(lseg) ? "not found" : "found",
iomode==IOMODE_RW ? "read/write" : "read-only",
(unsigned long long)pos,
(unsigned long long)count);
@@ -1828,6 +1829,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
rd_size,
IOMODE_READ,
GFP_KERNEL);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
}
/* If no lseg, fall back to read through mds */
if (pgio->pg_lseg == NULL)
@@ -1840,13 +1846,19 @@ void
pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size)
{
- if (pgio->pg_lseg == NULL)
+ if (pgio->pg_lseg == NULL) {
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
wb_size,
IOMODE_RW,
GFP_NOFS);
+ if (IS_ERR(pgio->pg_lseg)) {
+ pgio->pg_error = PTR_ERR(pgio->pg_lseg);
+ pgio->pg_lseg = NULL;
+ return;
+ }
+ }
/* If no lseg, fall back to write through mds */
if (pgio->pg_lseg == NULL)
nfs_pageio_reset_write_mds(pgio);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0a5e33f33b5c..0bb580174cb3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -115,7 +115,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
pgm = &pgio.pg_mirrors[0];
NFS_I(inode)->read_io += pgm->pg_bytes_written;

- return 0;
+ return pgio.pg_error < 0 ? pgio.pg_error : 0;
}

static void nfs_readpage_release(struct nfs_page *req)
--
2.5.0


2015-12-26 23:21:43

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 06/14] nfs: only remove page from mapping if launder_page fails

From: Peng Tao <[email protected]>

Instead of dropping pages when write fails, only do it when
we get fatal failure in launder_page write back.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/file.c | 2 +-
fs/nfs/write.c | 39 +++++++++++++++++++++++----------------
include/linux/nfs_fs.h | 14 +++++++++++++-
3 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 461425bec4da..5d91c0f35b4f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -537,7 +537,7 @@ static int nfs_launder_page(struct page *page)
inode->i_ino, (long long)page_offset(page));

nfs_fscache_wait_on_page_write(nfsi, page);
- return nfs_wb_page(inode, page);
+ return nfs_wb_launder_page(inode, page);
}

static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 58fa3eb5c11c..c4c12b13c56d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -562,7 +562,8 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
* May return an error if the user signalled nfs_wait_on_request().
*/
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
- struct page *page, bool nonblock)
+ struct page *page, bool nonblock,
+ bool launder)
{
struct nfs_page *req;
int ret = 0;
@@ -581,17 +582,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
if (!nfs_pageio_add_request(pgio, req)) {
ret = pgio->pg_error;
/*
- * Remove the problematic req upon fatal errors,
- * while other dirty pages can still be around
- * until they get flushed.
+ * Remove the problematic req upon fatal errors
+ * in launder case, while other dirty pages can
+ * still be around until they get flushed.
*/
if (nfs_error_is_fatal(ret)) {
nfs_context_set_write_error(req->wb_context, ret);
- nfs_write_error_remove_page(req);
- } else {
- nfs_redirty_request(req);
- ret = -EAGAIN;
+ if (launder) {
+ nfs_write_error_remove_page(req);
+ goto out;
+ }
}
+ nfs_redirty_request(req);
+ ret = -EAGAIN;
} else
nfs_add_stats(page_file_mapping(page)->host,
NFSIOS_WRITEPAGES, 1);
@@ -599,12 +602,14 @@ out:
return ret;
}

-static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
+static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
+ struct nfs_pageio_descriptor *pgio, bool launder)
{
int ret;

nfs_pageio_cond_complete(pgio, page_file_index(page));
- ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
+ ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE,
+ launder);
if (ret == -EAGAIN) {
redirty_page_for_writepage(wbc, page);
ret = 0;
@@ -615,7 +620,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
/*
* Write an mmapped page to the server.
*/
-static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
+static int nfs_writepage_locked(struct page *page,
+ struct writeback_control *wbc,
+ bool launder)
{
struct nfs_pageio_descriptor pgio;
struct inode *inode = page_file_mapping(page)->host;
@@ -624,7 +631,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
false, &nfs_async_write_completion_ops);
- err = nfs_do_writepage(page, wbc, &pgio);
+ err = nfs_do_writepage(page, wbc, &pgio, launder);
nfs_pageio_complete(&pgio);
if (err < 0)
return err;
@@ -637,7 +644,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
int ret;

- ret = nfs_writepage_locked(page, wbc);
+ ret = nfs_writepage_locked(page, wbc, false);
unlock_page(page);
return ret;
}
@@ -646,7 +653,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
{
int ret;

- ret = nfs_do_writepage(page, wbc, data);
+ ret = nfs_do_writepage(page, wbc, data, false);
unlock_page(page);
return ret;
}
@@ -1933,7 +1940,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
/*
* Write back all requests on one page - we do this before reading it.
*/
-int nfs_wb_page(struct inode *inode, struct page *page)
+int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
{
loff_t range_start = page_file_offset(page);
loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
@@ -1950,7 +1957,7 @@ int nfs_wb_page(struct inode *inode, struct page *page)
for (;;) {
wait_on_page_writeback(page);
if (clear_page_dirty_for_io(page)) {
- ret = nfs_writepage_locked(page, &wbc);
+ ret = nfs_writepage_locked(page, &wbc, launder);
if (ret < 0)
goto out_error;
continue;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f1dd05b5564b..ced7eb1932fb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -515,13 +515,25 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
*/
extern int nfs_sync_inode(struct inode *inode);
extern int nfs_wb_all(struct inode *inode);
-extern int nfs_wb_page(struct inode *inode, struct page* page);
+extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder);
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
extern int nfs_commit_inode(struct inode *, int);
extern struct nfs_commit_data *nfs_commitdata_alloc(void);
extern void nfs_commit_free(struct nfs_commit_data *data);

static inline int
+nfs_wb_launder_page(struct inode *inode, struct page *page)
+{
+ return nfs_wb_single_page(inode, page, true);
+}
+
+static inline int
+nfs_wb_page(struct inode *inode, struct page *page)
+{
+ return nfs_wb_single_page(inode, page, false);
+}
+
+static inline int
nfs_have_writebacks(struct inode *inode)
{
return NFS_I(inode)->nrequests != 0;
--
2.5.0


2015-12-26 23:21:41

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 04/14] nfs: centralize pgio error cleanup

From: Peng Tao <[email protected]>

In case we fail during setting things up for read/write IO, set
pg_error in IO descriptor and do the cleanup in nfs_pageio_add_request,
where we clean up all pages that are still hanging around on the IO
descriptor.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pagelist.c | 53 +++++++++++++++++++++++++++++------------------------
fs/nfs/pnfs.c | 12 ++++--------
2 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index f66021663645..eeddbf0bf4c4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -664,22 +664,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
* @desc: IO descriptor
* @hdr: pageio header
*/
-static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
+static void nfs_pgio_error(struct nfs_pgio_header *hdr)
{
- struct nfs_pgio_mirror *mirror;
- u32 midx;
-
set_bit(NFS_IOHDR_REDO, &hdr->flags);
nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
- /* TODO: Make sure it's right to clean up all mirrors here
- * and not just hdr->pgio_mirror_idx */
- for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- mirror = &desc->pg_mirrors[midx];
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
- }
- return -ENOMEM;
}

/**
@@ -800,8 +789,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
unsigned int pagecount, pageused;

pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
- if (!nfs_pgarray_set(&hdr->page_array, pagecount))
- return nfs_pgio_error(desc, hdr);
+ if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
+ nfs_pgio_error(hdr);
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
+ }

nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
pages = hdr->page_array.pagevec;
@@ -819,8 +811,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
*pages++ = last_page = req->wb_page;
}
}
- if (WARN_ON_ONCE(pageused != pagecount))
- return nfs_pgio_error(desc, hdr);
+ if (WARN_ON_ONCE(pageused != pagecount)) {
+ nfs_pgio_error(hdr);
+ desc->pg_error = -EINVAL;
+ return desc->pg_error;
+ }

if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
(desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
@@ -843,10 +838,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)

hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!hdr) {
- /* TODO: make sure this is right with mirroring - or
- * should it back out all mirrors? */
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
- return -ENOMEM;
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
}
nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
ret = nfs_generic_pgio(desc, hdr);
@@ -1120,7 +1113,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
- struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
int ret;

do {
@@ -1187,10 +1179,23 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
out_failed:
/*
* We might have failed before sending any reqs over wire.
- * clean up rest of the reqs in mirror pg_list
+ * Clean up rest of the reqs in mirror pg_list.
*/
- if (desc->pg_error)
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
+ if (desc->pg_error) {
+ struct nfs_pgio_mirror *mirror;
+ void (*func)(struct list_head *);
+
+ /* remember fatal errors */
+ if (nfs_error_is_fatal(desc->pg_error))
+ mapping_set_error(desc->pg_inode->i_mapping,
+ desc->pg_error);
+
+ func = desc->pg_completion_ops->error_cleanup;
+ for (midx = 0; midx < desc->pg_mirror_count; midx++) {
+ mirror = &desc->pg_mirrors[midx];
+ func(&mirror->pg_list);
+ }
+ }
return 0;
}

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b1acc4135c3c..0fb3552ccfbe 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2026,15 +2026,13 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
struct nfs_pgio_header *hdr;
int ret;

hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!hdr) {
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
- return -ENOMEM;
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
}
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);

@@ -2157,15 +2155,13 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
-
struct nfs_pgio_header *hdr;
int ret;

hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!hdr) {
- desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
- return -ENOMEM;
+ desc->pg_error = -ENOMEM;
+ return desc->pg_error;
}
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
--
2.5.0


2015-12-26 23:21:42

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 05/14] nfs: handle request add failure properly

From: Peng Tao <[email protected]>

When we fail to queue a read page to IO descriptor,
we need to clean it up otherwise it is hanging around
preventing nfs module from being removed.

When we fail to queue a write page to IO descriptor,
we need to clean it up and also save the failure status
to open context. Then at file close, we can try to write
pages back again and drop the page if it fails to writeback
in .launder_page, which will be done in the next patch.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/inode.c | 6 ++++++
fs/nfs/internal.h | 14 ++++++++++++++
fs/nfs/pnfs.c | 15 +++------------
fs/nfs/read.c | 41 +++++++++++++++++++++++------------------
fs/nfs/write.c | 22 +++++++++++++++++++++-
5 files changed, 67 insertions(+), 31 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c7e8b87da5b2..74fb1223c2f5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -912,6 +912,12 @@ void nfs_file_clear_open_context(struct file *filp)
if (ctx) {
struct inode *inode = d_inode(ctx->dentry);

+ /*
+ * We fatal error on write before. Try to writeback
+ * every page again.
+ */
+ if (ctx->error < 0)
+ invalidate_inode_pages2(inode->i_mapping);
filp->private_data = NULL;
spin_lock(&inode->i_lock);
list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index df3556570123..c6c4b6e23074 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -717,3 +717,17 @@ static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
return 0;
}
#endif
+
+static inline bool nfs_error_is_fatal(int err)
+{
+ switch (err) {
+ case -ERESTARTSYS:
+ case -EIO:
+ case -ENOSPC:
+ case -EROFS:
+ case -E2BIG:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0fb3552ccfbe..580207bc52a5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -904,18 +904,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lseg = nfs4_proc_layoutget(lgp, gfp_flags);
} while (lseg == ERR_PTR(-EAGAIN));

- if (IS_ERR(lseg)) {
- switch (PTR_ERR(lseg)) {
- case -ERESTARTSYS:
- case -EIO:
- case -ENOSPC:
- case -EROFS:
- case -E2BIG:
- break;
- default:
- return NULL;
- }
- } else
+ if (IS_ERR(lseg) && !nfs_error_is_fatal(PTR_ERR(lseg)))
+ lseg = NULL;
+ else
pnfs_layout_clear_fail_bit(lo,
pnfs_iomode_to_fail_bit(range->iomode));

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 0bb580174cb3..eb31e23e7def 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -85,6 +85,23 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

+static void nfs_readpage_release(struct nfs_page *req)
+{
+ struct inode *inode = d_inode(req->wb_context->dentry);
+
+ dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
+ (long long)req_offset(req));
+
+ if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+ if (PageUptodate(req->wb_page))
+ nfs_readpage_to_fscache(inode, req->wb_page, 0);
+
+ unlock_page(req->wb_page);
+ }
+ nfs_release_request(req);
+}
+
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
@@ -106,7 +123,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,

nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
- nfs_pageio_add_request(&pgio, new);
+ if (!nfs_pageio_add_request(&pgio, new)) {
+ nfs_list_remove_request(new);
+ nfs_readpage_release(new);
+ }
nfs_pageio_complete(&pgio);

/* It doesn't make sense to do mirrored reads! */
@@ -118,23 +138,6 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
return pgio.pg_error < 0 ? pgio.pg_error : 0;
}

-static void nfs_readpage_release(struct nfs_page *req)
-{
- struct inode *inode = d_inode(req->wb_context->dentry);
-
- dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
- (long long)req_offset(req));
-
- if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
- if (PageUptodate(req->wb_page))
- nfs_readpage_to_fscache(inode, req->wb_page, 0);
-
- unlock_page(req->wb_page);
- }
- nfs_release_request(req);
-}
-
static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
@@ -361,6 +364,8 @@ readpage_async_filler(void *data, struct page *page)
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
if (!nfs_pageio_add_request(desc->pgio, new)) {
+ nfs_list_remove_request(new);
+ nfs_readpage_release(new);
error = desc->pgio->pg_error;
goto out_unlock;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 00a5d3c90acb..58fa3eb5c11c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -548,6 +548,15 @@ try_again:
return head;
}

+static void nfs_write_error_remove_page(struct nfs_page *req)
+{
+ nfs_unlock_request(req);
+ nfs_end_page_writeback(req);
+ nfs_release_request(req);
+ generic_error_remove_page(page_file_mapping(req->wb_page),
+ req->wb_page);
+}
+
/*
* Find an associated nfs write request, and prepare to flush it out
* May return an error if the user signalled nfs_wait_on_request().
@@ -570,8 +579,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,

ret = 0;
if (!nfs_pageio_add_request(pgio, req)) {
- nfs_redirty_request(req);
ret = pgio->pg_error;
+ /*
+ * Remove the problematic req upon fatal errors,
+ * while other dirty pages can still be around
+ * until they get flushed.
+ */
+ if (nfs_error_is_fatal(ret)) {
+ nfs_context_set_write_error(req->wb_context, ret);
+ nfs_write_error_remove_page(req);
+ } else {
+ nfs_redirty_request(req);
+ ret = -EAGAIN;
+ }
} else
nfs_add_stats(page_file_mapping(page)->host,
NFSIOS_WRITEPAGES, 1);
--
2.5.0


2015-12-26 23:21:45

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 08/14] pnfs/flexfiles: do not mark delay-like status as DS failure

From: Peng Tao <[email protected]>

We just need to delay and retry in these cases.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 57e4010e3cde..b392156e6743 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1188,6 +1188,14 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
}
}

+ switch (status) {
+ case NFS4ERR_DELAY:
+ case NFS4ERR_GRACE:
+ return;
+ default:
+ break;
+ }
+
mirror = FF_LAYOUT_COMP(lseg, idx);
err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
mirror, offset, length, status, opnum,
@@ -1399,7 +1407,6 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
ff_layout_reset_write(hdr, false);
return task->tk_status;
case -EAGAIN:
- rpc_restart_call_prepare(task);
return -EAGAIN;
}

--
2.5.0


2015-12-26 23:21:45

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 09/14] pnfs/flexfiles: count io stat in rpc_count_stats callback

From: Peng Tao <[email protected]>

If client ever restarts IO due to some errors, we'll endup
mis-counting IO stats if we do the counting in .rpc_done
callback. Move it to .rpc_count_stats callback that is only
called when releasing RPC.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index b392156e6743..5ede5c26c757 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1516,11 +1516,6 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;

- nfs4_ff_layout_stat_io_end_write(task,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count, hdr->res.count,
- hdr->res.verf->committed);
-
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1535,6 +1530,11 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;

+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count, hdr->res.count,
+ hdr->res.verf->committed);
+
rpc_count_iostats_metrics(task,
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}
@@ -1567,6 +1567,11 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data)

static void ff_layout_commit_done(struct rpc_task *task, void *data)
{
+ pnfs_generic_write_commit_done(task, data);
+}
+
+static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
+{
struct nfs_commit_data *cdata = data;
struct nfs_page *req;
__u64 count = 0;
@@ -1580,13 +1585,6 @@ static void ff_layout_commit_done(struct rpc_task *task, void *data)
FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
count, count, NFS_FILE_SYNC);

- pnfs_generic_write_commit_done(task, data);
-}
-
-static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
-{
- struct nfs_commit_data *cdata = data;
-
rpc_count_iostats_metrics(task,
&NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}
--
2.5.0


2015-12-26 23:21:46

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 10/14] pNFS/flexfiles: Don't prevent flexfiles client from retrying LAYOUTGET

Fix a bug in which flexfiles clients are falling back to I/O through the
MDS even when the FF_FLAGS_NO_IO_THRU_MDS flag is set.

The flexfiles client will always report errors through the LAYOUTRETURN
and/or LAYOUTERROR mechanisms, so it should normally be safe for it
to retry the LAYOUTGET until it fails or succeeds.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 4 ----
fs/nfs/flexfilelayout/flexfilelayoutdev.c | 16 ++++------------
fs/nfs/pnfs.c | 18 ++----------------
fs/nfs/pnfs.h | 21 ---------------------
4 files changed, 6 insertions(+), 53 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 5ede5c26c757..1da19d709458 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1399,11 +1399,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task,

switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
- pnfs_set_retry_layoutget(hdr->lseg->pls_layout);
ff_layout_reset_write(hdr, true);
return task->tk_status;
case -NFS4ERR_RESET_TO_MDS:
- pnfs_clear_retry_layoutget(hdr->lseg->pls_layout);
ff_layout_reset_write(hdr, false);
return task->tk_status;
case -EAGAIN:
@@ -1438,11 +1436,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,

switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
- pnfs_set_retry_layoutget(data->lseg->pls_layout);
pnfs_generic_prepare_to_resend_writes(data);
return -EAGAIN;
case -NFS4ERR_RESET_TO_MDS:
- pnfs_clear_retry_layoutget(data->lseg->pls_layout);
pnfs_generic_prepare_to_resend_writes(data);
return -EAGAIN;
case -EAGAIN:
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e125e55de86d..bd0327541366 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -429,22 +429,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
mirror, lseg->pls_range.offset,
lseg->pls_range.length, NFS4ERR_NXIO,
OP_ILLEGAL, GFP_NOIO);
- if (fail_return) {
- pnfs_error_mark_layout_for_return(ino, lseg);
- if (ff_layout_has_available_ds(lseg))
- pnfs_set_retry_layoutget(lseg->pls_layout);
- else
- pnfs_clear_retry_layoutget(lseg->pls_layout);
-
- } else {
+ if (!fail_return) {
if (ff_layout_has_available_ds(lseg))
set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
&lseg->pls_layout->plh_flags);
- else {
+ else
pnfs_error_mark_layout_for_return(ino, lseg);
- pnfs_clear_retry_layoutget(lseg->pls_layout);
- }
- }
+ } else
+ pnfs_error_mark_layout_for_return(ino, lseg);
}
out_update_creds:
if (ff_layout_update_mirror_cred(mirror, ds))
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 580207bc52a5..6b42362cdbb0 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -618,7 +618,6 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
pnfs_get_layout_hdr(lo);
pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
- pnfs_clear_retry_layoutget(lo);
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
pnfs_put_layout_hdr(lo);
@@ -1094,7 +1093,6 @@ bool pnfs_roc(struct inode *ino)
&lo->plh_flags))
layoutreturn = pnfs_prepare_layoutreturn(lo);

- pnfs_clear_retry_layoutget(lo);
list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
/* If we are sending layoutreturn, invalidate all valid lsegs */
if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
@@ -1457,25 +1455,15 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx,
return ret;
}

-/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */
-static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode)
-{
- if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags))
- return 1;
- return nfs_wait_bit_killable(key, mode);
-}
-
static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
{
- if (!pnfs_should_retry_layoutget(lo))
- return false;
/*
* send layoutcommit as it can hold up layoutreturn due to lseg
* reference
*/
pnfs_layoutcommit_inode(lo->plh_inode, false);
return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
- pnfs_layoutget_retry_bit_wait,
+ nfs_wait_bit_killable,
TASK_UNINTERRUPTIBLE);
}

@@ -1550,8 +1538,7 @@ lookup_again:
}

/* if LAYOUTGET already failed once we don't try again */
- if (pnfs_layout_io_test_failed(lo, iomode) &&
- !pnfs_should_retry_layoutget(lo)) {
+ if (pnfs_layout_io_test_failed(lo, iomode)) {
trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_IO_TEST_FAIL);
goto out_unlock;
@@ -1628,7 +1615,6 @@ lookup_again:
arg.length = PAGE_CACHE_ALIGN(arg.length);

lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
- pnfs_clear_retry_layoutget(lo);
atomic_dec(&lo->plh_outstanding);
trace_pnfs_update_layout(ino, pos, count, iomode, lo,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d1990e90e7a0..6916ff4e86f9 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -98,7 +98,6 @@ enum {
NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
- NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */
};

enum layoutdriver_policy_flags {
@@ -379,26 +378,6 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d)
return d;
}

-static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo)
-{
- if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags))
- atomic_inc(&lo->plh_refcount);
-}
-
-static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo)
-{
- if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) {
- atomic_dec(&lo->plh_refcount);
- /* wake up waiters for LAYOUTRETURN as that is not needed */
- wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN);
- }
-}
-
-static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo)
-{
- return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags);
-}
-
static inline struct pnfs_layout_segment *
pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
--
2.5.0


2015-12-26 23:21:48

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 12/14] pNFS/flexfiles: Fix a statistics gathering imbalance

When we replay a failed read, write or commit to the dataserver, we
need to ensure that we call ff_layout_read_prepare_v3(),
ff_layout_write_prepare_v3 or ff_layout_commit_prepare_v3() so that we
reset the statistics.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 1da19d709458..14109a82ce84 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1130,7 +1130,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
task->tk_status = 0;
- rpc_restart_call(task);
+ rpc_restart_call_prepare(task);
rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
return -EAGAIN;
}
--
2.5.0


2015-12-26 23:21:49

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 13/14] pNFS: Add flag to track if we've called nfs4_ff_layout_stat_io_start_read/write

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 95 +++++++++++++++++++++++++---------
include/linux/nfs_xdr.h | 1 +
2 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 14109a82ce84..9257679a15ba 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1279,14 +1279,31 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
return ff_layout_test_devid_unavailable(node);
}

-static int ff_layout_read_prepare_common(struct rpc_task *task,
- struct nfs_pgio_header *hdr)
+static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
+ if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
+ return;
nfs4_ff_layout_stat_io_start_read(hdr->inode,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count,
task->tk_start);
+}
+
+static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
+ if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
+ return;
+ nfs4_ff_layout_stat_io_end_read(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count,
+ hdr->res.count);
+}

+static int ff_layout_read_prepare_common(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@@ -1302,6 +1319,7 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
}
hdr->pgio_done_cb = ff_layout_read_done_cb;

+ ff_layout_read_record_layoutstats_start(task, hdr);
return 0;
}

@@ -1360,10 +1378,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)

dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);

- nfs4_ff_layout_stat_io_end_read(task,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count, hdr->res.count);
-
if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) {
nfs4_sequence_done(task, &hdr->res.seq_res);
@@ -1378,6 +1392,7 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;

+ ff_layout_read_record_layoutstats_done(task, hdr);
rpc_count_iostats_metrics(task,
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
}
@@ -1453,14 +1468,31 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
return 0;
}

-static int ff_layout_write_prepare_common(struct rpc_task *task,
- struct nfs_pgio_header *hdr)
+static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
{
+ if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
+ return;
nfs4_ff_layout_stat_io_start_write(hdr->inode,
FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
hdr->args.count,
task->tk_start);
+}
+
+static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
+ if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
+ return;
+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
+ hdr->args.count, hdr->res.count,
+ hdr->res.verf->committed);
+}

+static int ff_layout_write_prepare_common(struct rpc_task *task,
+ struct nfs_pgio_header *hdr)
+{
if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO);
return -EIO;
@@ -1477,6 +1509,7 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
return -EAGAIN;
}

+ ff_layout_write_record_layoutstats_start(task, hdr);
return 0;
}

@@ -1526,23 +1559,45 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
{
struct nfs_pgio_header *hdr = data;

- nfs4_ff_layout_stat_io_end_write(task,
- FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
- hdr->args.count, hdr->res.count,
- hdr->res.verf->committed);
-
+ ff_layout_write_record_layoutstats_done(task, hdr);
rpc_count_iostats_metrics(task,
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}

-static void ff_layout_commit_prepare_common(struct rpc_task *task,
+static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
struct nfs_commit_data *cdata)
{
+ if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
+ return;
nfs4_ff_layout_stat_io_start_write(cdata->inode,
FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
0, task->tk_start);
}

+static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
+ struct nfs_commit_data *cdata)
+{
+ struct nfs_page *req;
+ __u64 count = 0;
+
+ if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
+ return;
+
+ if (task->tk_status == 0) {
+ list_for_each_entry(req, &cdata->pages, wb_list)
+ count += req->wb_bytes;
+ }
+ nfs4_ff_layout_stat_io_end_write(task,
+ FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
+ count, count, NFS_FILE_SYNC);
+}
+
+static void ff_layout_commit_prepare_common(struct rpc_task *task,
+ struct nfs_commit_data *cdata)
+{
+ ff_layout_commit_record_layoutstats_start(task, cdata);
+}
+
static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
{
ff_layout_commit_prepare_common(task, data);
@@ -1569,18 +1624,8 @@ static void ff_layout_commit_done(struct rpc_task *task, void *data)
static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
{
struct nfs_commit_data *cdata = data;
- struct nfs_page *req;
- __u64 count = 0;
-
- if (task->tk_status == 0) {
- list_for_each_entry(req, &cdata->pages, wb_list)
- count += req->wb_bytes;
- }
-
- nfs4_ff_layout_stat_io_end_write(task,
- FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
- count, count, NFS_FILE_SYNC);

+ ff_layout_commit_record_layoutstats_done(task, cdata);
rpc_count_iostats_metrics(task,
&NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 29c949bd4462..77b5bc8b5b2b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1375,6 +1375,7 @@ enum {
NFS_IOHDR_ERROR = 0,
NFS_IOHDR_EOF,
NFS_IOHDR_REDO,
+ NFS_IOHDR_STAT,
};

struct nfs_pgio_header {
--
2.5.0


2015-12-26 23:21:49

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 14/14] pNFS/flexfiles: Ensure we record layoutstats even if RPC is terminated early

Currently, we will only record the layoutstats correctly if the
RPC call successfully obtains a slot. If we exit before that
happens, then we may find ourselves starting the busy timer through
the call in ff_layout_(read|write)_prepare_layoutstats, but never stopping it.

The same thing happens if we're doing DA-DS.

The fix is to ensure that we catch these cases in the rpc_release()
callback.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/flexfilelayout/flexfilelayout.c | 37 ++++++++++++++++++++++++++++------
1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 9257679a15ba..2981cd190bfd 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1397,6 +1397,15 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
}

+static void ff_layout_read_release(void *data)
+{
+ struct nfs_pgio_header *hdr = data;
+
+ ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
+ pnfs_generic_rw_release(data);
+}
+
+
static int ff_layout_write_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
@@ -1564,6 +1573,14 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
&NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
}

+static void ff_layout_write_release(void *data)
+{
+ struct nfs_pgio_header *hdr = data;
+
+ ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
+ pnfs_generic_rw_release(data);
+}
+
static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
struct nfs_commit_data *cdata)
{
@@ -1630,46 +1647,54 @@ static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
&NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
}

+static void ff_layout_commit_release(void *data)
+{
+ struct nfs_commit_data *cdata = data;
+
+ ff_layout_commit_record_layoutstats_done(&cdata->task, cdata);
+ pnfs_generic_commit_release(data);
+}
+
static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
.rpc_call_prepare = ff_layout_read_prepare_v3,
.rpc_call_done = ff_layout_read_call_done,
.rpc_count_stats = ff_layout_read_count_stats,
- .rpc_release = pnfs_generic_rw_release,
+ .rpc_release = ff_layout_read_release,
};

static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
.rpc_call_prepare = ff_layout_read_prepare_v4,
.rpc_call_done = ff_layout_read_call_done,
.rpc_count_stats = ff_layout_read_count_stats,
- .rpc_release = pnfs_generic_rw_release,
+ .rpc_release = ff_layout_read_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
.rpc_call_prepare = ff_layout_write_prepare_v3,
.rpc_call_done = ff_layout_write_call_done,
.rpc_count_stats = ff_layout_write_count_stats,
- .rpc_release = pnfs_generic_rw_release,
+ .rpc_release = ff_layout_write_release,
};

static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
.rpc_call_prepare = ff_layout_write_prepare_v4,
.rpc_call_done = ff_layout_write_call_done,
.rpc_count_stats = ff_layout_write_count_stats,
- .rpc_release = pnfs_generic_rw_release,
+ .rpc_release = ff_layout_write_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = {
.rpc_call_prepare = ff_layout_commit_prepare_v3,
.rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
- .rpc_release = pnfs_generic_commit_release,
+ .rpc_release = ff_layout_commit_release,
};

static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = {
.rpc_call_prepare = ff_layout_commit_prepare_v4,
.rpc_call_done = ff_layout_commit_done,
.rpc_count_stats = ff_layout_commit_count_stats,
- .rpc_release = pnfs_generic_commit_release,
+ .rpc_release = ff_layout_commit_release,
};

static enum pnfs_try_status
--
2.5.0


2015-12-26 23:21:47

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 11/14] pNFS/flexfiles: Don't mark the entire layout as failed, when returning it

In pNFS/flexfiles, we want to return the layout without necessarily marking
it as having completely failed. We therefore move the call to
pnfs_layout_io_set_failed() out of pnfs_error_mark_layout_for_return(),
and then ensura that pNFS/files layout calls it separately.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/filelayout/filelayout.c | 1 +
fs/nfs/pnfs.c | 3 ---
2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index ae07b0f56659..bb1f4e7a3270 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -202,6 +202,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
task->tk_status);
nfs4_mark_deviceid_unavailable(devid);
pnfs_error_mark_layout_for_return(inode, lseg);
+ pnfs_set_lo_fail(lseg);
rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */
default:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6b42362cdbb0..113c3b327e24 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1763,7 +1763,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
struct pnfs_layout_segment *lseg)
{
struct pnfs_layout_hdr *lo = NFS_I(inode)->layout;
- int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode);
struct pnfs_layout_range range = {
.iomode = lseg->pls_range.iomode,
.offset = 0,
@@ -1772,8 +1771,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
LIST_HEAD(free_me);

spin_lock(&inode->i_lock);
- /* set failure bit so that pnfs path will be retried later */
- pnfs_layout_set_fail_bit(lo, iomode);
if (lo->plh_return_iomode == 0)
lo->plh_return_iomode = range.iomode;
else if (lo->plh_return_iomode != range.iomode)
--
2.5.0


2015-12-26 23:21:44

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH 07/14] NFS41: map NFS4ERR_LAYOUTUNAVAILABLE to ENODATA

From: Peng Tao <[email protected]>

Instead of mapping it to EIO that is a fatal error and
fails application. We'll go inband after getting
NFS4ERR_LAYOUTUNAVAILABLE.

Signed-off-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 9 +++++++++
1 file changed, 9 insertions(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 18d862db15b6..0065bbdc53c3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7795,6 +7795,15 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
goto out;
+
+ /*
+ * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pnfs
+ * on the file. set tk_status to -ENODATA to tell upper layer to
+ * retry go inband.
+ */
+ case -NFS4ERR_LAYOUTUNAVAILABLE:
+ task->tk_status = -ENODATA;
+ goto out;
/*
* NFS4ERR_BADLAYOUT means the MDS cannot return a layout of
* length lgp->args.minlength != 0 (see RFC5661 section 18.43.3).
--
2.5.0


2016-01-04 15:48:48

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH 03/14] nfs: clean up rest of reqs when failing to add one

On Sat, 26 Dec 2015, Trond Myklebust wrote:

> From: Peng Tao <[email protected]>
>
> If we fail to set up things before sending anything over wire,
> we need to clean up the reqs that are still attached to the
> IO descriptor.
>
> Signed-off-by: Peng Tao <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/pagelist.c | 17 ++++++++++++++---
> 1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index 871ba5df5ca9..f66021663645 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -1120,6 +1120,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
> static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc,
> struct nfs_page *req)
> {
> + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
> int ret;
>
> do {
> @@ -1147,7 +1148,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
>
> nfs_pageio_setup_mirroring(desc, req);
> if (desc->pg_error < 0)
> - return 0;
> + goto out_failed;
>
> for (midx = 0; midx < desc->pg_mirror_count; midx++) {
> if (midx) {
> @@ -1164,7 +1165,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
>
> if (IS_ERR(dupreq)) {
> nfs_page_group_unlock(req);
> - return 0;
> + desc->pg_error = PTR_ERR(dupreq);
> + goto out_failed;
> }
>
> nfs_lock_request(dupreq);
> @@ -1177,10 +1179,19 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> if (nfs_pgio_has_mirroring(desc))
> desc->pg_mirror_idx = midx;
> if (!nfs_pageio_add_request_mirror(desc, dupreq))
> - return 0;
> + goto out_failed;
> }
>
> return 1;
> +
> +out_failed:
> + /*
> + * We might have failed before sending any reqs over wire.
> + * clean up rest of the reqs in mirror pg_list
> + */
> + if (desc->pg_error)
> + desc->pg_completion_ops->error_cleanup(&mirror->pg_list);

I don't have the "mirror" variable here. I think the section adding it in
nfs_pageio_add_request_mirror() should actually be adding it in
nfs_pageio_add_request()..

Ben

> + return 0;
> }
>
> /*
> --
> 2.5.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

2016-01-04 16:29:50

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH 04/14] nfs: centralize pgio error cleanup

On Sat, 26 Dec 2015, Trond Myklebust wrote:

> From: Peng Tao <[email protected]>
>
> In case we fail during setting things up for read/write IO, set
> pg_error in IO descriptor and do the cleanup in nfs_pageio_add_request,
> where we clean up all pages that are still hanging around on the IO
> descriptor.
>
> Signed-off-by: Peng Tao <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/pagelist.c | 53 +++++++++++++++++++++++++++++------------------------
> fs/nfs/pnfs.c | 12 ++++--------
> 2 files changed, 33 insertions(+), 32 deletions(-)
>
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index f66021663645..eeddbf0bf4c4 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -664,22 +664,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
> * @desc: IO descriptor
> * @hdr: pageio header
> */
> -static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
> - struct nfs_pgio_header *hdr)
> +static void nfs_pgio_error(struct nfs_pgio_header *hdr)
> {
> - struct nfs_pgio_mirror *mirror;
> - u32 midx;
> -
> set_bit(NFS_IOHDR_REDO, &hdr->flags);
> nfs_pgio_data_destroy(hdr);
> hdr->completion_ops->completion(hdr);
> - /* TODO: Make sure it's right to clean up all mirrors here
> - * and not just hdr->pgio_mirror_idx */
> - for (midx = 0; midx < desc->pg_mirror_count; midx++) {
> - mirror = &desc->pg_mirrors[midx];
> - desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
> - }
> - return -ENOMEM;
> }
>
> /**
> @@ -800,8 +789,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
> unsigned int pagecount, pageused;
>
> pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
> - if (!nfs_pgarray_set(&hdr->page_array, pagecount))
> - return nfs_pgio_error(desc, hdr);
> + if (!nfs_pgarray_set(&hdr->page_array, pagecount)) {
> + nfs_pgio_error(hdr);
> + desc->pg_error = -ENOMEM;
> + return desc->pg_error;
> + }
>
> nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
> pages = hdr->page_array.pagevec;
> @@ -819,8 +811,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
> *pages++ = last_page = req->wb_page;
> }
> }
> - if (WARN_ON_ONCE(pageused != pagecount))
> - return nfs_pgio_error(desc, hdr);
> + if (WARN_ON_ONCE(pageused != pagecount)) {
> + nfs_pgio_error(hdr);
> + desc->pg_error = -EINVAL;
> + return desc->pg_error;
> + }
>
> if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
> (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
> @@ -843,10 +838,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
>
> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
> if (!hdr) {
> - /* TODO: make sure this is right with mirroring - or
> - * should it back out all mirrors? */
> - desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
> - return -ENOMEM;
> + desc->pg_error = -ENOMEM;
> + return desc->pg_error;
> }
> nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
> ret = nfs_generic_pgio(desc, hdr);
> @@ -1120,7 +1113,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
> static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc,
> struct nfs_page *req)
> {
> - struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
> int ret;
>
> do {
> @@ -1187,10 +1179,23 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
> out_failed:
> /*
> * We might have failed before sending any reqs over wire.
> - * clean up rest of the reqs in mirror pg_list
> + * Clean up rest of the reqs in mirror pg_list.
> */
> - if (desc->pg_error)
> - desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
> + if (desc->pg_error) {
> + struct nfs_pgio_mirror *mirror;
> + void (*func)(struct list_head *);
> +
> + /* remember fatal errors */
> + if (nfs_error_is_fatal(desc->pg_error))

nfs_error_is_fatal isn't defined until PATCH 05/14..

Ben

> + mapping_set_error(desc->pg_inode->i_mapping,
> + desc->pg_error);
> +
> + func = desc->pg_completion_ops->error_cleanup;
> + for (midx = 0; midx < desc->pg_mirror_count; midx++) {
> + mirror = &desc->pg_mirrors[midx];
> + func(&mirror->pg_list);
> + }
> + }
> return 0;
> }
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index b1acc4135c3c..0fb3552ccfbe 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -2026,15 +2026,13 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
> int
> pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
> {
> - struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
> -
> struct nfs_pgio_header *hdr;
> int ret;
>
> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
> if (!hdr) {
> - desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
> - return -ENOMEM;
> + desc->pg_error = -ENOMEM;
> + return desc->pg_error;
> }
> nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
>
> @@ -2157,15 +2155,13 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
> int
> pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
> {
> - struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
> -
> struct nfs_pgio_header *hdr;
> int ret;
>
> hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
> if (!hdr) {
> - desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
> - return -ENOMEM;
> + desc->pg_error = -ENOMEM;
> + return desc->pg_error;
> }
> nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
> hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
> --
> 2.5.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

2016-01-04 20:12:33

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH 13/14] pNFS: Add flag to track if we've called nfs4_ff_layout_stat_io_start_read/write

Hi Trond, this won't build for me..

On Sat, 26 Dec 2015, Trond Myklebust wrote:

> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> fs/nfs/flexfilelayout/flexfilelayout.c | 95 +++++++++++++++++++++++++---------
> include/linux/nfs_xdr.h | 1 +
> 2 files changed, 71 insertions(+), 25 deletions(-)
>
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
> index 14109a82ce84..9257679a15ba 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> @@ -1279,14 +1279,31 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx)
> return ff_layout_test_devid_unavailable(node);
> }
>
> -static int ff_layout_read_prepare_common(struct rpc_task *task,
> - struct nfs_pgio_header *hdr)
> +static void ff_layout_read_record_layoutstats_start(struct rpc_task *task,
> + struct nfs_pgio_header *hdr)
> {
> + if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
> + return;
> nfs4_ff_layout_stat_io_start_read(hdr->inode,
> FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
> hdr->args.count,
> task->tk_start);
> +}
> +
> +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task,
> + struct nfs_pgio_header *hdr)
> +{
> + if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
> + return;
> + nfs4_ff_layout_stat_io_end_read(task,
> + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
> + hdr->args.count,
> + hdr->res.count);
> +}
>
> +static int ff_layout_read_prepare_common(struct rpc_task *task,
> + struct nfs_pgio_header *hdr)
> +{
> if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
> rpc_exit(task, -EIO);
> return -EIO;
> @@ -1302,6 +1319,7 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,
> }
> hdr->pgio_done_cb = ff_layout_read_done_cb;
>
> + ff_layout_read_record_layoutstats_start(task, hdr);
> return 0;
> }
>
> @@ -1360,10 +1378,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data)
>
> dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
>
> - nfs4_ff_layout_stat_io_end_read(task,
> - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
> - hdr->args.count, hdr->res.count);
> -
> if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
> task->tk_status == 0) {
> nfs4_sequence_done(task, &hdr->res.seq_res);
> @@ -1378,6 +1392,7 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data)
> {
> struct nfs_pgio_header *hdr = data;
>
> + ff_layout_read_record_layoutstats_done(task, hdr);
> rpc_count_iostats_metrics(task,
> &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]);
> }
> @@ -1453,14 +1468,31 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
> return 0;
> }
>
> -static int ff_layout_write_prepare_common(struct rpc_task *task,
> - struct nfs_pgio_header *hdr)
> +static void ff_layout_write_record_layoutstats_start(struct rpc_task *task,
> + struct nfs_pgio_header *hdr)
> {
> + if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags))
> + return;
> nfs4_ff_layout_stat_io_start_write(hdr->inode,
> FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
> hdr->args.count,
> task->tk_start);
> +}
> +
> +static void ff_layout_write_record_layoutstats_done(struct rpc_task *task,
> + struct nfs_pgio_header *hdr)
> +{
> + if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags))
> + return;
> + nfs4_ff_layout_stat_io_end_write(task,
> + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
> + hdr->args.count, hdr->res.count,
> + hdr->res.verf->committed);
> +}
>
> +static int ff_layout_write_prepare_common(struct rpc_task *task,
> + struct nfs_pgio_header *hdr)
> +{
> if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
> rpc_exit(task, -EIO);
> return -EIO;
> @@ -1477,6 +1509,7 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,
> return -EAGAIN;
> }
>
> + ff_layout_write_record_layoutstats_start(task, hdr);
> return 0;
> }
>
> @@ -1526,23 +1559,45 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data)
> {
> struct nfs_pgio_header *hdr = data;
>
> - nfs4_ff_layout_stat_io_end_write(task,
> - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx),
> - hdr->args.count, hdr->res.count,
> - hdr->res.verf->committed);
> -
> + ff_layout_write_record_layoutstats_done(task, hdr);
> rpc_count_iostats_metrics(task,
> &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]);
> }
>
> -static void ff_layout_commit_prepare_common(struct rpc_task *task,
> +static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task,
> struct nfs_commit_data *cdata)
> {
> + if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags))
> + return;

..since nfs_commit_data is missing "flags"..

> nfs4_ff_layout_stat_io_start_write(cdata->inode,
> FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
> 0, task->tk_start);
> }
>
> +static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task,
> + struct nfs_commit_data *cdata)
> +{
> + struct nfs_page *req;
> + __u64 count = 0;
> +
> + if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags))
> + return;

.. same here.

Ben

> +
> + if (task->tk_status == 0) {
> + list_for_each_entry(req, &cdata->pages, wb_list)
> + count += req->wb_bytes;
> + }
> + nfs4_ff_layout_stat_io_end_write(task,
> + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
> + count, count, NFS_FILE_SYNC);
> +}
> +
> +static void ff_layout_commit_prepare_common(struct rpc_task *task,
> + struct nfs_commit_data *cdata)
> +{
> + ff_layout_commit_record_layoutstats_start(task, cdata);
> +}
> +
> static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data)
> {
> ff_layout_commit_prepare_common(task, data);
> @@ -1569,18 +1624,8 @@ static void ff_layout_commit_done(struct rpc_task *task, void *data)
> static void ff_layout_commit_count_stats(struct rpc_task *task, void *data)
> {
> struct nfs_commit_data *cdata = data;
> - struct nfs_page *req;
> - __u64 count = 0;
> -
> - if (task->tk_status == 0) {
> - list_for_each_entry(req, &cdata->pages, wb_list)
> - count += req->wb_bytes;
> - }
> -
> - nfs4_ff_layout_stat_io_end_write(task,
> - FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index),
> - count, count, NFS_FILE_SYNC);
>
> + ff_layout_commit_record_layoutstats_done(task, cdata);
> rpc_count_iostats_metrics(task,
> &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]);
> }
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 29c949bd4462..77b5bc8b5b2b 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -1375,6 +1375,7 @@ enum {
> NFS_IOHDR_ERROR = 0,
> NFS_IOHDR_EOF,
> NFS_IOHDR_REDO,
> + NFS_IOHDR_STAT,
> };
>
> struct nfs_pgio_header {
> --
> 2.5.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>