The flexfiles layout in particular, seems to want to poke around in the
O_DIRECT flags when retransmitting.
This patch sets up an interface to allow it to call back into O_DIRECT
to handle retransmission correctly. It also fixes a potential bug whereby
we could change the behaviour of O_DIRECT if an error is already pending.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/direct.c | 21 +++++++++++++++------
fs/nfs/flexfilelayout/flexfilelayout.c | 13 +------------
fs/nfs/internal.h | 1 -
fs/nfs/write.c | 6 ++++++
include/linux/nfs_xdr.h | 1 +
5 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4b1d08f56aba..e73693f75dee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count);
}
-void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq)
-{
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-}
-EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes);
-
static void
nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
{
@@ -839,10 +833,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head)
}
}
+static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
+{
+ struct nfs_direct_req *dreq = hdr->dreq;
+
+ spin_lock(&dreq->lock);
+ if (dreq->error == 0) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ /* fake unstable write to let common nfs resend pages */
+ hdr->verf.committed = NFS_UNSTABLE;
+ hdr->good_bytes = hdr->args.count;
+ }
+ spin_unlock(&dreq->lock);
+}
+
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
.error_cleanup = nfs_write_sync_pgio_error,
.init_hdr = nfs_direct_pgio_init,
.completion = nfs_direct_write_completion,
+ .reschedule_io = nfs_direct_write_reschedule_io,
};
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 03516c80855a..df475d42df77 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -912,18 +912,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
hdr->args.count,
(unsigned long long)hdr->args.offset);
- if (!hdr->dreq) {
- struct nfs_open_context *ctx;
-
- ctx = nfs_list_entry(hdr->pages.next)->wb_context;
- set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
- hdr->completion_ops->error_cleanup(&hdr->pages);
- } else {
- nfs_direct_set_resched_writes(hdr->dreq);
- /* fake unstable write to let common nfs resend pages */
- hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = hdr->args.count;
- }
+ hdr->completion_ops->reschedule_io(hdr);
return;
}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 313d55402238..99a2919047e9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -519,7 +519,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
inode_dio_wait(inode);
}
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
-extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq);
/* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7b9316406930..0aa3e6b3db70 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1326,9 +1326,15 @@ static void nfs_async_write_error(struct list_head *head)
}
}
+static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
+{
+ nfs_async_write_error(&hdr->pages);
+}
+
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.error_cleanup = nfs_async_write_error,
.completion = nfs_write_completion,
+ .reschedule_io = nfs_async_write_reschedule_io,
};
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 11bbae44f4cb..e89dbb14138c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1460,6 +1460,7 @@ struct nfs_pgio_completion_ops {
void (*error_cleanup)(struct list_head *head);
void (*init_hdr)(struct nfs_pgio_header *hdr);
void (*completion)(struct nfs_pgio_header *hdr);
+ void (*reschedule_io)(struct nfs_pgio_header *hdr);
};
struct nfs_unlinkdata {
--
2.5.0
If the layout segment is invalid, then we should not be adding more
write requests to the commit list. Instead, those writes should be
replayed after requesting a new layout.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 2 ++
fs/nfs/direct.c | 12 ++++++++++++
fs/nfs/pnfs.c | 3 +++
fs/nfs/pnfs.h | 6 ++++++
fs/nfs/pnfs_nfs.c | 5 +++++
fs/nfs/write.c | 8 ++++++++
include/linux/nfs_xdr.h | 2 ++
7 files changed, 38 insertions(+)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e4dbab5dce6e..2be8b252e3b1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -233,6 +233,8 @@ static u32 initiate_file_draining(struct nfs_client *clp,
unlock:
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
+ /* Free all lsegs that are attached to commit buckets */
+ nfs_commit_inode(ino, 0);
pnfs_put_layout_hdr(lo);
trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino,
&args->cbl_stateid, -rv);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 14f77df79c25..a9a93927fe3e 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -721,8 +721,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
nfs_direct_write_complete(dreq, data->inode);
}
+static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
+{
+ struct nfs_direct_req *dreq = cinfo->dreq;
+
+ spin_lock(&dreq->lock);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ nfs_mark_request_commit(req, NULL, cinfo, 0);
+}
+
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
.completion = nfs_direct_commit_complete,
+ .resched_write = nfs_direct_resched_write,
};
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 360fe95c97b5..6593be7c0129 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -703,6 +703,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
ret = -EAGAIN;
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&lseg_list);
+ /* Free all lsegs that are attached to commit buckets */
+ nfs_commit_inode(inode, 0);
pnfs_put_layout_hdr(lo);
iput(inode);
}
@@ -1811,6 +1813,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
pnfs_mark_matching_lsegs_return(lo, &free_me, &range);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&free_me);
+ nfs_commit_inode(inode, 0);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d93c2ebc0fd3..4bd7faf9ce50 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -412,6 +412,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
return lseg;
}
+static inline bool
+pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg)
+{
+ return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0;
+}
+
/* Return true if a layout driver is being used for this mountpoint */
static inline int pnfs_enabled_sb(struct nfs_server *nfss)
{
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 3c8e3a44e6ea..81ac6480f9e7 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -868,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
buckets = cinfo->ds->buckets;
list = &buckets[ds_commit_idx].written;
if (list_empty(list)) {
+ if (!pnfs_is_valid_lseg(lseg)) {
+ spin_unlock(cinfo->lock);
+ cinfo->completion_ops->resched_write(cinfo, req);
+ return;
+ }
/* Non-empty buckets hold a reference on the lseg. That ref
* is normally transferred to the COMMIT call and released
* there. It could also be released if the last req is pulled
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ae29f082c9c2..9c9d3e7a2452 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1676,6 +1676,13 @@ void nfs_retry_commit(struct list_head *page_list,
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);
+static void
+nfs_commit_resched_write(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
+{
+ __set_page_dirty_nobuffers(req->wb_page);
+}
+
/*
* Commit dirty pages
*/
@@ -1777,6 +1784,7 @@ static const struct rpc_call_ops nfs_commit_ops = {
static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
.completion = nfs_commit_release_pages,
+ .resched_write = nfs_commit_resched_write,
};
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a8905b7d4d7f..bee3e60a7006 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1420,10 +1420,12 @@ struct nfs_mds_commit_info {
struct list_head list;
};
+struct nfs_commit_info;
struct nfs_commit_data;
struct nfs_inode;
struct nfs_commit_completion_ops {
void (*completion) (struct nfs_commit_data *data);
+ void (*resched_write) (struct nfs_commit_info *, struct nfs_page *);
};
struct nfs_commit_info {
--
2.5.0
If two processes share the same credentials and NFSv4 open stateid, then
allow them both to dirty the same page, even if their nfs_open_context
differs.
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 6 ++++++
fs/nfs/pagelist.c | 6 ------
fs/nfs/write.c | 3 ++-
3 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 99a2919047e9..870e2ba7ba49 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -264,6 +264,12 @@ static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc)
return desc->pg_mirror_count > 1;
}
+static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
+ const struct nfs_open_context *ctx2)
+{
+ return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
+}
+
/* nfs2xdr.c */
extern struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 452a011ba0d8..c3a78450a239 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -903,12 +903,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
pgio->pg_mirrors_dynamic = NULL;
}
-static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
- const struct nfs_open_context *ctx2)
-{
- return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
-}
-
static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
const struct nfs_lock_context *l2)
{
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9c9d3e7a2452..83348db2fac7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1130,7 +1130,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
- do_flush = req->wb_page != page || req->wb_context != ctx;
+ do_flush = req->wb_page != page ||
+ !nfs_match_open_context(req->wb_context, ctx);
/* for now, flush if more than 1 request in page_group */
do_flush |= req->wb_this_page != req;
if (l_ctx && flctx &&
--
2.5.0
Allow synchronous RPC calls to wait for pending RPC calls to finish,
but also allow asynchronous ones to just fire off another commit.
With this patch, the xfstests generic/074 test completes in 226s
instead of 242s
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/direct.c | 6 -----
fs/nfs/file.c | 2 +-
fs/nfs/nfstrace.h | 1 -
fs/nfs/pnfs_nfs.c | 5 +---
fs/nfs/write.c | 70 +++++++++++++++++++++++--------------------------
include/linux/nfs_fs.h | 1 -
include/linux/nfs_xdr.h | 1 -
7 files changed, 35 insertions(+), 51 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e73693f75dee..14f77df79c25 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -721,14 +721,8 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
nfs_direct_write_complete(dreq, data->inode);
}
-static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
-{
- /* There is no lock to clear */
-}
-
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
.completion = nfs_direct_commit_complete,
- .error_cleanup = nfs_direct_error_cleanup,
};
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 93e236429c5d..e6ef80ec699c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page,
* so it will not block due to pages that will shortly be freeable.
*/
nfsi = NFS_I(mapping->host);
- if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
+ if (atomic_read(&nfsi->commit_info.rpcs_out)) {
*writeback = true;
return;
}
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 59f838cdc009..9f80a086b612 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -39,7 +39,6 @@
{ 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
- { 1 << NFS_INO_COMMIT, "COMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 24655b807d44..3c8e3a44e6ea 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
} else {
nfs_retry_commit(mds_pages, NULL, cinfo, 0);
pnfs_generic_retry_commit(cinfo, 0);
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}
}
nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
- if (nreq == 0) {
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
+ if (nreq == 0)
goto out;
- }
atomic_add(nreq, &cinfo->mds->rpcs_out);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0aa3e6b3db70..ae29f082c9c2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -21,6 +21,8 @@
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>
+#include <linux/freezer.h>
+#include <linux/wait.h>
#include <asm/uaccess.h>
@@ -1535,27 +1537,29 @@ static void nfs_writeback_result(struct rpc_task *task,
}
}
+static int nfs_wait_atomic_killable(atomic_t *key)
+{
+ if (fatal_signal_pending(current))
+ return -ERESTARTSYS;
+ freezable_schedule_unsafe();
+ return 0;
+}
-static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
{
- int ret;
+ return wait_on_atomic_t(&cinfo->rpcs_out,
+ nfs_wait_atomic_killable, TASK_KILLABLE);
+}
- if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
- return 1;
- if (!may_wait)
- return 0;
- ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
- NFS_INO_COMMIT,
- nfs_wait_bit_killable,
- TASK_KILLABLE);
- return (ret < 0) ? ret : 1;
+static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+{
+ atomic_inc(&cinfo->rpcs_out);
}
-static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
{
- clear_bit(NFS_INO_COMMIT, &nfsi->flags);
- smp_mb__after_atomic();
- wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+ if (atomic_dec_and_test(&cinfo->rpcs_out))
+ wake_up_atomic_t(&cinfo->rpcs_out);
}
void nfs_commitdata_release(struct nfs_commit_data *data)
@@ -1693,7 +1697,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
data->mds_ops, how, 0);
out_bad:
nfs_retry_commit(head, NULL, cinfo, 0);
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}
@@ -1755,8 +1758,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
nfs_init_cinfo(&cinfo, data->inode, data->dreq);
- if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
- nfs_commit_clear_lock(NFS_I(data->inode));
+ nfs_commit_end(cinfo.mds);
}
static void nfs_commit_release(void *calldata)
@@ -1775,7 +1777,6 @@ static const struct rpc_call_ops nfs_commit_ops = {
static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
.completion = nfs_commit_release_pages,
- .error_cleanup = nfs_commit_clear_lock,
};
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
@@ -1794,30 +1795,25 @@ int nfs_commit_inode(struct inode *inode, int how)
LIST_HEAD(head);
struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
+ int error = 0;
int res;
- res = nfs_commit_set_lock(NFS_I(inode), may_wait);
- if (res <= 0)
- goto out_mark_dirty;
nfs_init_cinfo_from_inode(&cinfo, inode);
+ nfs_commit_begin(cinfo.mds);
res = nfs_scan_commit(inode, &head, &cinfo);
- if (res) {
- int error;
-
+ if (res)
error = nfs_generic_commit_list(inode, &head, how, &cinfo);
- if (error < 0)
- return error;
- if (!may_wait)
- goto out_mark_dirty;
- error = wait_on_bit_action(&NFS_I(inode)->flags,
- NFS_INO_COMMIT,
- nfs_wait_bit_killable,
- TASK_KILLABLE);
- if (error < 0)
- return error;
- } else
- nfs_commit_clear_lock(NFS_I(inode));
+ nfs_commit_end(cinfo.mds);
+ if (error < 0)
+ goto out_error;
+ if (!may_wait)
+ goto out_mark_dirty;
+ error = wait_on_commit(cinfo.mds);
+ if (error < 0)
+ return error;
return res;
+out_error:
+ res = error;
/* Note: If we exit without ensuring that the commit is complete,
* we must mark the inode as dirty. Otherwise, future calls to
* sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c0e961474a52..ebf0bd72a42b 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -216,7 +216,6 @@ struct nfs_inode {
#define NFS_INO_FLUSHING (4) /* inode is flushing out data */
#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
-#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e89dbb14138c..a8905b7d4d7f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1423,7 +1423,6 @@ struct nfs_mds_commit_info {
struct nfs_commit_data;
struct nfs_inode;
struct nfs_commit_completion_ops {
- void (*error_cleanup) (struct nfs_inode *nfsi);
void (*completion) (struct nfs_commit_data *data);
};
--
2.5.0