2016-01-04 18:21:45

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 01/18] pNFS: Ensure nfs4_layoutget_prepare returns the correct error

If we're unable to perform the layoutget due to an invalid open stateid
or a bulk recall, ensure that we return the error so that the caller
can decide on an appropriate action.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/nfs4proc.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 18d862db15b6..fcd7a9039020 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7760,6 +7760,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
struct nfs4_layoutget *lgp = calldata;
struct nfs_server *server = NFS_SERVER(lgp->args.inode);
struct nfs4_session *session = nfs4_get_session(server);
+ int ret;

dprintk("--> %s\n", __func__);
/* Note the is a race here, where a CB_LAYOUTRECALL can come in
@@ -7770,12 +7771,12 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
if (nfs41_setup_sequence(session, &lgp->args.seq_args,
&lgp->res.seq_res, task))
return;
- if (pnfs_choose_layoutget_stateid(&lgp->args.stateid,
+ ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid,
NFS_I(lgp->args.inode)->layout,
&lgp->args.range,
- lgp->args.ctx->state)) {
- rpc_exit(task, NFS4_OK);
- }
+ lgp->args.ctx->state);
+ if (ret < 0)
+ rpc_exit(task, ret);
}

static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
--
2.5.0



2016-01-04 18:21:46

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 02/18] NFSv4.1/pNFS: Add a helper to mark the layout as returned

This ensures that we don't reuse the stateid if a layout return or
implied layout return means that we've returned all layout segments

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 1 +
fs/nfs/nfs4proc.c | 3 ++-
fs/nfs/pnfs.c | 1 +
fs/nfs/pnfs.h | 13 +++++++++++++
4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 807eb6ef4f91..716cbff24450 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -192,6 +192,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo,
&args->cbl_range);
}
+ pnfs_mark_layout_returned_if_empty(lo);
unlock:
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index fcd7a9039020..883da29b9ace 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8049,9 +8049,10 @@ static void nfs4_layoutreturn_release(void *calldata)

dprintk("--> %s\n", __func__);
spin_lock(&lo->plh_inode->i_lock);
+ pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
+ pnfs_mark_layout_returned_if_empty(lo);
if (lrp->res.lrs_present)
pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
- pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range);
pnfs_clear_layoutreturn_waitbit(lo);
lo->plh_block_lgets--;
spin_unlock(&lo->plh_inode->i_lock);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6095a8d42766..b3fb6bb02275 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1149,6 +1149,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)

spin_lock(&ino->i_lock);
lo = NFS_I(ino)->layout;
+ pnfs_mark_layout_returned_if_empty(lo);
if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
lo->plh_barrier = barrier;
spin_unlock(&ino->i_lock);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d1990e90e7a0..be24a759b655 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -556,6 +556,19 @@ pnfs_calc_offset_length(u64 offset, u64 end)
return 1 + end - offset;
}

+/**
+ * pnfs_mark_layout_returned_if_empty - marks the layout as returned
+ * @lo: layout header
+ *
+ * Note: Caller must hold inode->i_lock
+ */
+static inline void
+pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo)
+{
+ if (list_empty(&lo->plh_segs))
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+}
+
extern unsigned int layoutstats_timer;

#ifdef NFS_DEBUG
--
2.5.0


2016-01-04 18:21:47

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 03/18] pNFS: If we have to delay the layout callback, mark the layout for return

If the client needs to delay the layout callback, then speed up the recall
process by marking the remaining layout segments to be actively returned
by the client.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 14 ++++++++++++--
fs/nfs/pnfs.c | 4 +++-
fs/nfs/pnfs.h | 3 +++
3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 716cbff24450..34852ece4057 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -181,9 +181,19 @@ static u32 initiate_file_draining(struct nfs_client *clp,
pnfs_layoutcommit_inode(ino, false);

spin_lock(&ino->i_lock);
- if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
- pnfs_mark_matching_lsegs_invalid(lo, &free_me_list,
+ /*
+ * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return)
+ */
+ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ rv = NFS4ERR_DELAY;
+ goto unlock;
+ }
+
+ if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list,
&args->cbl_range)) {
+ pnfs_mark_matching_lsegs_return(lo,
+ &free_me_list,
+ &args->cbl_range);
rv = NFS4ERR_DELAY;
goto unlock;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b3fb6bb02275..360fe95c97b5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1756,7 +1756,7 @@ out_forget_reply:
goto out;
}

-static void
+void
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
struct pnfs_layout_range *return_range)
@@ -1768,6 +1768,8 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
if (list_empty(&lo->plh_segs))
return;

+ assert_spin_locked(&lo->plh_inode->i_lock);
+
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (should_free_lseg(&lseg->pls_range, return_range)) {
dprintk("%s: marking lseg %p iomode %d "
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index be24a759b655..d93c2ebc0fd3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -266,6 +266,9 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
struct pnfs_layout_range *recall_range);
+void pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
+ struct list_head *tmp_list,
+ struct pnfs_layout_range *recall_range);
bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
--
2.5.0


2016-01-04 18:21:48

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 04/18] NFSv4.1/pNFS: Ensure we enforce RFC5661 Section 12.5.5.2.1

The RFC requires us to check if the server is recalling a stateid that we
haven't yet received. If so, tell it to wait.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 34852ece4057..1b24ad07d4f5 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -160,6 +160,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp,
return lo;
}

+/*
+ * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
+ */
+static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo,
+ const nfs4_stateid *new)
+{
+ u32 oldseq, newseq;
+
+ oldseq = be32_to_cpu(lo->plh_stateid.seqid);
+ newseq = be32_to_cpu(new->seqid);
+
+ if (newseq > oldseq + 1)
+ return false;
+ return true;
+}
+
static u32 initiate_file_draining(struct nfs_client *clp,
struct cb_layoutrecallargs *args)
{
@@ -175,6 +191,10 @@ static u32 initiate_file_draining(struct nfs_client *clp,
ino = lo->plh_inode;

spin_lock(&ino->i_lock);
+ if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) {
+ rv = NFS4ERR_DELAY;
+ goto unlock;
+ }
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
spin_unlock(&ino->i_lock);

--
2.5.0


2016-01-04 18:21:48

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 05/18] NFSv4.1/pNFS: Don't return NFS4ERR_DELAY unnecessarily in CB_LAYOUTRECALL

If the client is promising to return the layout ASAP, then there is no
need to return DELAY and have the server retry. Instead default to the
normal procedure described in RFC5661.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 1b24ad07d4f5..724a9b756ab0 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -214,7 +214,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
pnfs_mark_matching_lsegs_return(lo,
&free_me_list,
&args->cbl_range);
- rv = NFS4ERR_DELAY;
+ rv = NFS4_OK;
goto unlock;
}

--
2.5.0


2016-01-04 18:21:49

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 06/18] NFSv4: List stateid information in the callback tracepoints

The stateid is extremely valuable when debugging.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 16 +++++++++---
fs/nfs/nfs4trace.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 79 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 724a9b756ab0..e4dbab5dce6e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,

res = htonl(NFS4ERR_BADHANDLE);
inode = nfs_delegation_find_inode(cps->clp, &args->fh);
- if (inode == NULL)
+ if (inode == NULL) {
+ trace_nfs4_cb_recall(cps->clp, &args->fh, NULL,
+ &args->stateid, -ntohl(res));
goto out;
+ }
/* Set up a helper thread to actually return the delegation */
switch (nfs_async_inode_return_delegation(inode, &args->stateid)) {
case 0:
@@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
default:
res = htonl(NFS4ERR_RESOURCE);
}
- trace_nfs4_recall_delegation(inode, -ntohl(res));
+ trace_nfs4_cb_recall(cps->clp, &args->fh, inode,
+ &args->stateid, -ntohl(res));
iput(inode);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
@@ -185,8 +189,11 @@ static u32 initiate_file_draining(struct nfs_client *clp,
LIST_HEAD(free_me_list);

lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid);
- if (!lo)
+ if (!lo) {
+ trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL,
+ &args->cbl_stateid, -rv);
goto out;
+ }

ino = lo->plh_inode;

@@ -227,7 +234,8 @@ unlock:
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
pnfs_put_layout_hdr(lo);
- trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv);
+ trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino,
+ &args->cbl_stateid, -rv);
iput(ino);
out:
return rv;
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index d08d0c84b778..88b6b14ce71b 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -982,7 +982,6 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_acl);
DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label);
DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label);
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
-DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation);

DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
TP_PROTO(
@@ -1145,8 +1144,74 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
), \
TP_ARGS(clp, fhandle, inode, error))
DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr);
-DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_layoutrecall_inode);

+DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
+ TP_PROTO(
+ const struct nfs_client *clp,
+ const struct nfs_fh *fhandle,
+ const struct inode *inode,
+ const nfs4_stateid *stateid,
+ int error
+ ),
+
+ TP_ARGS(clp, fhandle, inode, stateid, error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ __field(dev_t, dev)
+ __field(u32, fhandle)
+ __field(u64, fileid)
+ __string(dstaddr, clp ?
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown")
+ __field(int, stateid_seq)
+ __field(u32, stateid_hash)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->fhandle = nfs_fhandle_hash(fhandle);
+ if (inode != NULL) {
+ __entry->fileid = NFS_FILEID(inode);
+ __entry->dev = inode->i_sb->s_dev;
+ } else {
+ __entry->fileid = 0;
+ __entry->dev = 0;
+ }
+ __assign_str(dstaddr, clp ?
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR) : "unknown")
+ __entry->stateid_seq =
+ be32_to_cpu(stateid->seqid);
+ __entry->stateid_hash =
+ nfs_stateid_hash(stateid);
+ ),
+
+ TP_printk(
+ "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
+ "stateid=%d:0x%08x dstaddr=%s",
+ __entry->error,
+ show_nfsv4_errors(__entry->error),
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->fileid,
+ __entry->fhandle,
+ __entry->stateid_seq, __entry->stateid_hash,
+ __get_str(dstaddr)
+ )
+);
+
+#define DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(name) \
+ DEFINE_EVENT(nfs4_inode_stateid_callback_event, name, \
+ TP_PROTO( \
+ const struct nfs_client *clp, \
+ const struct nfs_fh *fhandle, \
+ const struct inode *inode, \
+ const nfs4_stateid *stateid, \
+ int error \
+ ), \
+ TP_ARGS(clp, fhandle, inode, stateid, error))
+DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall);
+DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file);

DECLARE_EVENT_CLASS(nfs4_idmap_event,
TP_PROTO(
--
2.5.0


2016-01-04 18:21:50

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 07/18] NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs

The flexfiles layout in particular, seems to want to poke around in the
O_DIRECT flags when retransmitting.
This patch sets up an interface to allow it to call back into O_DIRECT
to handle retransmission correctly. It also fixes a potential bug whereby
we could change the behaviour of O_DIRECT if an error is already pending.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/direct.c | 21 +++++++++++++++------
fs/nfs/flexfilelayout/flexfilelayout.c | 13 +------------
fs/nfs/internal.h | 1 -
fs/nfs/write.c | 6 ++++++
include/linux/nfs_xdr.h | 1 +
5 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4b1d08f56aba..e73693f75dee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count);
}

-void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq)
-{
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-}
-EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes);
-
static void
nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
{
@@ -839,10 +833,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head)
}
}

+static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
+{
+ struct nfs_direct_req *dreq = hdr->dreq;
+
+ spin_lock(&dreq->lock);
+ if (dreq->error == 0) {
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ /* fake unstable write to let common nfs resend pages */
+ hdr->verf.committed = NFS_UNSTABLE;
+ hdr->good_bytes = hdr->args.count;
+ }
+ spin_unlock(&dreq->lock);
+}
+
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
.error_cleanup = nfs_write_sync_pgio_error,
.init_hdr = nfs_direct_pgio_init,
.completion = nfs_direct_write_completion,
+ .reschedule_io = nfs_direct_write_reschedule_io,
};


diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 03516c80855a..df475d42df77 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -912,18 +912,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
hdr->args.count,
(unsigned long long)hdr->args.offset);

- if (!hdr->dreq) {
- struct nfs_open_context *ctx;
-
- ctx = nfs_list_entry(hdr->pages.next)->wb_context;
- set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
- hdr->completion_ops->error_cleanup(&hdr->pages);
- } else {
- nfs_direct_set_resched_writes(hdr->dreq);
- /* fake unstable write to let common nfs resend pages */
- hdr->verf.committed = NFS_UNSTABLE;
- hdr->good_bytes = hdr->args.count;
- }
+ hdr->completion_ops->reschedule_io(hdr);
return;
}

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 313d55402238..99a2919047e9 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -519,7 +519,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
inode_dio_wait(inode);
}
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
-extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq);

/* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7b9316406930..0aa3e6b3db70 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1326,9 +1326,15 @@ static void nfs_async_write_error(struct list_head *head)
}
}

+static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
+{
+ nfs_async_write_error(&hdr->pages);
+}
+
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.error_cleanup = nfs_async_write_error,
.completion = nfs_write_completion,
+ .reschedule_io = nfs_async_write_reschedule_io,
};

void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 11bbae44f4cb..e89dbb14138c 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1460,6 +1460,7 @@ struct nfs_pgio_completion_ops {
void (*error_cleanup)(struct list_head *head);
void (*init_hdr)(struct nfs_pgio_header *hdr);
void (*completion)(struct nfs_pgio_header *hdr);
+ void (*reschedule_io)(struct nfs_pgio_header *hdr);
};

struct nfs_unlinkdata {
--
2.5.0


2016-01-04 18:21:52

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 09/18] NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid

If the layout segment is invalid, then we should not be adding more
write requests to the commit list. Instead, those writes should be
replayed after requesting a new layout.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 2 ++
fs/nfs/direct.c | 12 ++++++++++++
fs/nfs/pnfs.c | 3 +++
fs/nfs/pnfs.h | 6 ++++++
fs/nfs/pnfs_nfs.c | 5 +++++
fs/nfs/write.c | 9 +++++++++
include/linux/nfs_xdr.h | 2 ++
7 files changed, 39 insertions(+)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e4dbab5dce6e..2be8b252e3b1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -233,6 +233,8 @@ static u32 initiate_file_draining(struct nfs_client *clp,
unlock:
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
+ /* Free all lsegs that are attached to commit buckets */
+ nfs_commit_inode(ino, 0);
pnfs_put_layout_hdr(lo);
trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino,
&args->cbl_stateid, -rv);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 14f77df79c25..a9a93927fe3e 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -721,8 +721,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
nfs_direct_write_complete(dreq, data->inode);
}

+static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
+{
+ struct nfs_direct_req *dreq = cinfo->dreq;
+
+ spin_lock(&dreq->lock);
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ nfs_mark_request_commit(req, NULL, cinfo, 0);
+}
+
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
.completion = nfs_direct_commit_complete,
+ .resched_write = nfs_direct_resched_write,
};

static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 360fe95c97b5..6593be7c0129 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -703,6 +703,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
ret = -EAGAIN;
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&lseg_list);
+ /* Free all lsegs that are attached to commit buckets */
+ nfs_commit_inode(inode, 0);
pnfs_put_layout_hdr(lo);
iput(inode);
}
@@ -1811,6 +1813,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
pnfs_mark_matching_lsegs_return(lo, &free_me, &range);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&free_me);
+ nfs_commit_inode(inode, 0);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d93c2ebc0fd3..4bd7faf9ce50 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -412,6 +412,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
return lseg;
}

+static inline bool
+pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg)
+{
+ return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0;
+}
+
/* Return true if a layout driver is being used for this mountpoint */
static inline int pnfs_enabled_sb(struct nfs_server *nfss)
{
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 3c8e3a44e6ea..81ac6480f9e7 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -868,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
buckets = cinfo->ds->buckets;
list = &buckets[ds_commit_idx].written;
if (list_empty(list)) {
+ if (!pnfs_is_valid_lseg(lseg)) {
+ spin_unlock(cinfo->lock);
+ cinfo->completion_ops->resched_write(cinfo, req);
+ return;
+ }
/* Non-empty buckets hold a reference on the lseg. That ref
* is normally transferred to the COMMIT call and released
* there. It could also be released if the last req is pulled
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ae29f082c9c2..0aa8d6f23b4c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1676,6 +1676,13 @@ void nfs_retry_commit(struct list_head *page_list,
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);

+static void
+nfs_commit_resched_write(struct nfs_commit_info *cinfo,
+ struct nfs_page *req)
+{
+ __set_page_dirty_nobuffers(req->wb_page);
+}
+
/*
* Commit dirty pages
*/
@@ -1777,6 +1784,7 @@ static const struct rpc_call_ops nfs_commit_ops = {

static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
.completion = nfs_commit_release_pages,
+ .resched_write = nfs_commit_resched_write,
};

int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
@@ -1823,6 +1831,7 @@ out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return res;
}
+EXPORT_SYMBOL_GPL(nfs_commit_inode);

int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a8905b7d4d7f..bee3e60a7006 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1420,10 +1420,12 @@ struct nfs_mds_commit_info {
struct list_head list;
};

+struct nfs_commit_info;
struct nfs_commit_data;
struct nfs_inode;
struct nfs_commit_completion_ops {
void (*completion) (struct nfs_commit_data *data);
+ void (*resched_write) (struct nfs_commit_info *, struct nfs_page *);
};

struct nfs_commit_info {
--
2.5.0


2016-01-04 18:21:53

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 10/18] NFS: Relax requirements in nfs_flush_incompatible

If two processes share the same credentials and NFSv4 open stateid, then
allow them both to dirty the same page, even if their nfs_open_context
differs.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/internal.h | 6 ++++++
fs/nfs/pagelist.c | 6 ------
fs/nfs/write.c | 3 ++-
3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 99a2919047e9..870e2ba7ba49 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -264,6 +264,12 @@ static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc)
return desc->pg_mirror_count > 1;
}

+static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1,
+ const struct nfs_open_context *ctx2)
+{
+ return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
+}
+
/* nfs2xdr.c */
extern struct rpc_procinfo nfs_procedures[];
extern int nfs2_decode_dirent(struct xdr_stream *,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 452a011ba0d8..c3a78450a239 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -903,12 +903,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio)
pgio->pg_mirrors_dynamic = NULL;
}

-static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
- const struct nfs_open_context *ctx2)
-{
- return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state;
-}
-
static bool nfs_match_lock_context(const struct nfs_lock_context *l1,
const struct nfs_lock_context *l2)
{
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0aa8d6f23b4c..2c26e04d9396 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1130,7 +1130,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
if (req == NULL)
return 0;
l_ctx = req->wb_lock_context;
- do_flush = req->wb_page != page || req->wb_context != ctx;
+ do_flush = req->wb_page != page ||
+ !nfs_match_open_context(req->wb_context, ctx);
/* for now, flush if more than 1 request in page_group */
do_flush |= req->wb_this_page != req;
if (l_ctx && flctx &&
--
2.5.0


2016-01-04 18:21:51

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 08/18] NFS: Allow multiple commit requests in flight per file

Allow synchronous RPC calls to wait for pending RPC calls to finish,
but also allow asynchronous ones to just fire off another commit.

With this patch, the xfstests generic/074 test completes in 226s
instead of 242s

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/direct.c | 6 -----
fs/nfs/file.c | 2 +-
fs/nfs/nfstrace.h | 1 -
fs/nfs/pnfs_nfs.c | 5 +---
fs/nfs/write.c | 70 +++++++++++++++++++++++--------------------------
include/linux/nfs_fs.h | 1 -
include/linux/nfs_xdr.h | 1 -
7 files changed, 35 insertions(+), 51 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e73693f75dee..14f77df79c25 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -721,14 +721,8 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
nfs_direct_write_complete(dreq, data->inode);
}

-static void nfs_direct_error_cleanup(struct nfs_inode *nfsi)
-{
- /* There is no lock to clear */
-}
-
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
.completion = nfs_direct_commit_complete,
- .error_cleanup = nfs_direct_error_cleanup,
};

static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 93e236429c5d..e6ef80ec699c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page,
* so it will not block due to pages that will shortly be freeable.
*/
nfsi = NFS_I(mapping->host);
- if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) {
+ if (atomic_read(&nfsi->commit_info.rpcs_out)) {
*writeback = true;
return;
}
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 59f838cdc009..9f80a086b612 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -39,7 +39,6 @@
{ 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \
{ 1 << NFS_INO_FLUSHING, "FLUSHING" }, \
{ 1 << NFS_INO_FSCACHE, "FSCACHE" }, \
- { 1 << NFS_INO_COMMIT, "COMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \
{ 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" })

diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 24655b807d44..3c8e3a44e6ea 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
} else {
nfs_retry_commit(mds_pages, NULL, cinfo, 0);
pnfs_generic_retry_commit(cinfo, 0);
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}
}

nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);

- if (nreq == 0) {
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
+ if (nreq == 0)
goto out;
- }

atomic_add(nreq, &cinfo->mds->rpcs_out);

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0aa3e6b3db70..ae29f082c9c2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -21,6 +21,8 @@
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>
+#include <linux/freezer.h>
+#include <linux/wait.h>

#include <asm/uaccess.h>

@@ -1535,27 +1537,29 @@ static void nfs_writeback_result(struct rpc_task *task,
}
}

+static int nfs_wait_atomic_killable(atomic_t *key)
+{
+ if (fatal_signal_pending(current))
+ return -ERESTARTSYS;
+ freezable_schedule_unsafe();
+ return 0;
+}

-static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
{
- int ret;
+ return wait_on_atomic_t(&cinfo->rpcs_out,
+ nfs_wait_atomic_killable, TASK_KILLABLE);
+}

- if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
- return 1;
- if (!may_wait)
- return 0;
- ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
- NFS_INO_COMMIT,
- nfs_wait_bit_killable,
- TASK_KILLABLE);
- return (ret < 0) ? ret : 1;
+static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+{
+ atomic_inc(&cinfo->rpcs_out);
}

-static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
{
- clear_bit(NFS_INO_COMMIT, &nfsi->flags);
- smp_mb__after_atomic();
- wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+ if (atomic_dec_and_test(&cinfo->rpcs_out))
+ wake_up_atomic_t(&cinfo->rpcs_out);
}

void nfs_commitdata_release(struct nfs_commit_data *data)
@@ -1693,7 +1697,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
data->mds_ops, how, 0);
out_bad:
nfs_retry_commit(head, NULL, cinfo, 0);
- cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}

@@ -1755,8 +1758,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);

nfs_init_cinfo(&cinfo, data->inode, data->dreq);
- if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
- nfs_commit_clear_lock(NFS_I(data->inode));
+ nfs_commit_end(cinfo.mds);
}

static void nfs_commit_release(void *calldata)
@@ -1775,7 +1777,6 @@ static const struct rpc_call_ops nfs_commit_ops = {

static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
.completion = nfs_commit_release_pages,
- .error_cleanup = nfs_commit_clear_lock,
};

int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
@@ -1794,30 +1795,25 @@ int nfs_commit_inode(struct inode *inode, int how)
LIST_HEAD(head);
struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
+ int error = 0;
int res;

- res = nfs_commit_set_lock(NFS_I(inode), may_wait);
- if (res <= 0)
- goto out_mark_dirty;
nfs_init_cinfo_from_inode(&cinfo, inode);
+ nfs_commit_begin(cinfo.mds);
res = nfs_scan_commit(inode, &head, &cinfo);
- if (res) {
- int error;
-
+ if (res)
error = nfs_generic_commit_list(inode, &head, how, &cinfo);
- if (error < 0)
- return error;
- if (!may_wait)
- goto out_mark_dirty;
- error = wait_on_bit_action(&NFS_I(inode)->flags,
- NFS_INO_COMMIT,
- nfs_wait_bit_killable,
- TASK_KILLABLE);
- if (error < 0)
- return error;
- } else
- nfs_commit_clear_lock(NFS_I(inode));
+ nfs_commit_end(cinfo.mds);
+ if (error < 0)
+ goto out_error;
+ if (!may_wait)
+ goto out_mark_dirty;
+ error = wait_on_commit(cinfo.mds);
+ if (error < 0)
+ return error;
return res;
+out_error:
+ res = error;
/* Note: If we exit without ensuring that the commit is complete,
* we must mark the inode as dirty. Otherwise, future calls to
* sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c0e961474a52..ebf0bd72a42b 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -216,7 +216,6 @@ struct nfs_inode {
#define NFS_INO_FLUSHING (4) /* inode is flushing out data */
#define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */
#define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */
-#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */
#define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */
#define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */
#define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e89dbb14138c..a8905b7d4d7f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1423,7 +1423,6 @@ struct nfs_mds_commit_info {
struct nfs_commit_data;
struct nfs_inode;
struct nfs_commit_completion_ops {
- void (*error_cleanup) (struct nfs_inode *nfsi);
void (*completion) (struct nfs_commit_data *data);
};

--
2.5.0


2016-01-04 18:21:54

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 12/18] NFSv4.1/pNFS: Use nfs4_stateid_copy for copying stateids

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 7a452895169f..7e0f2b9a9b10 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -385,7 +385,7 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
enum pnfs_iomode iomode;
bool send;

- stateid = lo->plh_stateid;
+ nfs4_stateid_copy(&stateid, &lo->plh_stateid);
iomode = lo->plh_return_iomode;
send = pnfs_prepare_layoutreturn(lo);
spin_unlock(&inode->i_lock);
@@ -1007,7 +1007,7 @@ _pnfs_return_layout(struct inode *ino)
dprintk("NFS: %s no layout to return\n", __func__);
goto out;
}
- stateid = nfsi->layout->plh_stateid;
+ nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid);
/* Reference matched in nfs4_layoutreturn_release */
pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs);
@@ -1098,7 +1098,7 @@ bool pnfs_roc(struct inode *ino)
goto out_noroc;
}

- stateid = lo->plh_stateid;
+ nfs4_stateid_copy(&stateid, &lo->plh_stateid);
/* always send layoutreturn if being marked so */
if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
&lo->plh_flags))
--
2.5.0


2016-01-04 18:21:54

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 11/18] NFSv4.1/pNFS: Don't pass stateids by value to pnfs_send_layoutreturn()

A stateid is a structure, pass it as a pointer.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6593be7c0129..7a452895169f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
static LIST_HEAD(pnfs_modules_tbl);

static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
+pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
enum pnfs_iomode iomode, bool sync);

/* Return the registered pnfs layout driver module matching given id */
@@ -391,7 +391,7 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
spin_unlock(&inode->i_lock);
if (send) {
/* Send an async layoutreturn so we dont deadlock */
- pnfs_send_layoutreturn(lo, stateid, iomode, false);
+ pnfs_send_layoutreturn(lo, &stateid, iomode, false);
}
} else
spin_unlock(&inode->i_lock);
@@ -947,7 +947,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
}

static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
+pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
enum pnfs_iomode iomode, bool sync)
{
struct inode *ino = lo->plh_inode;
@@ -964,7 +964,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid,
goto out;
}

- lrp->args.stateid = stateid;
+ nfs4_stateid_copy(&lrp->args.stateid, stateid);
lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
lrp->args.inode = ino;
lrp->args.range.iomode = iomode;
@@ -1035,7 +1035,7 @@ _pnfs_return_layout(struct inode *ino)
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
if (send)
- status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+ status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
out_put_layout_hdr:
pnfs_put_layout_hdr(lo);
out:
@@ -1126,7 +1126,7 @@ out_noroc:
pnfs_free_lseg_list(&tmp_list);
pnfs_layoutcommit_inode(ino, true);
if (layoutreturn)
- pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+ pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true);
return roc;
}

--
2.5.0


2016-01-04 18:21:55

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 13/18] NFSv4.1/pNFS: pnfs_mark_matching_lsegs_return() should set the iomode

If pnfs_mark_matching_lsegs_return() needs to mark a layout segment for
return, then it must also set the return iomode.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 7e0f2b9a9b10..449c4782cab3 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1758,6 +1758,16 @@ out_forget_reply:
goto out;
}

+static void
+pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
+{
+ if (lo->plh_return_iomode == iomode)
+ return;
+ if (lo->plh_return_iomode != 0)
+ iomode = IOMODE_ANY;
+ lo->plh_return_iomode = iomode;
+}
+
void
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
@@ -1780,6 +1790,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
lseg->pls_range.offset,
lseg->pls_range.length);
set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
+ pnfs_set_plh_return_iomode(lo, return_range->iomode);
mark_lseg_invalid(lseg, tmp_list);
set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
&lo->plh_flags);
@@ -1801,10 +1812,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
spin_lock(&inode->i_lock);
/* set failure bit so that pnfs path will be retried later */
pnfs_layout_set_fail_bit(lo, iomode);
- if (lo->plh_return_iomode == 0)
- lo->plh_return_iomode = range.iomode;
- else if (lo->plh_return_iomode != range.iomode)
- lo->plh_return_iomode = IOMODE_ANY;
+ pnfs_set_plh_return_iomode(lo, range.iomode);
/*
* mark all matching lsegs so that we are sure to have no live
* segments at hand when sending layoutreturn. See pnfs_put_lseg()
--
2.5.0


2016-01-04 18:21:56

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 14/18] NFSv4.1/pNFS: pnfs_error_mark_layout_for_return() must always return layout

Fix a bug whereby if all the layout segments could be immediately freed,
the call to pnfs_error_mark_layout_for_return() would never result in
a layoutreturn.

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 26 ++++++++++++++++++++------
fs/nfs/pnfs.h | 2 +-
2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 449c4782cab3..8e1d4229bf2d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1768,17 +1768,18 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
lo->plh_return_iomode = iomode;
}

-void
+int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
struct pnfs_layout_range *return_range)
{
struct pnfs_layout_segment *lseg, *next;
+ int remaining = 0;

dprintk("%s:Begin lo %p\n", __func__, lo);

if (list_empty(&lo->plh_segs))
- return;
+ return 0;

assert_spin_locked(&lo->plh_inode->i_lock);

@@ -1791,10 +1792,12 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
lseg->pls_range.length);
set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
pnfs_set_plh_return_iomode(lo, return_range->iomode);
- mark_lseg_invalid(lseg, tmp_list);
+ if (!mark_lseg_invalid(lseg, tmp_list))
+ remaining++;
set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
&lo->plh_flags);
}
+ return remaining;
}

void pnfs_error_mark_layout_for_return(struct inode *inode,
@@ -1808,6 +1811,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
.length = NFS4_MAX_UINT64,
};
LIST_HEAD(free_me);
+ bool return_now = false;

spin_lock(&inode->i_lock);
/* set failure bit so that pnfs path will be retried later */
@@ -1818,10 +1822,20 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
* segments at hand when sending layoutreturn. See pnfs_put_lseg()
* for how it works.
*/
- pnfs_mark_matching_lsegs_return(lo, &free_me, &range);
- spin_unlock(&inode->i_lock);
+ if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) {
+ nfs4_stateid stateid;
+ enum pnfs_iomode iomode = lo->plh_return_iomode;
+
+ nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+ return_now = pnfs_prepare_layoutreturn(lo);
+ spin_unlock(&inode->i_lock);
+ if (return_now)
+ pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+ } else {
+ spin_unlock(&inode->i_lock);
+ nfs_commit_inode(inode, 0);
+ }
pnfs_free_lseg_list(&free_me);
- nfs_commit_inode(inode, 0);
}
EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);

diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 4bd7faf9ce50..3d0f513a4a77 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -266,7 +266,7 @@ int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
struct pnfs_layout_range *recall_range);
-void pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
+int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
struct pnfs_layout_range *recall_range);
bool pnfs_roc(struct inode *ino);
--
2.5.0


2016-01-04 18:21:57

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 15/18] NFSv4.1/pNFS: Fix a race in initiate_file_draining()

Peng Tao points out that the call to pnfs_mark_matching_lsegs_return()
could race with pnfs_put_lseg(), in which case the layout segment is
cleared, but no layoutreturn will be sent.
Fix is to replace the call to pnfs_mark_matching_lsegs_invalid().

Reported-by: Peng Tao <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/callback_proc.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 2be8b252e3b1..f0939d097406 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -216,11 +216,8 @@ static u32 initiate_file_draining(struct nfs_client *clp,
goto unlock;
}

- if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list,
+ if (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
&args->cbl_range)) {
- pnfs_mark_matching_lsegs_return(lo,
- &free_me_list,
- &args->cbl_range);
rv = NFS4_OK;
goto unlock;
}
--
2.5.0


2016-01-04 18:21:58

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 16/18] NFSv4.1/pNFS: Cleanup pnfs_mark_matching_lsegs_invalid()

Make it more obvious what we're returning...

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8e1d4229bf2d..f86f060f853d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -569,7 +569,7 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct pnfs_layout_range *recall_range)
{
struct pnfs_layout_segment *lseg, *next;
- int invalid = 0, removed = 0;
+ int remaining = 0;

dprintk("%s:Begin lo %p\n", __func__, lo);

@@ -582,11 +582,11 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
"offset %llu length %llu\n", __func__,
lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
lseg->pls_range.length);
- invalid++;
- removed += mark_lseg_invalid(lseg, tmp_list);
+ if (!mark_lseg_invalid(lseg, tmp_list))
+ remaining++;
}
- dprintk("%s:Return %i\n", __func__, invalid - removed);
- return invalid - removed;
+ dprintk("%s:Return %i\n", __func__, remaining);
+ return remaining;
}

/* note free_me must contain lsegs from a single layout_hdr */
--
2.5.0


2016-01-04 18:21:59

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 17/18] NFSv4.1/pnfs: Cleanup copying of pnfs_layout_range structures

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 4 ++--
fs/nfs/pnfs.h | 7 +++++++
2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f86f060f853d..ebf896b54d9d 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -863,7 +863,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
static struct pnfs_layout_segment *
send_layoutget(struct pnfs_layout_hdr *lo,
struct nfs_open_context *ctx,
- struct pnfs_layout_range *range,
+ const struct pnfs_layout_range *range,
gfp_t gfp_flags)
{
struct inode *ino = lo->plh_inode;
@@ -896,7 +896,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
lgp->args.minlength = i_size - range->offset;
}
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
- lgp->args.range = *range;
+ pnfs_copy_range(&lgp->args.range, range);
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->args.ctx = get_nfs_open_context(ctx);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 3d0f513a4a77..dcc76335fd4b 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -578,6 +578,13 @@ pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo)
set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
}

+static inline void
+pnfs_copy_range(struct pnfs_layout_range *dst,
+ const struct pnfs_layout_range *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+}
+
extern unsigned int layoutstats_timer;

#ifdef NFS_DEBUG
--
2.5.0


2016-01-04 18:22:00

by Trond Myklebust

[permalink] [raw]
Subject: [PATCH v3 18/18] NFSv4.1/pNFS: Cleanup constify struct pnfs_layout_range arguments

Signed-off-by: Trond Myklebust <[email protected]>
---
fs/nfs/pnfs.c | 6 +++---
fs/nfs/pnfs.h | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ebf896b54d9d..04db6d951b99 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -566,7 +566,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
int
pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
- struct pnfs_layout_range *recall_range)
+ const struct pnfs_layout_range *recall_range)
{
struct pnfs_layout_segment *lseg, *next;
int remaining = 0;
@@ -828,7 +828,7 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo)

int
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct pnfs_layout_range *range,
+ const struct pnfs_layout_range *range,
struct nfs4_state *open_state)
{
int status = 0;
@@ -1771,7 +1771,7 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
- struct pnfs_layout_range *return_range)
+ const struct pnfs_layout_range *return_range)
{
struct pnfs_layout_segment *lseg, *next;
int remaining = 0;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index dcc76335fd4b..78df618a1596 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -261,14 +261,14 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
bool update_barrier);
int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
struct pnfs_layout_hdr *lo,
- struct pnfs_layout_range *range,
+ const struct pnfs_layout_range *range,
struct nfs4_state *open_state);
int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
- struct pnfs_layout_range *recall_range);
+ const struct pnfs_layout_range *recall_range);
int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
- struct pnfs_layout_range *recall_range);
+ const struct pnfs_layout_range *recall_range);
bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
--
2.5.0