This RFC series adds logic to the Linux NFS server to make it wait a
few moments for clients to return a delegation before replying with
NFS4ERR_DELAY. There are two main benefits:
- This improves responsiveness when a delegated file is accessed
from another client
- This removes an unnecessary latency if a client has neglected to
return a delegation before attempting a RENAME or SETATTR
This series is incomplete:
- There are likely other operations that can benefit (eg. OPEN)
- The wait is a fixed 30ms delay; it would be better if the server
could match an incoming CB_RECALL reply with waiters so they can
proceed immediately, but I am still figuring out how that can be
done
Changes since RFC:
- Eliminate spurious console messages on the server
- Wait for DELEGRETURN for RENAME operations
- Add CB done tracepoints
- Rework the retry loop
---
Chuck Lever (3):
NFSD: Add tracepoints to report NFSv4 callback completions
NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY
NFSD: Make nfsd4_rename() wait before returning NFS4ERR_DELAY
fs/nfsd/nfs4layouts.c | 2 +-
fs/nfsd/nfs4proc.c | 52 ++++++++++++++++++++++++++++++--------
fs/nfsd/nfs4state.c | 21 ++++++++++++++++
fs/nfsd/nfsd.h | 1 +
fs/nfsd/trace.h | 58 +++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/xdr4.h | 2 ++
6 files changed, 124 insertions(+), 12 deletions(-)
--
Chuck Lever
Wireshark has always been lousy about dissecting NFSv4 callbacks,
especially NFSv4.0. Add tracepoints so we can surgically capture
these events in the trace log.
Tracepoints are time-stamped and ordered so that we can now observe
the timing relationship between a CB_RECALL Reply and the client's
DELEGRETURN Call. Example:
nfsd-1153 [002] 211.986391: nfsd_cb_recall: addr=192.168.1.67:45767 client 62ea82e4:fee7492a stateid 00000003:00000001
nfsd-1153 [002] 212.095634: nfsd_compound: xid=0x0000002c opcnt=2
nfsd-1153 [002] 212.095647: nfsd_compound_status: op=1/2 OP_PUTFH status=0
nfsd-1153 [002] 212.095658: nfsd_file_put: hash=0xf72 inode=0xffff9291148c7410 ref=3 flags=HASHED|REFERENCED may=READ file=0xffff929103b3ea00
nfsd-1153 [002] 212.095661: nfsd_compound_status: op=2/2 OP_DELEGRETURN status=0
kworker/u25:8-148 [002] 212.096713: nfsd_cb_recall_done: client 62ea82e4:fee7492a stateid 00000003:00000001 status=0
Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfs4layouts.c | 2 +-
fs/nfsd/nfs4proc.c | 3 +++
fs/nfsd/nfs4state.c | 4 ++++
fs/nfsd/trace.h | 39 +++++++++++++++++++++++++++++++++++++++
4 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2c05692a9abf..3564d1c6f610 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -658,7 +658,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
-
+ trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
switch (task->tk_status) {
case 0:
case -NFS4ERR_DELAY:
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3895eb52d2b1..42bfe0d769ec 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1666,6 +1666,9 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
+ struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
+
+ trace_nfsd_cb_offload_done(©->cp_res.cb_stateid, task);
return 1;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9409a0dc1b76..0cf5a4bb36df 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -357,6 +357,8 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
+ trace_nfsd_cb_notify_lock_done(&zero_stateid, task);
+
/*
* Since this is just an optimization, we don't try very hard if it
* turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
@@ -4715,6 +4717,8 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
+ trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
+
if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
return 1;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index a60ead3b227a..8c3d5f88072f 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1144,6 +1144,45 @@ TRACE_EVENT(nfsd_cb_offload,
__entry->fh_hash, __entry->count, __entry->status)
);
+DECLARE_EVENT_CLASS(nfsd_cb_done_class,
+ TP_PROTO(
+ const stateid_t *stp,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(stp, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ __entry->status = task->tk_status;
+ ),
+ TP_printk("client %08x:%08x stateid %08x:%08x status=%d",
+ __entry->cl_boot, __entry->cl_id, __entry->si_id,
+ __entry->si_generation, __entry->status
+ )
+);
+
+#define DEFINE_NFSD_CB_DONE_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_done_class, name, \
+ TP_PROTO( \
+ const stateid_t *stp, \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(stp, task))
+
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+
#endif /* _NFSD_TRACE_H */
#undef TRACE_INCLUDE_PATH
nfsd_rename() can kick off a CB_RECALL (via
vfs_rename() -> leases_conflict()) if a delegation is present.
Before returning NFS4ERR_DELAY, give the client holding that
delegation a chance to return it and then retry the nfsd_rename()
again, once.
Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfs4proc.c | 31 +++++++++++++++++++++++--------
1 file changed, 23 insertions(+), 8 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 62a267bb2ce5..2e484aafc41c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1056,17 +1056,32 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_rename *rename = &u->rename;
__be32 status;
+ int retries;
if (opens_in_grace(SVC_NET(rqstp)))
return nfserr_grace;
- status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
- rename->rn_snamelen, &cstate->current_fh,
- rename->rn_tname, rename->rn_tnamelen);
- if (status)
- return status;
- set_change_info(&rename->rn_sinfo, &cstate->current_fh);
- set_change_info(&rename->rn_tinfo, &cstate->save_fh);
- return nfs_ok;
+
+ retries = 1;
+ do {
+ status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
+ rename->rn_snamelen, &cstate->current_fh,
+ rename->rn_tname, rename->rn_tnamelen);
+ if (status == nfs_ok) {
+ set_change_info(&rename->rn_sinfo, &cstate->current_fh);
+ set_change_info(&rename->rn_tinfo, &cstate->save_fh);
+ break;
+ }
+ if (status != nfserr_jukebox)
+ break;
+ if (!retries--)
+ break;
+
+ fh_clear_pre_post_attrs(&cstate->save_fh);
+ fh_clear_pre_post_attrs(&cstate->current_fh);
+ nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh);
+ } while (1);
+
+ return status;
}
static __be32