2022-08-03 14:44:24

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v2 0/3] Wait for DELEGRETURN before returning NFS4ERR_DELAY

This RFC series adds logic to the Linux NFS server to make it wait a
few moments for clients to return a delegation before replying with
NFS4ERR_DELAY. There are two main benefits:

- This improves responsiveness when a delegated file is accessed
from another client
- This removes an unnecessary latency if a client has neglected to
return a delegation before attempting a RENAME or SETATTR

This series is incomplete:
- There are likely other operations that can benefit (eg. OPEN)
- The wait is a fixed 30ms delay; it would be better if the server
could match an incoming CB_RECALL reply with waiters so they can
proceed immediately, but I am still figuring out how that can be
done

Changes since RFC:
- Eliminate spurious console messages on the server
- Wait for DELEGRETURN for RENAME operations
- Add CB done tracepoints
- Rework the retry loop

---

Chuck Lever (3):
NFSD: Add tracepoints to report NFSv4 callback completions
NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY
NFSD: Make nfsd4_rename() wait before returning NFS4ERR_DELAY


fs/nfsd/nfs4layouts.c | 2 +-
fs/nfsd/nfs4proc.c | 52 ++++++++++++++++++++++++++++++--------
fs/nfsd/nfs4state.c | 21 ++++++++++++++++
fs/nfsd/nfsd.h | 1 +
fs/nfsd/trace.h | 58 +++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/xdr4.h | 2 ++
6 files changed, 124 insertions(+), 12 deletions(-)

--
Chuck Lever



2022-08-03 14:45:12

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v2 1/3] NFSD: Add tracepoints to report NFSv4 callback completions

Wireshark has always been lousy about dissecting NFSv4 callbacks,
especially NFSv4.0. Add tracepoints so we can surgically capture
these events in the trace log.

Tracepoints are time-stamped and ordered so that we can now observe
the timing relationship between a CB_RECALL Reply and the client's
DELEGRETURN Call. Example:

nfsd-1153 [002] 211.986391: nfsd_cb_recall: addr=192.168.1.67:45767 client 62ea82e4:fee7492a stateid 00000003:00000001

nfsd-1153 [002] 212.095634: nfsd_compound: xid=0x0000002c opcnt=2
nfsd-1153 [002] 212.095647: nfsd_compound_status: op=1/2 OP_PUTFH status=0
nfsd-1153 [002] 212.095658: nfsd_file_put: hash=0xf72 inode=0xffff9291148c7410 ref=3 flags=HASHED|REFERENCED may=READ file=0xffff929103b3ea00
nfsd-1153 [002] 212.095661: nfsd_compound_status: op=2/2 OP_DELEGRETURN status=0
kworker/u25:8-148 [002] 212.096713: nfsd_cb_recall_done: client 62ea82e4:fee7492a stateid 00000003:00000001 status=0

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfs4layouts.c | 2 +-
fs/nfsd/nfs4proc.c | 3 +++
fs/nfsd/nfs4state.c | 4 ++++
fs/nfsd/trace.h | 39 +++++++++++++++++++++++++++++++++++++++
4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 2c05692a9abf..3564d1c6f610 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -658,7 +658,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;

-
+ trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
switch (task->tk_status) {
case 0:
case -NFS4ERR_DELAY:
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3895eb52d2b1..42bfe0d769ec 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1666,6 +1666,9 @@ static void nfsd4_cb_offload_release(struct nfsd4_callback *cb)
static int nfsd4_cb_offload_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
+ struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb);
+
+ trace_nfsd_cb_offload_done(&copy->cp_res.cb_stateid, task);
return 1;
}

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9409a0dc1b76..0cf5a4bb36df 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -357,6 +357,8 @@ nfsd4_cb_notify_lock_prepare(struct nfsd4_callback *cb)
static int
nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task)
{
+ trace_nfsd_cb_notify_lock_done(&zero_stateid, task);
+
/*
* Since this is just an optimization, we don't try very hard if it
* turns out not to succeed. We'll requeue it on NFS4ERR_DELAY, and
@@ -4715,6 +4717,8 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
{
struct nfs4_delegation *dp = cb_to_delegation(cb);

+ trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
+
if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
return 1;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index a60ead3b227a..8c3d5f88072f 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1144,6 +1144,45 @@ TRACE_EVENT(nfsd_cb_offload,
__entry->fh_hash, __entry->count, __entry->status)
);

+DECLARE_EVENT_CLASS(nfsd_cb_done_class,
+ TP_PROTO(
+ const stateid_t *stp,
+ const struct rpc_task *task
+ ),
+ TP_ARGS(stp, task),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ __field(int, status)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ __entry->status = task->tk_status;
+ ),
+ TP_printk("client %08x:%08x stateid %08x:%08x status=%d",
+ __entry->cl_boot, __entry->cl_id, __entry->si_id,
+ __entry->si_generation, __entry->status
+ )
+);
+
+#define DEFINE_NFSD_CB_DONE_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_done_class, name, \
+ TP_PROTO( \
+ const stateid_t *stp, \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(stp, task))
+
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_recall_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_notify_lock_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_layout_done);
+DEFINE_NFSD_CB_DONE_EVENT(nfsd_cb_offload_done);
+
#endif /* _NFSD_TRACE_H */

#undef TRACE_INCLUDE_PATH



2022-08-03 14:45:12

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v2 3/3] NFSD: Make nfsd4_rename() wait before returning NFS4ERR_DELAY

nfsd_rename() can kick off a CB_RECALL (via
vfs_rename() -> leases_conflict()) if a delegation is present.
Before returning NFS4ERR_DELAY, give the client holding that
delegation a chance to return it and then retry the nfsd_rename()
again, once.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfs4proc.c | 31 +++++++++++++++++++++++--------
1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 62a267bb2ce5..2e484aafc41c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1056,17 +1056,32 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfsd4_rename *rename = &u->rename;
__be32 status;
+ int retries;

if (opens_in_grace(SVC_NET(rqstp)))
return nfserr_grace;
- status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
- rename->rn_snamelen, &cstate->current_fh,
- rename->rn_tname, rename->rn_tnamelen);
- if (status)
- return status;
- set_change_info(&rename->rn_sinfo, &cstate->current_fh);
- set_change_info(&rename->rn_tinfo, &cstate->save_fh);
- return nfs_ok;
+
+ retries = 1;
+ do {
+ status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
+ rename->rn_snamelen, &cstate->current_fh,
+ rename->rn_tname, rename->rn_tnamelen);
+ if (status == nfs_ok) {
+ set_change_info(&rename->rn_sinfo, &cstate->current_fh);
+ set_change_info(&rename->rn_tinfo, &cstate->save_fh);
+ break;
+ }
+ if (status != nfserr_jukebox)
+ break;
+ if (!retries--)
+ break;
+
+ fh_clear_pre_post_attrs(&cstate->save_fh);
+ fh_clear_pre_post_attrs(&cstate->current_fh);
+ nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh);
+ } while (1);
+
+ return status;
}

static __be32