2020-05-01 17:23:22

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 0/4] New NFSD tracepoints

These patches introduce tracepoint infrastructure in several places
in the NFS server in order to help improve operational observability.
They were constructed while chasing down problems I found while
testing v5.7.

---

Chuck Lever (4):
NFSD: Add tracepoints to NFSD's duplicate reply cache
NFSD: Add tracepoints to the NFSD state management code
NFSD: Add tracepoints for monitoring NFSD callbacks
SUNRPC: Clean up request deferral tracepoints


fs/nfsd/nfs4callback.c | 37 +++--
fs/nfsd/nfs4state.c | 58 +++----
fs/nfsd/state.h | 7 -
fs/nfsd/trace.h | 286 ++++++++++++++++++++++++++++++++++
include/trace/events/sunrpc.h | 11 +-
net/sunrpc/svc_xprt.c | 12 +-
6 files changed, 343 insertions(+), 68 deletions(-)

--
Chuck Lever


2020-05-01 17:23:22

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 4/4] SUNRPC: Clean up request deferral tracepoints

- Rename these so they are easy to enable and search for as a set
- Move the tracepoints to get a more accurate sense of control flow
- Tracepoints should not fire on xprt shutdown
- Display memory address in case data structure had been corrupted
- Abandon dprintk in these paths

I haven't ever gotten one of these tracepoints to trigger. I wonder
if we should simply remove them.

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 11 ++++++++---
net/sunrpc/svc_xprt.c | 12 ++++++------
2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index ffd2215950dc..3158b3f7e01e 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1313,27 +1313,32 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
TP_ARGS(dr),

TP_STRUCT__entry(
+ __field(const void *, dr)
__field(u32, xid)
__string(addr, dr->xprt->xpt_remotebuf)
),

TP_fast_assign(
+ __entry->dr = dr;
__entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
(dr->xprt_hlen>>2)));
__assign_str(addr, dr->xprt->xpt_remotebuf);
),

- TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid)
+ TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr,
+ __entry->xid)
);
+
#define DEFINE_SVC_DEFERRED_EVENT(name) \
- DEFINE_EVENT(svc_deferred_event, svc_##name##_deferred, \
+ DEFINE_EVENT(svc_deferred_event, svc_defer_##name, \
TP_PROTO( \
const struct svc_deferred_req *dr \
), \
TP_ARGS(dr))

DEFINE_SVC_DEFERRED_EVENT(drop);
-DEFINE_SVC_DEFERRED_EVENT(revisit);
+DEFINE_SVC_DEFERRED_EVENT(queue);
+DEFINE_SVC_DEFERRED_EVENT(recv);

DECLARE_EVENT_CLASS(cache_event,
TP_PROTO(
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2284ff038dad..e12ec68cd0ff 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1158,16 +1158,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
set_bit(XPT_DEFERRED, &xprt->xpt_flags);
if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
spin_unlock(&xprt->xpt_lock);
- dprintk("revisit canceled\n");
+ trace_svc_defer_drop(dr);
svc_xprt_put(xprt);
- trace_svc_drop_deferred(dr);
kfree(dr);
return;
}
- dprintk("revisit queued\n");
dr->xprt = NULL;
list_add(&dr->handle.recent, &xprt->xpt_deferred);
spin_unlock(&xprt->xpt_lock);
+ trace_svc_defer_queue(dr);
svc_xprt_enqueue(xprt);
svc_xprt_put(xprt);
}
@@ -1213,22 +1212,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
dr->argslen << 2);
}
+ trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
set_bit(RQ_DROPME, &rqstp->rq_flags);

dr->handle.revisit = svc_revisit;
- trace_svc_defer(rqstp);
return &dr->handle;
}

/*
* recv data from a deferred request into an active one
*/
-static int svc_deferred_recv(struct svc_rqst *rqstp)
+static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
{
struct svc_deferred_req *dr = rqstp->rq_deferred;

+ trace_svc_defer_recv(dr);
+
/* setup iov_base past transport header */
rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
/* The iov_len does not include the transport header bytes */
@@ -1259,7 +1260,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
struct svc_deferred_req,
handle.recent);
list_del_init(&dr->handle.recent);
- trace_svc_revisit_deferred(dr);
} else
clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
spin_unlock(&xprt->xpt_lock);

2020-05-01 17:23:22

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 2/4] NFSD: Add tracepoints to the NFSD state management code

Capture obvious events and replace dprintk() callsites. Introduce
infrastructure so that adding more tracepoints in this code later
is simplified.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfs4state.c | 52 +++++++-------------
fs/nfsd/state.h | 7 ---
fs/nfsd/trace.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 152 insertions(+), 40 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c107caa56525..04d80f9e2f91 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -51,6 +51,7 @@
#include "netns.h"
#include "pnfs.h"
#include "filecache.h"
+#include "trace.h"

#define NFSDDBG_FACILITY NFSDDBG_PROC

@@ -167,9 +168,6 @@ renew_client_locked(struct nfs4_client *clp)
return;
}

- dprintk("renewing client (clientid %08x/%08x)\n",
- clp->cl_clientid.cl_boot,
- clp->cl_clientid.cl_id);
list_move_tail(&clp->cl_lru, &nn->client_lru);
clp->cl_time = ktime_get_boottime_seconds();
}
@@ -1922,8 +1920,7 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
*/
if (clid->cl_boot == (u32)nn->boot_time)
return 0;
- dprintk("NFSD stale clientid (%08x/%08x) boot_time %08llx\n",
- clid->cl_boot, clid->cl_id, nn->boot_time);
+ trace_nfsd_clid_stale(clid);
return 1;
}

@@ -3879,11 +3876,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (clp_used_exchangeid(conf))
goto out;
if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
- char addr_str[INET6_ADDRSTRLEN];
- rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
- sizeof(addr_str));
- dprintk("NFSD: setclientid: string in use by client "
- "at %s\n", addr_str);
+ trace_nfsd_clid_inuse_err(conf);
goto out;
}
}
@@ -4076,7 +4069,6 @@ nfsd4_init_slabs(void)
out_free_client_slab:
kmem_cache_destroy(client_slab);
out:
- dprintk("nfsd4: out of memory while initializing nfsv4\n");
return -ENOMEM;
}

@@ -4508,6 +4500,8 @@ nfsd_break_deleg_cb(struct file_lock *fl)
struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
struct nfs4_file *fp = dp->dl_stid.sc_file;

+ trace_nfsd_deleg_break(&dp->dl_stid.sc_stateid);
+
/*
* We don't want the locks code to timeout the lease for us;
* we'll remove it ourself if a delegation isn't returned
@@ -5018,8 +5012,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,

memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));

- dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
- STATEID_VAL(&dp->dl_stid.sc_stateid));
+ trace_nfsd_deleg_open(&dp->dl_stid.sc_stateid);
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
nfs4_put_stid(&dp->dl_stid);
return;
@@ -5136,9 +5129,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
nfs4_open_delegation(current_fh, open, stp);
nodeleg:
status = nfs_ok;
-
- dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
- STATEID_VAL(&stp->st_stid.sc_stateid));
+ trace_nfsd_deleg_none(&stp->st_stid.sc_stateid);
out:
/* 4.1 client trying to upgrade/downgrade delegation? */
if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
@@ -5192,8 +5183,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

- dprintk("process_renew(%08x/%08x): starting\n",
- clid->cl_boot, clid->cl_id);
+ trace_nfsd_clid_renew(clid);
status = lookup_clientid(clid, cstate, nn, false);
if (status)
goto out;
@@ -5214,6 +5204,7 @@ nfsd4_end_grace(struct nfsd_net *nn)
if (nn->grace_ended)
return;

+ trace_nfsd_grace_complete(nn);
nn->grace_ended = true;
/*
* If the server goes down again right now, an NFSv4
@@ -5279,13 +5270,10 @@ nfs4_laundromat(struct nfsd_net *nn)
copy_stateid_t *cps_t;
int i;

- dprintk("NFSD: laundromat service - starting\n");
-
if (clients_still_reclaiming(nn)) {
new_timeo = 0;
goto out;
}
- dprintk("NFSD: end of grace period\n");
nfsd4_end_grace(nn);
INIT_LIST_HEAD(&reaplist);

@@ -5307,8 +5295,7 @@ nfs4_laundromat(struct nfsd_net *nn)
break;
}
if (mark_client_expired_locked(clp)) {
- dprintk("NFSD: client in use (clientid %08x)\n",
- clp->cl_clientid.cl_id);
+ trace_nfsd_clid_expired(&clp->cl_clientid);
continue;
}
list_add(&clp->cl_lru, &reaplist);
@@ -5316,8 +5303,7 @@ nfs4_laundromat(struct nfsd_net *nn)
spin_unlock(&nn->client_lock);
list_for_each_safe(pos, next, &reaplist) {
clp = list_entry(pos, struct nfs4_client, cl_lru);
- dprintk("NFSD: purging unused client (clientid %08x)\n",
- clp->cl_clientid.cl_id);
+ trace_nfsd_clid_purged(&clp->cl_clientid);
list_del_init(&clp->cl_lru);
expire_client(clp);
}
@@ -5407,7 +5393,6 @@ laundromat_main(struct work_struct *laundry)
laundromat_work);

t = nfs4_laundromat(nn);
- dprintk("NFSD: laundromat_main - sleeping for %lld seconds\n", t);
queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
}

@@ -5948,8 +5933,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
struct nfs4_stid *s;
struct nfs4_ol_stateid *stp = NULL;

- dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
- seqid, STATEID_VAL(stateid));
+ trace_nfsd_preprocess(seqid, stateid);

*stpp = NULL;
status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
@@ -6018,9 +6002,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
oo->oo_flags |= NFS4_OO_CONFIRMED;
nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
mutex_unlock(&stp->st_mutex);
- dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
- __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
-
+ trace_nfsd_open_confirm(oc->oc_seqid, &stp->st_stid.sc_stateid);
nfsd4_client_record_create(oo->oo_owner.so_client);
status = nfs_ok;
put_stateid:
@@ -7072,7 +7054,7 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
unsigned int strhashval;
struct nfs4_client_reclaim *crp;

- dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", name.len, name.data);
+ trace_nfsd_clid_reclaim(nn, name.len, name.data);
crp = alloc_reclaim();
if (crp) {
strhashval = clientstr_hashval(name);
@@ -7122,7 +7104,7 @@ nfsd4_find_reclaim_client(struct xdr_netobj name, struct nfsd_net *nn)
unsigned int strhashval;
struct nfs4_client_reclaim *crp = NULL;

- dprintk("NFSD: nfs4_find_reclaim_client for name %.*s\n", name.len, name.data);
+ trace_nfsd_clid_find(nn, name.len, name.data);

strhashval = clientstr_hashval(name);
list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
@@ -7686,6 +7668,9 @@ nfsd_recall_delegations(struct list_head *reaplist)
list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
list_del_init(&dp->dl_recall_lru);
clp = dp->dl_stid.sc_client;
+
+ trace_nfsd_deleg_recall(&dp->dl_stid.sc_stateid);
+
/*
* We skipped all entries that had a zero dl_time before,
* so we can now reset the dl_time back to 0. If a delegation
@@ -7868,6 +7853,7 @@ nfs4_state_start_net(struct net *net)
goto skip_grace;
printk(KERN_INFO "NFSD: starting %lld-second grace period (net %x)\n",
nn->nfsd4_grace, net->ns.inum);
+ trace_nfsd_grace_start(nn);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;

diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 68d3f30ee760..3b408532a5dc 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -64,13 +64,6 @@ typedef struct {
refcount_t sc_count;
} copy_stateid_t;

-#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
-#define STATEID_VAL(s) \
- (s)->si_opaque.so_clid.cl_boot, \
- (s)->si_opaque.so_clid.cl_id, \
- (s)->si_opaque.so_id, \
- (s)->si_generation
-
struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 68b9f5014a6d..654a27649207 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -277,6 +277,7 @@ DECLARE_EVENT_CLASS(nfsd_stateid_class,
DEFINE_EVENT(nfsd_stateid_class, nfsd_##name, \
TP_PROTO(stateid_t *stp), \
TP_ARGS(stp))
+
DEFINE_STATEID_EVENT(layoutstate_alloc);
DEFINE_STATEID_EVENT(layoutstate_unhash);
DEFINE_STATEID_EVENT(layoutstate_free);
@@ -288,6 +289,138 @@ DEFINE_STATEID_EVENT(layout_recall_done);
DEFINE_STATEID_EVENT(layout_recall_fail);
DEFINE_STATEID_EVENT(layout_recall_release);

+DEFINE_STATEID_EVENT(deleg_open);
+DEFINE_STATEID_EVENT(deleg_none);
+DEFINE_STATEID_EVENT(deleg_break);
+DEFINE_STATEID_EVENT(deleg_recall);
+
+DECLARE_EVENT_CLASS(nfsd_stateseqid_class,
+ TP_PROTO(u32 seqid, const stateid_t *stp),
+ TP_ARGS(seqid, stp),
+ TP_STRUCT__entry(
+ __field(u32, seqid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, si_id)
+ __field(u32, si_generation)
+ ),
+ TP_fast_assign(
+ __entry->seqid = seqid;
+ __entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
+ __entry->cl_id = stp->si_opaque.so_clid.cl_id;
+ __entry->si_id = stp->si_opaque.so_id;
+ __entry->si_generation = stp->si_generation;
+ ),
+ TP_printk("seqid=%u client %08x:%08x stateid %08x:%08x",
+ __entry->seqid, __entry->cl_boot, __entry->cl_id,
+ __entry->si_id, __entry->si_generation)
+)
+
+#define DEFINE_STATESEQID_EVENT(name) \
+DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
+ TP_PROTO(u32 seqid, const stateid_t *stp), \
+ TP_ARGS(seqid, stp))
+
+DEFINE_STATESEQID_EVENT(preprocess);
+DEFINE_STATESEQID_EVENT(open_confirm);
+
+DECLARE_EVENT_CLASS(nfsd_clientid_class,
+ TP_PROTO(const clientid_t *clid),
+ TP_ARGS(clid),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clid->cl_boot;
+ __entry->cl_id = clid->cl_id;
+ ),
+ TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id)
+)
+
+#define DEFINE_CLIENTID_EVENT(name) \
+DEFINE_EVENT(nfsd_clientid_class, nfsd_clid_##name, \
+ TP_PROTO(const clientid_t *clid), \
+ TP_ARGS(clid))
+
+DEFINE_CLIENTID_EVENT(expired);
+DEFINE_CLIENTID_EVENT(purged);
+DEFINE_CLIENTID_EVENT(renew);
+DEFINE_CLIENTID_EVENT(stale);
+
+DECLARE_EVENT_CLASS(nfsd_net_class,
+ TP_PROTO(const struct nfsd_net *nn),
+ TP_ARGS(nn),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ ),
+ TP_printk("boot_time=%16llx", __entry->boot_time)
+)
+
+#define DEFINE_NET_EVENT(name) \
+DEFINE_EVENT(nfsd_net_class, nfsd_##name, \
+ TP_PROTO(const struct nfsd_net *nn), \
+ TP_ARGS(nn))
+
+DEFINE_NET_EVENT(grace_start);
+DEFINE_NET_EVENT(grace_complete);
+
+DECLARE_EVENT_CLASS(nfsd_clid_class,
+ TP_PROTO(const struct nfsd_net *nn,
+ unsigned int namelen,
+ const unsigned char *namedata),
+ TP_ARGS(nn, namelen, namedata),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(unsigned int, namelen)
+ __dynamic_array(unsigned char, name, namelen)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->namelen = namelen;
+ memcpy(__get_dynamic_array(name), namedata, namelen);
+ ),
+ TP_printk("boot_time=%16llx nfs4_clientid=%.*s",
+ __entry->boot_time, __entry->namelen, __get_str(name))
+)
+
+#define DEFINE_CLID_EVENT(name) \
+DEFINE_EVENT(nfsd_clid_class, nfsd_clid_##name, \
+ TP_PROTO(const struct nfsd_net *nn, \
+ unsigned int namelen, \
+ const unsigned char *namedata), \
+ TP_ARGS(nn, namelen, namedata))
+
+DEFINE_CLID_EVENT(find);
+DEFINE_CLID_EVENT(reclaim);
+
+TRACE_EVENT(nfsd_clid_inuse_err,
+ TP_PROTO(const struct nfs4_client *clp),
+ TP_ARGS(clp),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ __field(unsigned int, namelen)
+ __dynamic_array(unsigned char, name, clp->cl_name.len)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ memcpy(__entry->addr, &clp->cl_addr,
+ sizeof(struct sockaddr_in6));
+ __entry->namelen = clp->cl_name.len;
+ memcpy(__get_dynamic_array(name), clp->cl_name.data,
+ clp->cl_name.len);
+ ),
+ TP_printk("nfs4_clientid %.*s already in use by %pISpc, client %08x:%08x",
+ __entry->namelen, __get_str(name), __entry->addr,
+ __entry->cl_boot, __entry->cl_id)
+)
+
TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);

2020-05-01 17:24:24

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 1/4] NFSD: Add tracepoints to NFSD's duplicate reply cache

Try to capture DRC failures.

Two additional clean-ups:
- Introduce Doxygen-style comments for the main entry points
- Remove a dprintk that fires for an allocation failure. This was
the only dprintk in the REPCACHE class.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfscache.c | 57 +++++++++++++++++++++++++++++++-------------------
fs/nfsd/trace.h | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 94 insertions(+), 22 deletions(-)

diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 96352ab7bd81..945d2f5e760e 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -20,8 +20,7 @@

#include "nfsd.h"
#include "cache.h"
-
-#define NFSDDBG_FACILITY NFSDDBG_REPCACHE
+#include "trace.h"

/*
* We use this value to determine the number of hash buckets from the max
@@ -323,8 +322,10 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key,
const struct svc_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
- key->c_key.k_csum != rp->c_key.k_csum)
+ key->c_key.k_csum != rp->c_key.k_csum) {
++nn->payload_misses;
+ trace_nfsd_drc_mismatch(nn, key, rp);
+ }

return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key));
}
@@ -377,15 +378,22 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
return ret;
}

-/*
+/**
+ * nfsd_cache_lookup - Find an entry in the duplicate reply cache
+ * @rqstp: Incoming Call to find
+ *
* Try to find an entry matching the current call in the cache. When none
* is found, we try to grab the oldest expired entry off the LRU list. If
* a suitable one isn't there, then drop the cache_lock and allocate a
* new one, then search again in case one got inserted while this thread
* didn't hold the lock.
+ *
+ * Return values:
+ * %RC_DOIT: Process the request normally
+ * %RC_REPLY: Reply from cache
+ * %RC_DROPIT: Do not process the request further
*/
-int
-nfsd_cache_lookup(struct svc_rqst *rqstp)
+int nfsd_cache_lookup(struct svc_rqst *rqstp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp, *found;
@@ -399,7 +407,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rqstp->rq_cacherep = NULL;
if (type == RC_NOCACHE) {
nfsdstats.rcnocache++;
- return rtn;
+ goto out;
}

csum = nfsd_cache_csum(rqstp);
@@ -409,10 +417,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* preallocate an entry.
*/
rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
- if (!rp) {
- dprintk("nfsd: unable to allocate DRC entry!\n");
- return rtn;
- }
+ if (!rp)
+ goto out;

spin_lock(&b->cache_lock);
found = nfsd_cache_insert(b, rp, nn);
@@ -431,8 +437,10 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)

/* go ahead and prune the cache */
prune_bucket(b, nn);
- out:
+
+out_unlock:
spin_unlock(&b->cache_lock);
+out:
return rtn;

found_entry:
@@ -442,13 +450,13 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)

/* Request being processed */
if (rp->c_state == RC_INPROG)
- goto out;
+ goto out_trace;

/* From the hall of fame of impractical attacks:
* Is this a user who tries to snoop on the cache? */
rtn = RC_DOIT;
if (!test_bit(RQ_SECURE, &rqstp->rq_flags) && rp->c_secure)
- goto out;
+ goto out_trace;

/* Compose RPC reply header */
switch (rp->c_type) {
@@ -460,7 +468,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
break;
case RC_REPLBUFF:
if (!nfsd_cache_append(rqstp, &rp->c_replvec))
- goto out; /* should not happen */
+ goto out_unlock; /* should not happen */
rtn = RC_REPLY;
break;
default:
@@ -468,13 +476,19 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
nfsd_reply_cache_free_locked(b, rp, nn);
}

- goto out;
+out_trace:
+ trace_nfsd_drc_found(nn, rqstp, rtn);
+ goto out_unlock;
}

-/*
- * Update a cache entry. This is called from nfsd_dispatch when
- * the procedure has been executed and the complete reply is in
- * rqstp->rq_res.
+/**
+ * nfsd_cache_update - Update an entry in the duplicate reply cache.
+ * @rqstp: svc_rqst with a finished Reply
+ * @cachetype: which cache to update
+ * @statp: Reply's status code
+ *
+ * This is called from nfsd_dispatch when the procedure has been
+ * executed and the complete reply is in rqstp->rq_res.
*
* We're copying around data here rather than swapping buffers because
* the toplevel loop requires max-sized buffers, which would be a waste
@@ -487,8 +501,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* nfsd failed to encode a reply that otherwise would have been cached.
* In this case, nfsd_cache_update is called with statp == NULL.
*/
-void
-nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
+void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_cacherep *rp = rqstp->rq_cacherep;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 78c574251c60..68b9f5014a6d 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -432,6 +432,65 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event,
__entry->nlink, __entry->mode, __entry->mask)
);

+#include "cache.h"
+
+TRACE_DEFINE_ENUM(RC_DROPIT);
+TRACE_DEFINE_ENUM(RC_REPLY);
+TRACE_DEFINE_ENUM(RC_DOIT);
+
+#define show_drc_retval(x) \
+ __print_symbolic(x, \
+ { RC_DROPIT, "DROPIT" }, \
+ { RC_REPLY, "REPLY" }, \
+ { RC_DOIT, "DOIT" })
+
+TRACE_EVENT(nfsd_drc_found,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct svc_rqst *rqstp,
+ int result
+ ),
+ TP_ARGS(nn, rqstp, result),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(unsigned long, result)
+ __field(u32, xid)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->result = result;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x result=%s",
+ __entry->boot_time, __entry->xid,
+ show_drc_retval(__entry->result))
+
+);
+
+TRACE_EVENT(nfsd_drc_mismatch,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ const struct svc_cacherep *key,
+ const struct svc_cacherep *rp
+ ),
+ TP_ARGS(nn, key, rp),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(u32, xid)
+ __field(u32, cached)
+ __field(u32, ingress)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->xid = be32_to_cpu(key->c_key.k_xid);
+ __entry->cached = key->c_key.k_csum;
+ __entry->ingress = rp->c_key.k_csum;
+ ),
+ TP_printk("boot_time=%16llx xid=0x%08x cached-csum=0x%08x ingress-csum=0x%08x",
+ __entry->boot_time, __entry->xid, __entry->cached,
+ __entry->ingress)
+);
+
#endif /* _NFSD_TRACE_H */

#undef TRACE_INCLUDE_PATH

2020-05-01 17:24:24

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 3/4] NFSD: Add tracepoints for monitoring NFSD callbacks

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfsd/nfs4callback.c | 37 +++++++-----
fs/nfsd/nfs4state.c | 6 +-
fs/nfsd/trace.h | 153 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 177 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 5cf91322de0f..966ca75418c8 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -38,6 +38,7 @@
#include "nfsd.h"
#include "state.h"
#include "netns.h"
+#include "trace.h"
#include "xdr4cb.h"
#include "xdr4.h"

@@ -904,16 +905,20 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c

if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
- (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5))
+ (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) {
+ trace_nfsd_cb_setup_err(clp, -EINVAL);
return -EINVAL;
+ }
args.client_name = clp->cl_cred.cr_principal;
args.prognumber = conn->cb_prog;
args.protocol = XPRT_TRANSPORT_TCP;
args.authflavor = clp->cl_cred.cr_flavor;
clp->cl_cb_ident = conn->cb_ident;
} else {
- if (!conn->cb_xprt)
+ if (!conn->cb_xprt) {
+ trace_nfsd_cb_setup_err(clp, -EINVAL);
return -EINVAL;
+ }
clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
@@ -925,32 +930,27 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
/* Create RPC client */
client = rpc_create(&args);
if (IS_ERR(client)) {
- dprintk("NFSD: couldn't create callback client: %ld\n",
- PTR_ERR(client));
+ trace_nfsd_cb_setup_err(clp, PTR_ERR(client));
return PTR_ERR(client);
}
cred = get_backchannel_cred(clp, client, ses);
if (!cred) {
+ trace_nfsd_cb_setup_err(clp, -ENOMEM);
rpc_shutdown_client(client);
return -ENOMEM;
}
clp->cl_cb_client = client;
clp->cl_cb_cred = cred;
+ trace_nfsd_cb_setup(clp);
return 0;
}

-static void warn_no_callback_path(struct nfs4_client *clp, int reason)
-{
- dprintk("NFSD: warning: no callback path to client %.*s: error %d\n",
- (int)clp->cl_name.len, clp->cl_name.data, reason);
-}
-
static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
clp->cl_cb_state = NFSD4_CB_DOWN;
- warn_no_callback_path(clp, reason);
+ trace_nfsd_cb_state(clp);
}

static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
@@ -958,17 +958,20 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
clp->cl_cb_state = NFSD4_CB_FAULT;
- warn_no_callback_path(clp, reason);
+ trace_nfsd_cb_state(clp);
}

static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
{
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);

+ trace_nfsd_cb_done(clp, task->tk_status);
if (task->tk_status)
nfsd4_mark_cb_down(clp, task->tk_status);
- else
+ else {
clp->cl_cb_state = NFSD4_CB_UP;
+ trace_nfsd_cb_state(clp);
+ }
}

static void nfsd4_cb_probe_release(void *calldata)
@@ -993,6 +996,7 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
void nfsd4_probe_callback(struct nfs4_client *clp)
{
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ trace_nfsd_cb_state(clp);
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
nfsd4_run_cb(&clp->cl_cb_null);
}
@@ -1009,6 +1013,7 @@ void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
spin_lock(&clp->cl_lock);
memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
spin_unlock(&clp->cl_lock);
+ trace_nfsd_cb_state(clp);
}

/*
@@ -1165,8 +1170,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp;

- dprintk("%s: minorversion=%d\n", __func__,
- clp->cl_minorversion);
+ trace_nfsd_cb_done(clp, task->tk_status);

if (!nfsd4_cb_sequence_done(task, cb))
return;
@@ -1271,6 +1275,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
* kill the old client:
*/
if (clp->cl_cb_client) {
+ trace_nfsd_cb_shutdown(clp);
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
put_cred(clp->cl_cb_cred);
@@ -1314,6 +1319,8 @@ nfsd4_run_cb_work(struct work_struct *work)
struct rpc_clnt *clnt;
int flags;

+ trace_nfsd_cb_work(clp, cb->cb_msg.rpc_proc->p_name);
+
if (cb->cb_need_restart) {
cb->cb_need_restart = false;
} else {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 04d80f9e2f91..88433be551b7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2842,14 +2842,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r
conn->cb_prog = se->se_callback_prog;
conn->cb_ident = se->se_callback_ident;
memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
+ trace_nfsd_cb_args(clp, conn);
return;
out_err:
conn->cb_addr.ss_family = AF_UNSPEC;
conn->cb_addrlen = 0;
- dprintk("NFSD: this client (clientid %08x/%08x) "
- "will not receive delegations\n",
- clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-
+ trace_nfsd_cb_nodelegs(clp);
return;
}

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 654a27649207..848044ad82b4 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -624,6 +624,159 @@ TRACE_EVENT(nfsd_drc_mismatch,
__entry->ingress)
);

+TRACE_EVENT(nfsd_cb_args,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfs4_cb_conn *conn
+ ),
+ TP_ARGS(clp, conn),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, prog)
+ __field(u32, ident)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __entry->prog = conn->cb_prog;
+ __entry->ident = conn->cb_ident;
+ memcpy(__entry->addr, &conn->cb_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("client %08x:%08x callback addr=%pISpc prog=%u ident=%u",
+ __entry->cl_boot, __entry->cl_id,
+ __entry->addr, __entry->prog, __entry->ident)
+);
+
+TRACE_EVENT(nfsd_cb_nodelegs,
+ TP_PROTO(const struct nfs4_client *clp),
+ TP_ARGS(clp),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ ),
+ TP_printk("client %08x:%08x", __entry->cl_boot, __entry->cl_id)
+)
+
+TRACE_DEFINE_ENUM(NFSD4_CB_UP);
+TRACE_DEFINE_ENUM(NFSD4_CB_UNKNOWN);
+TRACE_DEFINE_ENUM(NFSD4_CB_DOWN);
+TRACE_DEFINE_ENUM(NFSD4_CB_FAULT);
+
+#define show_cb_state(val) \
+ __print_symbolic(val, \
+ { NFSD4_CB_UP, "UP" }, \
+ { NFSD4_CB_UNKNOWN, "UNKNOWN" }, \
+ { NFSD4_CB_DOWN, "DOWN" }, \
+ { NFSD4_CB_FAULT, "FAULT"})
+
+DECLARE_EVENT_CLASS(nfsd_cb_class,
+ TP_PROTO(const struct nfs4_client *clp),
+ TP_ARGS(clp),
+ TP_STRUCT__entry(
+ __field(unsigned long, state)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->state = clp->cl_cb_state;
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x state=%s",
+ __entry->addr, __entry->cl_boot, __entry->cl_id,
+ show_cb_state(__entry->state))
+);
+
+#define DEFINE_NFSD_CB_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \
+ TP_PROTO(const struct nfs4_client *clp), \
+ TP_ARGS(clp))
+
+DEFINE_NFSD_CB_EVENT(setup);
+DEFINE_NFSD_CB_EVENT(state);
+DEFINE_NFSD_CB_EVENT(shutdown);
+
+TRACE_EVENT(nfsd_cb_setup_err,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ long error
+ ),
+ TP_ARGS(clp, error),
+ TP_STRUCT__entry(
+ __field(long, error)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->error = error;
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x error=%ld",
+ __entry->addr, __entry->cl_boot, __entry->cl_id, __entry->error)
+);
+
+TRACE_EVENT(nfsd_cb_work,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const char *procedure
+ ),
+ TP_ARGS(clp, procedure),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __string(procedure, procedure)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_str(procedure, procedure)
+ memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x procedure=%s",
+ __entry->addr, __entry->cl_boot, __entry->cl_id,
+ __get_str(procedure))
+);
+
+TRACE_EVENT(nfsd_cb_done,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ int status
+ ),
+ TP_ARGS(clp, status),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(int, status)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __entry->status = status;
+ memcpy(__entry->addr, &clp->cl_cb_conn.cb_addr,
+ sizeof(struct sockaddr_in6));
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x status=%d",
+ __entry->addr, __entry->cl_boot, __entry->cl_id,
+ __entry->status)
+);
+
#endif /* _NFSD_TRACE_H */

#undef TRACE_INCLUDE_PATH

2020-05-04 16:05:01

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v1 4/4] SUNRPC: Clean up request deferral tracepoints

On Fri, May 01, 2020 at 01:22:27PM -0400, Chuck Lever wrote:
> - Rename these so they are easy to enable and search for as a set
> - Move the tracepoints to get a more accurate sense of control flow
> - Tracepoints should not fire on xprt shutdown
> - Display memory address in case data structure had been corrupted
> - Abandon dprintk in these paths
>
> I haven't ever gotten one of these tracepoints to trigger. I wonder
> if we should simply remove them.

It's definitely not dead code. But I forget the conditions required to
hit those cases. Looking.... So, we in cache_check we have a cache miss
that requires an upcall, and call cache_defer_req, which will wait for a
response 1 or 5 seconds (depending on how busy server threads are) and
then calls svc_defer().

--b.

>
> Signed-off-by: Chuck Lever <[email protected]>
> ---
> include/trace/events/sunrpc.h | 11 ++++++++---
> net/sunrpc/svc_xprt.c | 12 ++++++------
> 2 files changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
> index ffd2215950dc..3158b3f7e01e 100644
> --- a/include/trace/events/sunrpc.h
> +++ b/include/trace/events/sunrpc.h
> @@ -1313,27 +1313,32 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
> TP_ARGS(dr),
>
> TP_STRUCT__entry(
> + __field(const void *, dr)
> __field(u32, xid)
> __string(addr, dr->xprt->xpt_remotebuf)
> ),
>
> TP_fast_assign(
> + __entry->dr = dr;
> __entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
> (dr->xprt_hlen>>2)));
> __assign_str(addr, dr->xprt->xpt_remotebuf);
> ),
>
> - TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid)
> + TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr,
> + __entry->xid)
> );
> +
> #define DEFINE_SVC_DEFERRED_EVENT(name) \
> - DEFINE_EVENT(svc_deferred_event, svc_##name##_deferred, \
> + DEFINE_EVENT(svc_deferred_event, svc_defer_##name, \
> TP_PROTO( \
> const struct svc_deferred_req *dr \
> ), \
> TP_ARGS(dr))
>
> DEFINE_SVC_DEFERRED_EVENT(drop);
> -DEFINE_SVC_DEFERRED_EVENT(revisit);
> +DEFINE_SVC_DEFERRED_EVENT(queue);
> +DEFINE_SVC_DEFERRED_EVENT(recv);
>
> DECLARE_EVENT_CLASS(cache_event,
> TP_PROTO(
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index 2284ff038dad..e12ec68cd0ff 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -1158,16 +1158,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> set_bit(XPT_DEFERRED, &xprt->xpt_flags);
> if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
> spin_unlock(&xprt->xpt_lock);
> - dprintk("revisit canceled\n");
> + trace_svc_defer_drop(dr);
> svc_xprt_put(xprt);
> - trace_svc_drop_deferred(dr);
> kfree(dr);
> return;
> }
> - dprintk("revisit queued\n");
> dr->xprt = NULL;
> list_add(&dr->handle.recent, &xprt->xpt_deferred);
> spin_unlock(&xprt->xpt_lock);
> + trace_svc_defer_queue(dr);
> svc_xprt_enqueue(xprt);
> svc_xprt_put(xprt);
> }
> @@ -1213,22 +1212,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
> dr->argslen << 2);
> }
> + trace_svc_defer(rqstp);
> svc_xprt_get(rqstp->rq_xprt);
> dr->xprt = rqstp->rq_xprt;
> set_bit(RQ_DROPME, &rqstp->rq_flags);
>
> dr->handle.revisit = svc_revisit;
> - trace_svc_defer(rqstp);
> return &dr->handle;
> }
>
> /*
> * recv data from a deferred request into an active one
> */
> -static int svc_deferred_recv(struct svc_rqst *rqstp)
> +static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
> {
> struct svc_deferred_req *dr = rqstp->rq_deferred;
>
> + trace_svc_defer_recv(dr);
> +
> /* setup iov_base past transport header */
> rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
> /* The iov_len does not include the transport header bytes */
> @@ -1259,7 +1260,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
> struct svc_deferred_req,
> handle.recent);
> list_del_init(&dr->handle.recent);
> - trace_svc_revisit_deferred(dr);
> } else
> clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
> spin_unlock(&xprt->xpt_lock);

2020-05-04 17:15:41

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH v1 4/4] SUNRPC: Clean up request deferral tracepoints



> On May 4, 2020, at 12:04 PM, [email protected] wrote:
>
> On Fri, May 01, 2020 at 01:22:27PM -0400, Chuck Lever wrote:
>> - Rename these so they are easy to enable and search for as a set
>> - Move the tracepoints to get a more accurate sense of control flow
>> - Tracepoints should not fire on xprt shutdown
>> - Display memory address in case data structure had been corrupted
>> - Abandon dprintk in these paths
>>
>> I haven't ever gotten one of these tracepoints to trigger. I wonder
>> if we should simply remove them.
>
> It's definitely not dead code.

Sure. I'm just suggesting that maybe we could make do with just a single
tracepoint here (or maybe, no tracepoints) since these paths don't seem
to trigger often.


> But I forget the conditions required to
> hit those cases. Looking.... So, we in cache_check we have a cache miss
> that requires an upcall, and call cache_defer_req, which will wait for a
> response 1 or 5 seconds (depending on how busy server threads are) and
> then calls svc_defer().

It would be valuable to have some stock test cases in our community tools
that would exercise these paths.


> --b.
>
>>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> include/trace/events/sunrpc.h | 11 ++++++++---
>> net/sunrpc/svc_xprt.c | 12 ++++++------
>> 2 files changed, 14 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
>> index ffd2215950dc..3158b3f7e01e 100644
>> --- a/include/trace/events/sunrpc.h
>> +++ b/include/trace/events/sunrpc.h
>> @@ -1313,27 +1313,32 @@ DECLARE_EVENT_CLASS(svc_deferred_event,
>> TP_ARGS(dr),
>>
>> TP_STRUCT__entry(
>> + __field(const void *, dr)
>> __field(u32, xid)
>> __string(addr, dr->xprt->xpt_remotebuf)
>> ),
>>
>> TP_fast_assign(
>> + __entry->dr = dr;
>> __entry->xid = be32_to_cpu(*(__be32 *)(dr->args +
>> (dr->xprt_hlen>>2)));
>> __assign_str(addr, dr->xprt->xpt_remotebuf);
>> ),
>>
>> - TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid)
>> + TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr,
>> + __entry->xid)
>> );
>> +
>> #define DEFINE_SVC_DEFERRED_EVENT(name) \
>> - DEFINE_EVENT(svc_deferred_event, svc_##name##_deferred, \
>> + DEFINE_EVENT(svc_deferred_event, svc_defer_##name, \
>> TP_PROTO( \
>> const struct svc_deferred_req *dr \
>> ), \
>> TP_ARGS(dr))
>>
>> DEFINE_SVC_DEFERRED_EVENT(drop);
>> -DEFINE_SVC_DEFERRED_EVENT(revisit);
>> +DEFINE_SVC_DEFERRED_EVENT(queue);
>> +DEFINE_SVC_DEFERRED_EVENT(recv);
>>
>> DECLARE_EVENT_CLASS(cache_event,
>> TP_PROTO(
>> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
>> index 2284ff038dad..e12ec68cd0ff 100644
>> --- a/net/sunrpc/svc_xprt.c
>> +++ b/net/sunrpc/svc_xprt.c
>> @@ -1158,16 +1158,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
>> set_bit(XPT_DEFERRED, &xprt->xpt_flags);
>> if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
>> spin_unlock(&xprt->xpt_lock);
>> - dprintk("revisit canceled\n");
>> + trace_svc_defer_drop(dr);
>> svc_xprt_put(xprt);
>> - trace_svc_drop_deferred(dr);
>> kfree(dr);
>> return;
>> }
>> - dprintk("revisit queued\n");
>> dr->xprt = NULL;
>> list_add(&dr->handle.recent, &xprt->xpt_deferred);
>> spin_unlock(&xprt->xpt_lock);
>> + trace_svc_defer_queue(dr);
>> svc_xprt_enqueue(xprt);
>> svc_xprt_put(xprt);
>> }
>> @@ -1213,22 +1212,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
>> memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
>> dr->argslen << 2);
>> }
>> + trace_svc_defer(rqstp);
>> svc_xprt_get(rqstp->rq_xprt);
>> dr->xprt = rqstp->rq_xprt;
>> set_bit(RQ_DROPME, &rqstp->rq_flags);
>>
>> dr->handle.revisit = svc_revisit;
>> - trace_svc_defer(rqstp);
>> return &dr->handle;
>> }
>>
>> /*
>> * recv data from a deferred request into an active one
>> */
>> -static int svc_deferred_recv(struct svc_rqst *rqstp)
>> +static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
>> {
>> struct svc_deferred_req *dr = rqstp->rq_deferred;
>>
>> + trace_svc_defer_recv(dr);
>> +
>> /* setup iov_base past transport header */
>> rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
>> /* The iov_len does not include the transport header bytes */
>> @@ -1259,7 +1260,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
>> struct svc_deferred_req,
>> handle.recent);
>> list_del_init(&dr->handle.recent);
>> - trace_svc_revisit_deferred(dr);
>> } else
>> clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
>> spin_unlock(&xprt->xpt_lock);

--
Chuck Lever