2020-05-01 17:34:01

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 0/7] RPC server tracepoints

Hi-

(cc: linux-nfs and linux-rdma)

A mix of RPC-over-sockets and RPC-over-RDMA observability
enhancements, along with one very interesting change:

"SUNRPC: Move xpt_mutex into socket xpo_sendto methods"

I've changed svc_send() so that it no longer holds the transport
mutex during the call to ->xpo_sendto. Instead, the socket
sendto methods take that mutex, since they need to serialize
sends on the socket. The RDMA sendto method does not need that
serialization, so the mutex is not taken in svc_rdma_sendto.

Performance results for that change are reported in the patch
description.

---

Chuck Lever (7):
SUNRPC: svc_show_status() macro needs enum definitions
SUNRPC: Move xpt_mutex into socket xpo_sendto methods
svcrdma: Displayed remote IP address should match stored address
SUNRPC: Remove kernel memory address from svc_xprt tracepoints
SUNRPC: Tracepoint to record errors in svc_xpo_create()
SUNRPC: Trace a few more generic svc_xprt events
svcrdma: Add tracepoints to report ->xpo_accept failures


include/linux/sunrpc/svc_xprt.h | 6 ++
include/trace/events/rpcrdma.h | 36 ++++----
include/trace/events/sunrpc.h | 95 ++++++++++++++++------
net/sunrpc/svc_xprt.c | 41 ++++------
net/sunrpc/svcsock.c | 37 ++++++---
net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 35 ++++----
net/sunrpc/xprtrdma/svc_rdma_sendto.c | 9 +-
net/sunrpc/xprtrdma/svc_rdma_transport.c | 55 +++++--------
net/sunrpc/xprtsock.c | 12 ++-
9 files changed, 187 insertions(+), 139 deletions(-)

--
Chuck Lever


2020-05-01 17:34:08

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 2/7] SUNRPC: Move xpt_mutex into socket xpo_sendto methods

It appears that the RPC/RDMA transport does not need serialization
of calls to its xpo_sendto method. Move the mutex into the socket
methods that still need that serialization.

Tail latencies are unambiguously better with this patch applied.
fio randrw 8KB 70/30 on NFSv3, smaller numbers are better:

clat percentiles (usec):

With xpt_mutex:
r | 99.99th=[ 8848]
w | 99.99th=[ 9634]

Without xpt_mutex:
r | 99.99th=[ 8586]
w | 99.99th=[ 8979]

Serializing the construction of RPC/RDMA transport headers is not
really necessary at this point, because the Linux NFS server
implementation never changes its credit grant on a connection. If
that should change, then svc_rdma_sendto will need to serialize
access to the transport's credit grant fields.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/svc_xprt.h | 6 +++++
net/sunrpc/svc_xprt.c | 12 ++--------
net/sunrpc/svcsock.c | 25 ++++++++++++++++++++
net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 35 +++++++++++++---------------
net/sunrpc/xprtrdma/svc_rdma_sendto.c | 9 ++-----
net/sunrpc/xprtsock.c | 12 ++++++++--
6 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 9e1e046de176..aca35ab5cff2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
return 0;
}

+static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt)
+{
+ return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) ||
+ (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0);
+}
+
int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e12ec68cd0ff..e8092225e784 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -914,16 +914,10 @@ int svc_send(struct svc_rqst *rqstp)
xb->page_len +
xb->tail[0].iov_len;
trace_svc_sendto(xb);
-
- /* Grab mutex to serialize outgoing data. */
- mutex_lock(&xprt->xpt_mutex);
trace_svc_stats_latency(rqstp);
- if (test_bit(XPT_DEAD, &xprt->xpt_flags)
- || test_bit(XPT_CLOSE, &xprt->xpt_flags))
- len = -ENOTCONN;
- else
- len = xprt->xpt_ops->xpo_sendto(rqstp);
- mutex_unlock(&xprt->xpt_mutex);
+
+ len = xprt->xpt_ops->xpo_sendto(rqstp);
+
trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 023514e392b3..3e7b6445e317 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -506,6 +506,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
* svc_udp_sendto - Send out a reply on a UDP socket
* @rqstp: completed svc_rqst
*
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_udp_sendto(struct svc_rqst *rqstp)
@@ -531,6 +534,11 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)

svc_set_cmsg_data(rqstp, cmh);

+ mutex_lock(&xprt->xpt_mutex);
+
+ if (svc_xprt_is_dead(xprt))
+ goto out_notconn;
+
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
if (err == -ECONNREFUSED) {
@@ -538,9 +546,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
}
+
+ mutex_unlock(&xprt->xpt_mutex);
if (err < 0)
return err;
return sent;
+
+out_notconn:
+ mutex_unlock(&xprt->xpt_mutex);
+ return -ENOTCONN;
}

static int svc_udp_has_wspace(struct svc_xprt *xprt)
@@ -1063,6 +1077,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
* svc_tcp_sendto - Send out a reply on a TCP socket
* @rqstp: completed svc_rqst
*
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_tcp_sendto(struct svc_rqst *rqstp)
@@ -1080,12 +1097,19 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)

svc_release_skb(rqstp);

+ mutex_lock(&xprt->xpt_mutex);
+ if (svc_xprt_is_dead(xprt))
+ goto out_notconn;
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
+ mutex_unlock(&xprt->xpt_mutex);
return sent;

+out_notconn:
+ mutex_unlock(&xprt->xpt_mutex);
+ return -ENOTCONN;
out_close:
pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
xprt->xpt_server->sv_name,
@@ -1093,6 +1117,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
+ mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}

diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index af7eb8d202ae..d9aab4504f2c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -210,34 +210,31 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
return -ENOTCONN;
}

-/* Send an RPC call on the passive end of a transport
- * connection.
+/**
+ * xprt_rdma_bc_send_request - Send a reverse-direction Call
+ * @rqst: rpc_rqst containing Call message to be sent
+ *
+ * Return values:
+ * %0 if the message was sent successfully
+ * %ENOTCONN if the message was not sent
*/
-static int
-xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
+static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
{
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
- struct svcxprt_rdma *rdma;
+ struct svcxprt_rdma *rdma =
+ container_of(sxprt, struct svcxprt_rdma, sc_xprt);
int ret;

dprintk("svcrdma: sending bc call with xid: %08x\n",
be32_to_cpu(rqst->rq_xid));

- mutex_lock(&sxprt->xpt_mutex);
-
- ret = -ENOTCONN;
- rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
- if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
- ret = rpcrdma_bc_send_request(rdma, rqst);
- if (ret == -ENOTCONN)
- svc_close_xprt(sxprt);
- }
+ if (test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ return -ENOTCONN;

- mutex_unlock(&sxprt->xpt_mutex);
-
- if (ret < 0)
- return ret;
- return 0;
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+ if (ret == -ENOTCONN)
+ svc_close_xprt(sxprt);
+ return ret;
}

static void
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b6c8643867f2..b08152009a72 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -868,12 +868,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
__be32 *p;
int ret;

- /* Create the RDMA response header. xprt->xpt_mutex,
- * acquired in svc_send(), serializes RPC replies. The
- * code path below that inserts the credit grant value
- * into each transport header runs only inside this
- * critical section.
- */
+ if (svc_xprt_is_dead(xprt))
+ goto err0;
+
ret = -ENOMEM;
sctxt = svc_rdma_send_ctxt_get(rdma);
if (!sctxt)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ec..839c49330785 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2548,8 +2548,16 @@ static int bc_sendto(struct rpc_rqst *req)
return sent;
}

-/*
- * The send routine. Borrows from svc_send
+/**
+ * bc_send_request - Send a backchannel Call on a TCP socket
+ * @req: rpc_rqst containing Call message to be sent
+ *
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
+ * Return values:
+ * %0 if the message was sent successfully
+ * %ENOTCONN if the message was not sent
*/
static int bc_send_request(struct rpc_rqst *req)
{

2020-05-01 17:34:24

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 4/7] SUNRPC: Remove kernel memory address from svc_xprt tracepoints

The xprt=%p was meant to distinguish events from different transports,
but the addr=%s does that just as well and does not expose kernel
memory addresses.

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 27 +++++++--------------------
1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 287011041e92..282f703b3976 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1184,22 +1184,19 @@ TRACE_EVENT(svc_xprt_do_enqueue,
TP_ARGS(xprt, rqst),

TP_STRUCT__entry(
- __field(struct svc_xprt *, xprt)
__field(int, pid)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),

TP_fast_assign(
- __entry->xprt = xprt;
__entry->pid = rqst? rqst->rq_task->pid : 0;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),

- TP_printk("xprt=%p addr=%s pid=%d flags=%s",
- __entry->xprt, __get_str(addr),
- __entry->pid, show_svc_xprt_flags(__entry->flags))
+ TP_printk("addr=%s pid=%d flags=%s", __get_str(addr),
+ __entry->pid, show_svc_xprt_flags(__entry->flags))
);

DECLARE_EVENT_CLASS(svc_xprt_event,
@@ -1208,20 +1205,17 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
TP_ARGS(xprt),

TP_STRUCT__entry(
- __field(struct svc_xprt *, xprt)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),

TP_fast_assign(
- __entry->xprt = xprt;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),

- TP_printk("xprt=%p addr=%s flags=%s",
- __entry->xprt, __get_str(addr),
- show_svc_xprt_flags(__entry->flags))
+ TP_printk("addr=%s flags=%s", __get_str(addr),
+ show_svc_xprt_flags(__entry->flags))
);

DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
@@ -1234,24 +1228,20 @@ TRACE_EVENT(svc_xprt_dequeue,
TP_ARGS(rqst),

TP_STRUCT__entry(
- __field(struct svc_xprt *, xprt)
__field(unsigned long, flags)
__field(unsigned long, wakeup)
__string(addr, rqst->rq_xprt->xpt_remotebuf)
),

TP_fast_assign(
- __entry->xprt = rqst->rq_xprt;
__entry->flags = rqst->rq_xprt->xpt_flags;
__entry->wakeup = ktime_to_us(ktime_sub(ktime_get(),
rqst->rq_qtime));
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),

- TP_printk("xprt=%p addr=%s flags=%s wakeup-us=%lu",
- __entry->xprt, __get_str(addr),
- show_svc_xprt_flags(__entry->flags),
- __entry->wakeup)
+ TP_printk("addr=%s flags=%s wakeup-us=%lu", __get_str(addr),
+ show_svc_xprt_flags(__entry->flags), __entry->wakeup)
);

TRACE_EVENT(svc_wake_up,
@@ -1276,21 +1266,18 @@ TRACE_EVENT(svc_handle_xprt,
TP_ARGS(xprt, len),

TP_STRUCT__entry(
- __field(struct svc_xprt *, xprt)
__field(int, len)
__field(unsigned long, flags)
__string(addr, xprt->xpt_remotebuf)
),

TP_fast_assign(
- __entry->xprt = xprt;
__entry->len = len;
__entry->flags = xprt->xpt_flags;
__assign_str(addr, xprt->xpt_remotebuf);
),

- TP_printk("xprt=%p addr=%s len=%d flags=%s",
- __entry->xprt, __get_str(addr),
+ TP_printk("addr=%s len=%d flags=%s", __get_str(addr),
__entry->len, show_svc_xprt_flags(__entry->flags))
);


2020-05-01 17:34:59

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 1/7] SUNRPC: svc_show_status() macro needs enum definitions

Clean up: Add missing TRACE_DEFINE_ENUMs in
include/trace/events/sunrpc.h

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 3158b3f7e01e..287011041e92 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1024,6 +1024,17 @@ TRACE_EVENT(svc_recv,
show_rqstp_flags(__entry->flags))
);

+TRACE_DEFINE_ENUM(SVC_GARBAGE);
+TRACE_DEFINE_ENUM(SVC_SYSERR);
+TRACE_DEFINE_ENUM(SVC_VALID);
+TRACE_DEFINE_ENUM(SVC_NEGATIVE);
+TRACE_DEFINE_ENUM(SVC_OK);
+TRACE_DEFINE_ENUM(SVC_DROP);
+TRACE_DEFINE_ENUM(SVC_CLOSE);
+TRACE_DEFINE_ENUM(SVC_DENIED);
+TRACE_DEFINE_ENUM(SVC_PENDING);
+TRACE_DEFINE_ENUM(SVC_COMPLETE);
+
#define svc_show_status(status) \
__print_symbolic(status, \
{ SVC_GARBAGE, "SVC_GARBAGE" }, \

2020-05-01 17:35:00

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 7/7] svcrdma: Add tracepoints to report ->xpo_accept failures

Failure to accept a connection is typically due to a problem
specific to a transport type. Also, ->xpo_accept returns NULL
on error rather than reporting a specific problem.

So, add failure-specific tracepoints in svc_rdma_accept().

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/rpcrdma.h | 37 ++++++++++++++++++++++++++++++
net/sunrpc/xprtrdma/svc_rdma_transport.c | 25 +++++++++-----------
2 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 6de7a9202a68..c25e11564598 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -1283,6 +1283,43 @@ TRACE_EVENT(xprtrdma_leaked_rep,
** Server-side RPC/RDMA events
**/

+DECLARE_EVENT_CLASS(svcrdma_accept_class,
+ TP_PROTO(
+ const struct svcxprt_rdma *rdma,
+ long status
+ ),
+
+ TP_ARGS(rdma, status),
+
+ TP_STRUCT__entry(
+ __field(long, status)
+ __string(addr, rdma->sc_xprt.xpt_remotebuf)
+ ),
+
+ TP_fast_assign(
+ __entry->status = status;
+ __assign_str(addr, rdma->sc_xprt.xpt_remotebuf);
+ ),
+
+ TP_printk("addr=%s status=%ld",
+ __get_str(addr), __entry->status
+ )
+);
+
+#define DEFINE_ACCEPT_EVENT(name) \
+ DEFINE_EVENT(svcrdma_accept_class, svcrdma_##name##_err, \
+ TP_PROTO( \
+ const struct svcxprt_rdma *rdma, \
+ long status \
+ ), \
+ TP_ARGS(rdma, status))
+
+DEFINE_ACCEPT_EVENT(pd);
+DEFINE_ACCEPT_EVENT(qp);
+DEFINE_ACCEPT_EVENT(fabric);
+DEFINE_ACCEPT_EVENT(initdepth);
+DEFINE_ACCEPT_EVENT(accept);
+
TRACE_DEFINE_ENUM(RDMA_MSG);
TRACE_DEFINE_ENUM(RDMA_NOMSG);
TRACE_DEFINE_ENUM(RDMA_MSGP);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index aa68dc706006..d38be57b00ed 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -400,9 +400,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!newxprt)
return NULL;

- dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
- newxprt, newxprt->sc_cm_id);
-
dev = newxprt->sc_cm_id->device;
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;

@@ -438,21 +435,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)

newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
- dprintk("svcrdma: error creating PD for connect request\n");
+ trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd));
goto errout;
}
newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
IB_POLL_WORKQUEUE);
- if (IS_ERR(newxprt->sc_sq_cq)) {
- dprintk("svcrdma: error creating SQ CQ for connect request\n");
+ if (IS_ERR(newxprt->sc_sq_cq))
goto errout;
- }
newxprt->sc_rq_cq =
ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
- if (IS_ERR(newxprt->sc_rq_cq)) {
- dprintk("svcrdma: error creating RQ CQ for connect request\n");
+ if (IS_ERR(newxprt->sc_rq_cq))
goto errout;
- }

memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.event_handler = qp_event_handler;
@@ -476,7 +469,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)

ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
- dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
+ trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
newxprt->sc_qp = newxprt->sc_cm_id->qp;
@@ -484,8 +477,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
newxprt->sc_snd_w_inv = false;
if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
- !rdma_ib_or_roce(dev, newxprt->sc_port_num))
+ !rdma_ib_or_roce(dev, newxprt->sc_port_num)) {
+ trace_svcrdma_fabric_err(newxprt, -EINVAL);
goto errout;
+ }

if (!svc_rdma_post_recvs(newxprt))
goto errout;
@@ -507,15 +502,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
dev->attrs.max_qp_init_rd_atom);
if (!conn_param.initiator_depth) {
- dprintk("svcrdma: invalid ORD setting\n");
ret = -EINVAL;
+ trace_svcrdma_initdepth_err(newxprt, ret);
goto errout;
}
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
- if (ret)
+ if (ret) {
+ trace_svcrdma_accept_err(newxprt, ret);
goto errout;
+ }

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
dprintk("svcrdma: new connection %p accepted:\n", newxprt);

2020-05-01 17:35:24

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 5/7] SUNRPC: Tracepoint to record errors in svc_xpo_create()

This enables capture of failures for an audit log.

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 29 +++++++++++++++++++++++++++++
net/sunrpc/svc_xprt.c | 7 ++++++-
2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 282f703b3976..5bcc0089e9a5 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1178,6 +1178,35 @@ DEFINE_EVENT(svc_rqst_status, svc_send,
{ (1UL << XPT_KILL_TEMP), "XPT_KILL_TEMP"}, \
{ (1UL << XPT_CONG_CTRL), "XPT_CONG_CTRL"})

+TRACE_EVENT(svc_xprt_create_err,
+ TP_PROTO(
+ const char *program,
+ const char *protocol,
+ struct sockaddr *sap,
+ const struct svc_xprt *xprt
+ ),
+
+ TP_ARGS(program, protocol, sap, xprt),
+
+ TP_STRUCT__entry(
+ __field(long, error)
+ __string(program, program)
+ __string(protocol, protocol)
+ __array(unsigned char, addr, sizeof(struct sockaddr_in6))
+ ),
+
+ TP_fast_assign(
+ __entry->error = PTR_ERR(xprt);
+ __assign_str(program, program);
+ __assign_str(protocol, protocol);
+ memcpy(__entry->addr, sap, sizeof(__entry->addr));
+ ),
+
+ TP_printk("addr=%pISpc program=%s protocol=%s error=%ld",
+ __entry->addr, __get_str(program), __get_str(protocol),
+ __entry->error)
+);
+
TRACE_EVENT(svc_xprt_do_enqueue,
TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst),

diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e8092225e784..c444c01c6b6b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -206,6 +206,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
.sin6_port = htons(port),
};
#endif
+ struct svc_xprt *xprt;
struct sockaddr *sap;
size_t len;

@@ -224,7 +225,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return ERR_PTR(-EAFNOSUPPORT);
}

- return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
+ xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
+ if (IS_ERR(xprt))
+ trace_svc_xprt_create_err(serv->sv_program->pg_name,
+ xcl->xcl_name, sap, xprt);
+ return xprt;
}

/*

2020-05-01 17:36:32

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 3/7] svcrdma: Displayed remote IP address should match stored address

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/svc_rdma_transport.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ea54785db4f8..0a1125277a48 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -211,7 +211,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
newxprt->sc_ord = param->initiator_depth;

sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
- svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+ newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa);
+ memcpy(&newxprt->sc_xprt.xpt_remote, sa,
+ newxprt->sc_xprt.xpt_remotelen);
+ snprintf(newxprt->sc_xprt.xpt_remotebuf,
+ sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa);
+
/* The remote port is arbitrary and not under the control of the
* client ULP. Set it to a fixed value so that the DRC continues
* to be effective after a reconnect.

2020-05-01 17:37:13

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 6/7] SUNRPC: Trace a few more generic svc_xprt events

In lieu of dprintks or tracepoints in each individual transport
implementation, introduce tracepoints in the generic part of the
RPC layer. These typically fire for connection lifetime events,
so shouldn't contribute a lot of noise.

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/rpcrdma.h | 33 -------------------------
include/trace/events/sunrpc.h | 39 ++++++++++++++++++++++++++++--
net/sunrpc/svc_xprt.c | 22 ++++-------------
net/sunrpc/svcsock.c | 12 ---------
net/sunrpc/xprtrdma/svc_rdma_transport.c | 23 +++---------------
5 files changed, 45 insertions(+), 84 deletions(-)

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 596e0a803477..6de7a9202a68 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -1283,39 +1283,6 @@ TRACE_EVENT(xprtrdma_leaked_rep,
** Server-side RPC/RDMA events
**/

-DECLARE_EVENT_CLASS(svcrdma_xprt_event,
- TP_PROTO(
- const struct svc_xprt *xprt
- ),
-
- TP_ARGS(xprt),
-
- TP_STRUCT__entry(
- __field(const void *, xprt)
- __string(addr, xprt->xpt_remotebuf)
- ),
-
- TP_fast_assign(
- __entry->xprt = xprt;
- __assign_str(addr, xprt->xpt_remotebuf);
- ),
-
- TP_printk("xprt=%p addr=%s",
- __entry->xprt, __get_str(addr)
- )
-);
-
-#define DEFINE_XPRT_EVENT(name) \
- DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name, \
- TP_PROTO( \
- const struct svc_xprt *xprt \
- ), \
- TP_ARGS(xprt))
-
-DEFINE_XPRT_EVENT(accept);
-DEFINE_XPRT_EVENT(fail);
-DEFINE_XPRT_EVENT(free);
-
TRACE_DEFINE_ENUM(RDMA_MSG);
TRACE_DEFINE_ENUM(RDMA_NOMSG);
TRACE_DEFINE_ENUM(RDMA_MSGP);
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 5bcc0089e9a5..561a438bcc66 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1247,9 +1247,42 @@ DECLARE_EVENT_CLASS(svc_xprt_event,
show_svc_xprt_flags(__entry->flags))
);

-DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space,
- TP_PROTO(struct svc_xprt *xprt),
- TP_ARGS(xprt));
+#define DEFINE_SVC_XPRT_EVENT(name) \
+ DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \
+ TP_PROTO( \
+ struct svc_xprt *xprt \
+ ), \
+ TP_ARGS(xprt))
+
+DEFINE_SVC_XPRT_EVENT(no_write_space);
+DEFINE_SVC_XPRT_EVENT(close);
+DEFINE_SVC_XPRT_EVENT(detach);
+DEFINE_SVC_XPRT_EVENT(free);
+
+TRACE_EVENT(svc_xprt_accept,
+ TP_PROTO(
+ const struct svc_xprt *xprt,
+ const char *service
+ ),
+
+ TP_ARGS(xprt, service),
+
+ TP_STRUCT__entry(
+ __string(addr, xprt->xpt_remotebuf)
+ __string(protocol, xprt->xpt_class->xcl_name)
+ __string(service, service)
+ ),
+
+ TP_fast_assign(
+ __assign_str(addr, xprt->xpt_remotebuf);
+ __assign_str(protocol, xprt->xpt_class->xcl_name)
+ __assign_str(service, service);
+ ),
+
+ TP_printk("addr=%s protocol=%s service=%s",
+ __get_str(addr), __get_str(protocol), __get_str(service)
+ )
+);

TRACE_EVENT(svc_xprt_dequeue,
TP_PROTO(struct svc_rqst *rqst),
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c444c01c6b6b..c1ff8cdb5b2b 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -153,6 +153,7 @@ static void svc_xprt_free(struct kref *kref)
xprt_put(xprt->xpt_bc_xprt);
if (xprt->xpt_bc_xps)
xprt_switch_put(xprt->xpt_bc_xps);
+ trace_svc_xprt_free(xprt);
xprt->xpt_ops->xpo_free(xprt);
module_put(owner);
}
@@ -309,15 +310,11 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
{
int err;

- dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
if (err == -EPROTONOSUPPORT) {
request_module("svc%s", xprt_name);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
}
- if (err < 0)
- dprintk("svc: transport %s not found, err %d\n",
- xprt_name, -err);
return err;
}
EXPORT_SYMBOL_GPL(svc_create_xprt);
@@ -785,7 +782,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
int len = 0;

if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
- dprintk("svc_recv: found XPT_CLOSE\n");
if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags))
xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
svc_delete_xprt(xprt);
@@ -804,6 +800,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
if (newxpt) {
newxpt->xpt_cred = get_cred(xprt->xpt_cred);
svc_add_new_temp_xprt(serv, newxpt);
+ trace_svc_xprt_accept(newxpt, serv->sv_name);
} else
module_put(xprt->xpt_class->xcl_owner);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
@@ -840,14 +837,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
struct svc_serv *serv = rqstp->rq_server;
int len, err;

- dprintk("svc: server %p waiting for data (to = %ld)\n",
- rqstp, timeout);
-
- if (rqstp->rq_xprt)
- printk(KERN_ERR
- "svc_recv: service %p, transport not NULL!\n",
- rqstp);
-
err = svc_alloc_arg(rqstp);
if (err)
goto out;
@@ -895,7 +884,6 @@ EXPORT_SYMBOL_GPL(svc_recv);
void svc_drop(struct svc_rqst *rqstp)
{
trace_svc_drop(rqstp);
- dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
svc_xprt_release(rqstp);
}
EXPORT_SYMBOL_GPL(svc_drop);
@@ -1030,11 +1018,10 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
struct svc_serv *serv = xprt->xpt_server;
struct svc_deferred_req *dr;

- /* Only do this once */
if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
- BUG();
+ return;

- dprintk("svc: svc_delete_xprt(%p)\n", xprt);
+ trace_svc_xprt_detach(xprt);
xprt->xpt_ops->xpo_detach(xprt);
if (xprt->xpt_bc_xprt)
xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);
@@ -1055,6 +1042,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)

void svc_close_xprt(struct svc_xprt *xprt)
{
+ trace_svc_xprt_close(xprt);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
/* someone else will have to effect the close */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3e7b6445e317..cf4bd198c19d 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -727,7 +727,6 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
int err, slen;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);

- dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
if (!sock)
return NULL;

@@ -1363,11 +1362,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
int newlen;
int family;
int val;
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-
- dprintk("svc: svc_create_socket(%s, %d, %s)\n",
- serv->sv_program->pg_name, protocol,
- __svc_print_addr(sin, buf, sizeof(buf)));

if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
@@ -1427,7 +1421,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
return (struct svc_xprt *)svsk;
bummer:
- dprintk("svc: svc_create_socket error = %d\n", -error);
sock_release(sock);
return ERR_PTR(error);
}
@@ -1441,8 +1434,6 @@ static void svc_sock_detach(struct svc_xprt *xprt)
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sock *sk = svsk->sk_sk;

- dprintk("svc: svc_sock_detach(%p)\n", svsk);
-
/* put back the old socket callbacks */
lock_sock(sk);
sk->sk_state_change = svsk->sk_ostate;
@@ -1459,8 +1450,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);

- dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk);
-
svc_sock_detach(xprt);

if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
@@ -1475,7 +1464,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- dprintk("svc: svc_sock_free(%p)\n", svsk);

if (svsk->sk_sock->file)
sockfd_put(svsk->sk_sock);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 0a1125277a48..aa68dc706006 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -314,11 +314,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct svcxprt_rdma *cma_xprt;
int ret;

- dprintk("svcrdma: Creating RDMA listener\n");
- if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
- dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
+ if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
- }
cma_xprt = svc_rdma_create_xprt(serv, net);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
@@ -329,7 +326,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
- dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
goto err0;
}

@@ -338,23 +334,17 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
*/
#if IS_ENABLED(CONFIG_IPV6)
ret = rdma_set_afonly(listen_id, 1);
- if (ret) {
- dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret);
+ if (ret)
goto err1;
- }
#endif
ret = rdma_bind_addr(listen_id, sa);
- if (ret) {
- dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
+ if (ret)
goto err1;
- }
cma_xprt->sc_cm_id = listen_id;

ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
- if (ret) {
- dprintk("svcrdma: rdma_listen failed = %d\n", ret);
+ if (ret)
goto err1;
- }

/*
* We need to use the address from the cm_id in case the
@@ -540,12 +530,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" ord : %d\n", conn_param.initiator_depth);
#endif

- trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
return &newxprt->sc_xprt;

errout:
- dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
- trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
/* Take a reference in case the DTO handler runs */
svc_xprt_get(&newxprt->sc_xprt);
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
@@ -583,8 +570,6 @@ static void __svc_rdma_free(struct work_struct *work)
container_of(work, struct svcxprt_rdma, sc_work);
struct svc_xprt *xprt = &rdma->sc_xprt;

- trace_svcrdma_xprt_free(xprt);
-
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);