2018-01-16 14:28:44

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 0/2] add tracepoints for nvme command submission and completion

Add tracepoints for nvme command submission and completion. The tracepoints
are modeled after SCSI's trace_scsi_dispatch_cmd_start() and
trace_scsi_dispatch_cmd_done() tracepoints and fulfil a similar purpose,
namely a fast way to check which command is going to be queued into the HW or
Fabric driver and which command is completed again.

Here's an example output using the qemu emulated pci nvme:

# tracer: nop
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
dd-205 [002] .... 7.128368: nvme_setup_cmd: nsid=1, command_id=568, flags=0x0, metadata=0x0, cmd=(nvme_cmd_read slba=0, length=31, control=0x0, dsmgmt=0, reftag=0)
<idle>-0 [002] d.h. 7.129098: nvme_complete_rq: command_id=568, result=0, retries=0, flags=0x0, status=0
dd-205 [002] .... 7.129150: nvme_setup_cmd: nsid=1, command_id=568, flags=0x0, metadata=0x0, cmd=(nvme_cmd_read slba=32, length=63, control=0x0, dsmgmt=0, reftag=0)
dd-205 [002] .... 7.129189: nvme_setup_cmd: nsid=1, command_id=569, flags=0x0, metadata=0x0, cmd=(nvme_cmd_read slba=96, length=127, control=0x0, dsmgmt=0, reftag=0)
<idle>-0 [002] d.h. 7.129318: nvme_complete_rq: command_id=568, result=0, retries=0, flags=0x0, status=0
dd-205 [002] d.h. 7.129355: nvme_complete_rq: command_id=569, result=0, retries=0, flags=0x0, status=0
dd-206 [003] .... 7.137389: nvme_setup_cmd: nsid=1, command_id=22, flags=0x0, metadata=0x0, cmd=(nvme_cmd_write slba=0, length=2559, control=0x0, dsmgmt=0, reftag=0)
dd-206 [003] .... 7.137888: nvme_setup_cmd: nsid=1, command_id=23, flags=0x0, metadata=0x0, cmd=(nvme_cmd_write slba=2560, length=2559, control=0x0, dsmgmt=0, reftag=0)
dd-206 [003] .... 7.138365: nvme_setup_cmd: nsid=1, command_id=24, flags=0x0, metadata=0x0, cmd=(nvme_cmd_write slba=5120, length=2559, control=0x0, dsmgmt=0, reftag=0)
dd-206 [003] d.h. 7.138634: nvme_complete_rq: command_id=22, result=0, retries=0, flags=0x0, status=0
dd-206 [003] .... 7.139014: nvme_setup_cmd: nsid=1, command_id=25, flags=0x0, metadata=0x0, cmd=(nvme_cmd_write slba=7680, length=2559, control=0x0, dsmgmt=0, reftag=0)
<idle>-0 [003] d.h. 7.139658: nvme_complete_rq: command_id=23, result=0, retries=0, flags=0x0, status=0
<idle>-0 [003] d.h. 7.140650: nvme_complete_rq: command_id=24, result=0, retries=0, flags=0x0, status=0
<idle>-0 [003] d.h. 7.141670: nvme_complete_rq: command_id=25, result=0, retries=0, flags=0x0, status=0

Johannes Thumshirn (2):
nvme: add tracepoint for nvme_setup_cmd
nvme: add tracepoint for nvme_complete_rq

drivers/nvme/host/core.c | 71 ++++++++++++++++++++++++++++++++
drivers/nvme/host/trace.h | 100 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 171 insertions(+)
create mode 100644 drivers/nvme/host/trace.h

--
2.12.3


2018-01-16 14:28:43

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 1/2] nvme: add tracepoint for nvme_setup_cmd

Signed-off-by: Johannes Thumshirn <[email protected]>

---
Changes to v1:
* Fix typo (Hannes)
* move include/trace/events/nvme.h -> drivers/nvme/host/trace.h (Christoph)
---
drivers/nvme/host/core.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/nvme/host/trace.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 138 insertions(+)
create mode 100644 drivers/nvme/host/trace.h

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 839650e0926a..776fe7c0a411 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -29,6 +29,9 @@
#include <linux/pm_qos.h>
#include <asm/unaligned.h>

+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#include "nvme.h"
#include "fabrics.h"

@@ -80,6 +83,71 @@ static struct class *nvme_subsys_class;
static void nvme_ns_remove(struct nvme_ns *ns);
static int nvme_revalidate_disk(struct gendisk *disk);

+static const char *nvme_trace_read_write(struct trace_seq *p,
+ struct nvme_rw_command rw)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "slba=%llu, length=%u, control=0x%x, dsmgmt=%u, reftag=%u",
+ (unsigned long long)le64_to_cpu(rw.slba),
+ le16_to_cpu(rw.length), le16_to_cpu(rw.control),
+ le32_to_cpu(rw.dsmgmt), le32_to_cpu(rw.reftag));
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_dsm(struct trace_seq *p, struct nvme_dsm_cmd dsm)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "nr=%u, attributes=%u",
+ le32_to_cpu(dsm.nr), le32_to_cpu(dsm.attributes));
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_write_zeroes(struct trace_seq *p,
+ struct nvme_write_zeroes_cmd wz)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "slba=%llu, length=%u, control=%u, dsmgmt=%u, reftag=%u",
+ (unsigned long long) le64_to_cpu(wz.slba),
+ le16_to_cpu(wz.length), le16_to_cpu(wz.control),
+ le32_to_cpu(wz.dsmgmt), le32_to_cpu(wz.reftag));
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_common(struct trace_seq *p,
+ struct nvme_common_command c)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+
+ trace_seq_printf(p, "cdw2=%*ph, cdw10=%*ph", 4, c.cdw2, 24, c.cdw10);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+const char *nvme_trace_parse_cmd(struct trace_seq *p, struct nvme_command cmd)
+{
+ switch (cmd.common.opcode) {
+ case nvme_cmd_read:
+ case nvme_cmd_write:
+ return nvme_trace_read_write(p, cmd.rw);
+ case nvme_cmd_dsm:
+ return nvme_trace_dsm(p, cmd.dsm);
+ case nvme_cmd_write_zeroes:
+ return nvme_trace_write_zeroes(p, cmd.write_zeroes);
+ default:
+ return nvme_trace_common(p, cmd.common);
+ }
+}
+
static __le32 nvme_get_log_dw10(u8 lid, size_t size)
{
return cpu_to_le32((((size / 4) - 1) << 16) | lid);
@@ -591,6 +659,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
}

cmd->common.command_id = req->tag;
+ trace_nvme_setup_cmd(cmd);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_setup_cmd);
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
new file mode 100644
index 000000000000..7b6bb1b5995e
--- /dev/null
+++ b/drivers/nvme/host/trace.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM nvme
+
+#if !defined(_TRACE_NVME_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NVME_H
+
+#include <linux/nvme.h>
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#define nvme_opcode_name(opcode) { opcode, #opcode }
+#define show_opcode_name(val) \
+ __print_symbolic(val, \
+ nvme_opcode_name(nvme_cmd_flush), \
+ nvme_opcode_name(nvme_cmd_write), \
+ nvme_opcode_name(nvme_cmd_read), \
+ nvme_opcode_name(nvme_cmd_write_uncor), \
+ nvme_opcode_name(nvme_cmd_compare), \
+ nvme_opcode_name(nvme_cmd_write_zeroes), \
+ nvme_opcode_name(nvme_cmd_dsm), \
+ nvme_opcode_name(nvme_cmd_resv_register), \
+ nvme_opcode_name(nvme_cmd_resv_report), \
+ nvme_opcode_name(nvme_cmd_resv_acquire), \
+ nvme_opcode_name(nvme_cmd_resv_release))
+
+const char *nvme_trace_parse_cmd(struct trace_seq *p, struct nvme_command cmnd);
+#define __parse_nvme_cmd(cmd) nvme_trace_parse_cmd(p, cmd)
+
+TRACE_EVENT(nvme_setup_cmd,
+
+ TP_PROTO(struct nvme_command *cmd),
+
+ TP_ARGS(cmd),
+
+ TP_STRUCT__entry(
+ __field( __u8, opcode )
+ __field( __u8, flags )
+ __field( __u16, cid )
+ __field( __le32, nsid )
+ __field( __le64, metadata )
+ __field_struct( struct nvme_command, cmnd )
+ ),
+
+ TP_fast_assign(
+ __entry->opcode = cmd->common.opcode;
+ __entry->flags = cmd->common.flags;
+ __entry->cid = cmd->common.command_id;
+ __entry->nsid = cmd->common.nsid;
+ __entry->metadata = cmd->common.metadata;
+ memcpy(&__entry->cmnd, cmd, sizeof(__entry->cmnd));
+ ),
+
+ TP_printk("nsid=%u, command_id=%u, flags=0x%x, metadata=0x%llx, cmd=(%s %s)",
+ le32_to_cpu(__entry->nsid), __entry->cid, __entry->flags,
+ (unsigned long long) le64_to_cpu(__entry->metadata),
+ show_opcode_name(__entry->opcode),
+ __parse_nvme_cmd(__entry->cmnd))
+);
+
+#endif /* _TRACE_NVME_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/nvme/host
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--
2.12.3

2018-01-16 14:28:41

by Johannes Thumshirn

[permalink] [raw]
Subject: [PATCH v2 2/2] nvme: add tracepoint for nvme_complete_rq

Signed-off-by: Johannes Thumshirn <[email protected]>
Reviewed-by: Hannes Reinecke <[email protected]>
---
drivers/nvme/host/core.c | 2 ++
drivers/nvme/host/trace.h | 31 +++++++++++++++++++++++++++++++
2 files changed, 33 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 776fe7c0a411..d62b4d429921 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -258,6 +258,8 @@ static inline bool nvme_req_needs_retry(struct request *req)

void nvme_complete_rq(struct request *req)
{
+ trace_nvme_complete_rq(nvme_req(req), req->tag);
+
if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
if (nvme_req_needs_failover(req)) {
nvme_failover_req(req);
diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h
index 7b6bb1b5995e..c1596aec37fd 100644
--- a/drivers/nvme/host/trace.h
+++ b/drivers/nvme/host/trace.h
@@ -9,6 +9,8 @@
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>

+#include "nvme.h"
+
#define nvme_opcode_name(opcode) { opcode, #opcode }
#define show_opcode_name(val) \
__print_symbolic(val, \
@@ -58,6 +60,35 @@ TRACE_EVENT(nvme_setup_cmd,
__parse_nvme_cmd(__entry->cmnd))
);

+TRACE_EVENT(nvme_complete_rq,
+
+ TP_PROTO(struct nvme_request *req, int tag),
+
+ TP_ARGS(req, tag),
+
+ TP_STRUCT__entry(
+ __field( int, cid )
+ __field( __le64, result )
+ __field( u8, retries )
+ __field( u8, flags )
+ __field( u16, status )
+ ),
+
+ TP_fast_assign(
+ __entry->cid = tag;
+ __entry->result = req->result.u64;
+ __entry->retries = req->retries;
+ __entry->flags = req->flags;
+ __entry->status = req->status;
+ ),
+
+ TP_printk("command_id=%u, result=%llu, retries=%u, flags=0x%x, status=%u",
+ __entry->cid,
+ (unsigned long long)le64_to_cpu(__entry->result),
+ __entry->retries, __entry->flags, __entry->status)
+
+);
+
#endif /* _TRACE_NVME_H */

#undef TRACE_INCLUDE_PATH
--
2.12.3

2018-01-16 14:52:43

by Keith Busch

[permalink] [raw]
Subject: Re: [PATCH v2 0/2] add tracepoints for nvme command submission and completion

On Tue, Jan 16, 2018 at 03:28:19PM +0100, Johannes Thumshirn wrote:
> Add tracepoints for nvme command submission and completion. The tracepoints
> are modeled after SCSI's trace_scsi_dispatch_cmd_start() and
> trace_scsi_dispatch_cmd_done() tracepoints and fulfil a similar purpose,
> namely a fast way to check which command is going to be queued into the HW or
> Fabric driver and which command is completed again.

I like this very much, thanks for doing this. I think you could make the
submission trace point tighter for PCI as Hannes was suggesting since
an MMIO write can't fail, but doesn't look as doable for FC and RDMA.

2018-01-16 15:00:06

by Johannes Thumshirn

[permalink] [raw]
Subject: Re: [PATCH v2 0/2] add tracepoints for nvme command submission and completion

On Tue, Jan 16, 2018 at 07:55:55AM -0700, Keith Busch wrote:
> On Tue, Jan 16, 2018 at 03:28:19PM +0100, Johannes Thumshirn wrote:
> > Add tracepoints for nvme command submission and completion. The tracepoints
> > are modeled after SCSI's trace_scsi_dispatch_cmd_start() and
> > trace_scsi_dispatch_cmd_done() tracepoints and fulfil a similar purpose,
> > namely a fast way to check which command is going to be queued into the HW or
> > Fabric driver and which command is completed again.
>
> I like this very much, thanks for doing this. I think you could make the
> submission trace point tighter for PCI as Hannes was suggesting since
> an MMIO write can't fail, but doesn't look as doable for FC and RDMA.

That's what I concluded as well and I wanted to have some sort of smallest
common divisor when tracing customer problems on nvme.

I think I can come up with additional tracepoints in pci/rdma/fc but these can
be added later as well and should not hold up this series.

Byte,
Johannes
--
Johannes Thumshirn Storage
[email protected] +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 N?rnberg
GF: Felix Imend?rffer, Jane Smithard, Graham Norton
HRB 21284 (AG N?rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

2018-01-16 16:49:57

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] nvme: add tracepoint for nvme_setup_cmd

> + trace_seq_printf(p, "slba=%llu, length=%u, control=0x%x, dsmgmt=%u, reftag=%u",

Overly long line.

> + (unsigned long long)le64_to_cpu(rw.slba),

u64 now always is an unsigned long long, no need for the cast.

> + le16_to_cpu(rw.length), le16_to_cpu(rw.control),
> + le32_to_cpu(rw.dsmgmt), le32_to_cpu(rw.reftag));
> + trace_seq_putc(p, 0);

This look weird, is there a a good reason for putting a 0 character?

Also I'd be tempted to add a new trace.c file for these and make them
dependent on whatever kernel option enables the tracing.

> --- /dev/null
> +++ b/drivers/nvme/host/trace.h
> @@ -0,0 +1,69 @@
> +/* SPDX-License-Identifier: GPL-2.0 */

Please add a real copyright header including the author.

const char *nvme_trace_parse_cmd(struct trace_seq *p, struct nvme_command cmnd);

> +#define __parse_nvme_cmd(cmd) nvme_trace_parse_cmd(p, cmd)

This looks weird.

> +TRACE_EVENT(nvme_setup_cmd,
> +
> + TP_PROTO(struct nvme_command *cmd),
> +
> + TP_ARGS(cmd),
> +

Please remove these empty lines.

> + TP_STRUCT__entry(
> + __field( __u8, opcode )
> + __field( __u8, flags )
> + __field( __u16, cid )
> + __field( __le32, nsid )
> + __field( __le64, metadata )

> + __field_struct( struct nvme_command, cmnd )
And the additional whitespaces before and after the braces

> + TP_fast_assign(
> + __entry->opcode = cmd->common.opcode;
> + __entry->flags = cmd->common.flags;
> + __entry->cid = cmd->common.command_id;
> + __entry->nsid = cmd->common.nsid;
> + __entry->metadata = cmd->common.metadata;
> + memcpy(&__entry->cmnd, cmd, sizeof(__entry->cmnd));

Copying the whole SQE in tracing seems rather excessive.

> + TP_printk("nsid=%u, command_id=%u, flags=0x%x, metadata=0x%llx, cmd=(%s %s)",

Too long line.

> + le32_to_cpu(__entry->nsid), __entry->cid, __entry->flags,
> + (unsigned long long) le64_to_cpu(__entry->metadata),

no need for the cast.

> +#undef TRACE_INCLUDE_PATH
> +#define TRACE_INCLUDE_PATH ../../drivers/nvme/host

This seems wrong. Instead you should make sure we include the srcdir
in the makefile, take a look at fs/xfs/Makefile for example.

2018-01-16 16:50:30

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] nvme: add tracepoint for nvme_complete_rq

> void nvme_complete_rq(struct request *req)
> {
> + trace_nvme_complete_rq(nvme_req(req), req->tag);
> +

Just pass the struct request?

2018-01-17 07:46:50

by Johannes Thumshirn

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] nvme: add tracepoint for nvme_setup_cmd

On Tue, Jan 16, 2018 at 05:49:54PM +0100, Christoph Hellwig wrote:
> > + trace_seq_printf(p, "slba=%llu, length=%u, control=0x%x, dsmgmt=%u, reftag=%u",
>
> Overly long line.
>
> > + (unsigned long long)le64_to_cpu(rw.slba),
>
> u64 now always is an unsigned long long, no need for the cast.

Ah OK. Didn't know that. Will fix.

> > + le16_to_cpu(rw.length), le16_to_cpu(rw.control),
> > + le32_to_cpu(rw.dsmgmt), le32_to_cpu(rw.reftag));
> > + trace_seq_putc(p, 0);
>
> This look weird, is there a a good reason for putting a 0 character?
>
> Also I'd be tempted to add a new trace.c file for these and make them
> dependent on whatever kernel option enables the tracing.

I was considering this as well so let's just do it.

>
> > --- /dev/null
> > +++ b/drivers/nvme/host/trace.h
> > @@ -0,0 +1,69 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
>
> Please add a real copyright header including the author.

I thought this is the new standard. OK will add a copyright header.

> const char *nvme_trace_parse_cmd(struct trace_seq *p, struct nvme_command cmnd);
>
> > +#define __parse_nvme_cmd(cmd) nvme_trace_parse_cmd(p, cmd)
>
> This looks weird.

Why? For instance that's exactly what SCSI does as well and nicely hides the
implicit "p".

> > +TRACE_EVENT(nvme_setup_cmd,
> > +
> > + TP_PROTO(struct nvme_command *cmd),
> > +
> > + TP_ARGS(cmd),
> > +
>
> Please remove these empty lines.

Standard coding style in include/trace/events/ that's why I adopted it.

> > + TP_STRUCT__entry(
> > + __field( __u8, opcode )
> > + __field( __u8, flags )
> > + __field( __u16, cid )
> > + __field( __le32, nsid )
> > + __field( __le64, metadata )
>
> > + __field_struct( struct nvme_command, cmnd )
> And the additional whitespaces before and after the braces

See above.

> > + TP_fast_assign(
> > + __entry->opcode = cmd->common.opcode;
> > + __entry->flags = cmd->common.flags;
> > + __entry->cid = cmd->common.command_id;
> > + __entry->nsid = cmd->common.nsid;
> > + __entry->metadata = cmd->common.metadata;
> > + memcpy(&__entry->cmnd, cmd, sizeof(__entry->cmnd));
>
> Copying the whole SQE in tracing seems rather excessive.
>
> > + TP_printk("nsid=%u, command_id=%u, flags=0x%x, metadata=0x%llx, cmd=(%s %s)",
>
> Too long line.
>
> > + le32_to_cpu(__entry->nsid), __entry->cid, __entry->flags,
> > + (unsigned long long) le64_to_cpu(__entry->metadata),
>
> no need for the cast.
>
> > +#undef TRACE_INCLUDE_PATH
> > +#define TRACE_INCLUDE_PATH ../../drivers/nvme/host
>
> This seems wrong. Instead you should make sure we include the srcdir
> in the makefile, take a look at fs/xfs/Makefile for example.

That's what dma-buf, drm and ras do so I copied it over when moving the TP
header out of include/trace/events

--
Johannes Thumshirn Storage
[email protected] +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 N?rnberg
GF: Felix Imend?rffer, Jane Smithard, Graham Norton
HRB 21284 (AG N?rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850

2018-01-17 08:50:53

by Johannes Thumshirn

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] nvme: add tracepoint for nvme_complete_rq

On Tue, Jan 16, 2018 at 05:50:26PM +0100, Christoph Hellwig wrote:
> Just pass the struct request?

OK

--
Johannes Thumshirn Storage
[email protected] +49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 N?rnberg
GF: Felix Imend?rffer, Jane Smithard, Graham Norton
HRB 21284 (AG N?rnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850