2022-10-10 22:45:01

by Ira Weiny

[permalink] [raw]
Subject: [RFC V2 PATCH 05/11] cxl/mem: Trace General Media Event Record

From: Ira Weiny <[email protected]>

CXL rev 3.0 section 8.2.9.2.1.1 defines the General Media Event Record.

Determine if the event read is a general media record and if so trace
the record.

Signed-off-by: Ira Weiny <[email protected]>

---
Changes from RFC:
Add reserved byte array
Use common CXL event header record macros
Jonathan
Use unaligned_le{24,16} for unaligned fields
Don't use the inverse of phy addr mask
Dave Jiang
s/cxl_gen_media_event/general_media
s/cxl_evt_gen_media/cxl_event_gen_media
---
drivers/cxl/core/mbox.c | 30 ++++++++++-
drivers/cxl/cxlmem.h | 20 +++++++
include/trace/events/cxl.h | 108 +++++++++++++++++++++++++++++++++++++
3 files changed, 156 insertions(+), 2 deletions(-)

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index bc4e42b3e01b..1097250c115a 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -712,6 +712,32 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);

+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+static const uuid_t gen_media_event_uuid =
+ UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
+ 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
+
+static void cxl_trace_event_record(const char *dev_name,
+ enum cxl_event_log_type type,
+ struct cxl_get_event_payload *payload)
+{
+ uuid_t *id = &payload->record.hdr.id;
+
+ if (uuid_equal(id, &gen_media_event_uuid)) {
+ struct cxl_event_gen_media *rec =
+ (struct cxl_event_gen_media *)&payload->record;
+
+ trace_general_media(dev_name, type, rec);
+ return;
+ }
+
+ /* For unknown record types print just the header */
+ trace_generic_event(dev_name, type, &payload->record);
+}
+
static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
enum cxl_event_log_type log,
__le16 handle)
@@ -745,8 +771,8 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
}

if (le16_to_cpu(payload.record_count) == 1) {
- trace_generic_event(dev_name(cxlds->dev), type,
- &payload.record);
+ cxl_trace_event_record(dev_name(cxlds->dev), type,
+ &payload);
rc = cxl_clear_event_record(cxlds, type,
payload.record.hdr.handle);
if (rc) {
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 75aea34f3ffb..b5c120bd4068 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -411,6 +411,26 @@ struct cxl_mbox_clear_event_payload {
__le16 handle;
};

+/*
+ * General Media Event Record
+ * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
+ */
+#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
+#define CXL_EVENT_GEN_MED_RES_SIZE 0x2e
+struct cxl_event_gen_media {
+ struct cxl_event_record_hdr hdr;
+ __le64 phys_addr;
+ u8 descriptor;
+ u8 type;
+ u8 transaction_type;
+ u8 validity_flags[2];
+ u8 channel;
+ u8 rank;
+ u8 device[3];
+ u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
+ u8 reserved[CXL_EVENT_GEN_MED_RES_SIZE];
+} __packed;
+
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
index 318ba5fe046e..82a8d3b750a2 100644
--- a/include/trace/events/cxl.h
+++ b/include/trace/events/cxl.h
@@ -122,6 +122,114 @@ TRACE_EVENT(generic_event,
__print_hex(__entry->data, CXL_EVENT_RECORD_DATA_LENGTH))
);

+/*
+ * General Media Event Record - GMER
+ * CXL v2.0 Section 8.2.9.1.1.1; Table 154
+ */
+#define CXL_GMER_PHYS_ADDR_VOLATILE BIT(0)
+#define CXL_GMER_PHYS_ADDR_MASK 0xFFFFFFFFFFFFFF80
+
+#define CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT BIT(0)
+#define CXL_GMER_EVT_DESC_THRESHOLD_EVENT BIT(1)
+#define CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW BIT(2)
+#define show_event_desc_flags(flags) __print_flags(flags, "|", \
+ { CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT, "Uncorrectable Event" }, \
+ { CXL_GMER_EVT_DESC_THRESHOLD_EVENT, "Threshold event" }, \
+ { CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW, "Poison List Overflow" } \
+)
+
+#define CXL_GMER_MEM_EVT_TYPE_ECC_ERROR 0x00
+#define CXL_GMER_MEM_EVT_TYPE_INV_ADDR 0x01
+#define CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR 0x02
+#define show_mem_event_type(type) __print_symbolic(type, \
+ { CXL_GMER_MEM_EVT_TYPE_ECC_ERROR, "ECC Error" }, \
+ { CXL_GMER_MEM_EVT_TYPE_INV_ADDR, "Invalid Address" }, \
+ { CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR, "Data Path Error" } \
+)
+
+#define CXL_GMER_TRANS_UNKNOWN 0x00
+#define CXL_GMER_TRANS_HOST_READ 0x01
+#define CXL_GMER_TRANS_HOST_WRITE 0x02
+#define CXL_GMER_TRANS_HOST_SCAN_MEDIA 0x03
+#define CXL_GMER_TRANS_HOST_INJECT_POISON 0x04
+#define CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB 0x05
+#define CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT 0x06
+#define show_trans_type(type) __print_symbolic(type, \
+ { CXL_GMER_TRANS_UNKNOWN, "Unknown" }, \
+ { CXL_GMER_TRANS_HOST_READ, "Host Read" }, \
+ { CXL_GMER_TRANS_HOST_WRITE, "Host Write" }, \
+ { CXL_GMER_TRANS_HOST_SCAN_MEDIA, "Host Scan Media" }, \
+ { CXL_GMER_TRANS_HOST_INJECT_POISON, "Host Inject Poison" }, \
+ { CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB, "Internal Media Scrub" }, \
+ { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \
+)
+
+#define CXL_GMER_VALID_CHANNEL BIT(0)
+#define CXL_GMER_VALID_RANK BIT(1)
+#define CXL_GMER_VALID_DEVICE BIT(2)
+#define CXL_GMER_VALID_COMPONENT BIT(3)
+#define show_valid_flags(flags) __print_flags(flags, "|", \
+ { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
+ { CXL_GMER_VALID_RANK, "RANK" }, \
+ { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
+ { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
+)
+
+TRACE_EVENT(general_media,
+
+ TP_PROTO(const char *dev_name, enum cxl_event_log_type log,
+ struct cxl_event_gen_media *rec),
+
+ TP_ARGS(dev_name, log, rec),
+
+ TP_STRUCT__entry(
+ CXL_EVT_TP_entry
+ /* General Media */
+ __field(u64, phys_addr)
+ __field(u8, descriptor)
+ __field(u8, type)
+ __field(u8, transaction_type)
+ __field(u8, channel)
+ __field(u32, device)
+ __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
+ __array(u8, reserved, CXL_EVENT_GEN_MED_RES_SIZE)
+ __field(u16, validity_flags)
+ __field(u8, rank) /* Out of order to pack trace record */
+ ),
+
+ TP_fast_assign(
+ CXL_EVT_TP_fast_assign(dev_name, log, rec->hdr);
+
+ /* General Media */
+ __entry->phys_addr = le64_to_cpu(rec->phys_addr);
+ __entry->descriptor = rec->descriptor;
+ __entry->type = rec->type;
+ __entry->transaction_type = rec->transaction_type;
+ __entry->channel = rec->channel;
+ __entry->rank = rec->rank;
+ __entry->device = get_unaligned_le24(rec->device);
+ memcpy(__entry->comp_id, &rec->component_id,
+ CXL_EVENT_GEN_MED_COMP_ID_SIZE);
+ memcpy(__entry->reserved, &rec->reserved,
+ CXL_EVENT_GEN_MED_RES_SIZE);
+ __entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
+ ),
+
+ CXL_EVT_TP_printk("phys_addr=%llx volatile=%s desc='%s' type='%s' " \
+ "trans_type='%s' channel=%u rank=%u device=%x comp_id=%s " \
+ "valid_flags='%s' reserved=%s",
+ __entry->phys_addr & CXL_GMER_PHYS_ADDR_MASK,
+ (__entry->phys_addr & CXL_GMER_PHYS_ADDR_VOLATILE) ? "TRUE" : "FALSE",
+ show_event_desc_flags(__entry->descriptor),
+ show_mem_event_type(__entry->type),
+ show_trans_type(__entry->transaction_type),
+ __entry->channel, __entry->rank, __entry->device,
+ __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
+ show_valid_flags(__entry->validity_flags),
+ __print_hex(__entry->reserved, CXL_EVENT_GEN_MED_RES_SIZE)
+ )
+);
+
#endif /* _CXL_TRACE_EVENTS_H */

/* This part must be outside protection */
--
2.37.2


2022-10-11 13:06:27

by Jonathan Cameron

[permalink] [raw]
Subject: Re: [RFC V2 PATCH 05/11] cxl/mem: Trace General Media Event Record

On Mon, 10 Oct 2022 15:41:25 -0700
[email protected] wrote:

> From: Ira Weiny <[email protected]>
>
> CXL rev 3.0 section 8.2.9.2.1.1 defines the General Media Event Record.
>
> Determine if the event read is a general media record and if so trace
> the record.
>
> Signed-off-by: Ira Weiny <[email protected]>

I'll review the rest of these with the assumption that the question
of reserved bytes in tracepoints will get resolved before these go in.

One minor comment on a comment inline. Other than those lgtm

Reviewed-by: Jonathan Cameron <[email protected]>

>
> ---
> Changes from RFC:
> Add reserved byte array
> Use common CXL event header record macros
> Jonathan
> Use unaligned_le{24,16} for unaligned fields
> Don't use the inverse of phy addr mask
> Dave Jiang
> s/cxl_gen_media_event/general_media
> s/cxl_evt_gen_media/cxl_event_gen_media
> ---
> drivers/cxl/core/mbox.c | 30 ++++++++++-
> drivers/cxl/cxlmem.h | 20 +++++++
> include/trace/events/cxl.h | 108 +++++++++++++++++++++++++++++++++++++
> 3 files changed, 156 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index bc4e42b3e01b..1097250c115a 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -712,6 +712,32 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
> }
> EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
>
> +/*
> + * General Media Event Record
> + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
> + */
> +static const uuid_t gen_media_event_uuid =
> + UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
> + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
> +
> +static void cxl_trace_event_record(const char *dev_name,
> + enum cxl_event_log_type type,
> + struct cxl_get_event_payload *payload)
> +{
> + uuid_t *id = &payload->record.hdr.id;
> +
> + if (uuid_equal(id, &gen_media_event_uuid)) {
> + struct cxl_event_gen_media *rec =
> + (struct cxl_event_gen_media *)&payload->record;
> +
> + trace_general_media(dev_name, type, rec);
> + return;
> + }
> +
> + /* For unknown record types print just the header */
> + trace_generic_event(dev_name, type, &payload->record);

Looks like it prints the whole thing now...


> +}
> +


> diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
> index 318ba5fe046e..82a8d3b750a2 100644
> --- a/include/trace/events/cxl.h
> +++ b/include/trace/events/cxl.h
> @@ -122,6 +122,114 @@ TRACE_EVENT(generic_event,


> +#define CXL_GMER_VALID_CHANNEL BIT(0)
> +#define CXL_GMER_VALID_RANK BIT(1)
> +#define CXL_GMER_VALID_DEVICE BIT(2)
> +#define CXL_GMER_VALID_COMPONENT BIT(3)
> +#define show_valid_flags(flags) __print_flags(flags, "|", \
> + { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
> + { CXL_GMER_VALID_RANK, "RANK" }, \
> + { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
> + { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
> +)

I'd still rather we only had the TP_printk only print those parts that are valid
rather than the mess of having to go check the validity bit before deciding whether
to take notice of the field. But meh, not that important given thats
not the main intended way to consume this data.


> +
> +TRACE_EVENT(general_media,
> +
> + TP_PROTO(const char *dev_name, enum cxl_event_log_type log,
> + struct cxl_event_gen_media *rec),
> +
> + TP_ARGS(dev_name, log, rec),
> +
> + TP_STRUCT__entry(
> + CXL_EVT_TP_entry
> + /* General Media */
> + __field(u64, phys_addr)
> + __field(u8, descriptor)
> + __field(u8, type)
> + __field(u8, transaction_type)
> + __field(u8, channel)
> + __field(u32, device)
> + __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
> + __array(u8, reserved, CXL_EVENT_GEN_MED_RES_SIZE)
> + __field(u16, validity_flags)
> + __field(u8, rank) /* Out of order to pack trace record */
> + ),
> +
> + TP_fast_assign(
> + CXL_EVT_TP_fast_assign(dev_name, log, rec->hdr);
> +
> + /* General Media */
> + __entry->phys_addr = le64_to_cpu(rec->phys_addr);
> + __entry->descriptor = rec->descriptor;
> + __entry->type = rec->type;
> + __entry->transaction_type = rec->transaction_type;
> + __entry->channel = rec->channel;
> + __entry->rank = rec->rank;
> + __entry->device = get_unaligned_le24(rec->device);
> + memcpy(__entry->comp_id, &rec->component_id,
> + CXL_EVENT_GEN_MED_COMP_ID_SIZE);
> + memcpy(__entry->reserved, &rec->reserved,
> + CXL_EVENT_GEN_MED_RES_SIZE);
> + __entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
> + ),

2022-10-14 23:59:05

by Ira Weiny

[permalink] [raw]
Subject: Re: [RFC V2 PATCH 05/11] cxl/mem: Trace General Media Event Record

On Tue, Oct 11, 2022 at 01:57:02PM +0100, Jonathan Cameron wrote:
> On Mon, 10 Oct 2022 15:41:25 -0700
> [email protected] wrote:
>
> > From: Ira Weiny <[email protected]>
> >
> > CXL rev 3.0 section 8.2.9.2.1.1 defines the General Media Event Record.
> >
> > Determine if the event read is a general media record and if so trace
> > the record.
> >
> > Signed-off-by: Ira Weiny <[email protected]>
>
> I'll review the rest of these with the assumption that the question
> of reserved bytes in tracepoints will get resolved before these go in.

Yea I'm removing them. I think I messed up somehow because I thought I had
removed the reserved fields from the records. But perhaps that was only in my
dreams... :-/ Sorry. :-)

>
> One minor comment on a comment inline. Other than those lgtm
>
> Reviewed-by: Jonathan Cameron <[email protected]>
>

Thanks!

[snip]

> >
> > +/*
> > + * General Media Event Record
> > + * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
> > + */
> > +static const uuid_t gen_media_event_uuid =
> > + UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
> > + 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
> > +
> > +static void cxl_trace_event_record(const char *dev_name,
> > + enum cxl_event_log_type type,
> > + struct cxl_get_event_payload *payload)
> > +{
> > + uuid_t *id = &payload->record.hdr.id;
> > +
> > + if (uuid_equal(id, &gen_media_event_uuid)) {
> > + struct cxl_event_gen_media *rec =
> > + (struct cxl_event_gen_media *)&payload->record;
> > +
> > + trace_general_media(dev_name, type, rec);
> > + return;
> > + }
> > +
> > + /* For unknown record types print just the header */
> > + trace_generic_event(dev_name, type, &payload->record);
>
> Looks like it prints the whole thing now...

An unknown record is dumped yes. I'm ok with skipping this but it was
discussed early on that any unknown record would be passed through.

>
>
> > +}
> > +
>
>
> > diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
> > index 318ba5fe046e..82a8d3b750a2 100644
> > --- a/include/trace/events/cxl.h
> > +++ b/include/trace/events/cxl.h
> > @@ -122,6 +122,114 @@ TRACE_EVENT(generic_event,
>
>
> > +#define CXL_GMER_VALID_CHANNEL BIT(0)
> > +#define CXL_GMER_VALID_RANK BIT(1)
> > +#define CXL_GMER_VALID_DEVICE BIT(2)
> > +#define CXL_GMER_VALID_COMPONENT BIT(3)
> > +#define show_valid_flags(flags) __print_flags(flags, "|", \
> > + { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
> > + { CXL_GMER_VALID_RANK, "RANK" }, \
> > + { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
> > + { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
> > +)
>
> I'd still rather we only had the TP_printk only print those parts that are valid
> rather than the mess of having to go check the validity bit before deciding whether
> to take notice of the field. But meh, not that important given thats
> not the main intended way to consume this data.
>

Ok I spent some time really thinking about this and below is an attempt at
that.

However, I must be missing something in what you are proposing because I don't
like having extra space in the trace buffer to print into like this and I
can't figure out where else to put a print buffer.

Also note that this will have no impact on most of the user space tools which
use libtracefs as they will see all the fields and will need to do a similar
decode.

Do you really think this is worth the effort?

Ira


commit 54c4ee67bcac6a38cbc9b0ea2ef952e197356dee
Author: Ira Weiny <[email protected]>
Date: Fri Oct 14 16:15:27 2022 -0700

squash: Attempt to print only valid fields per Jonathan suggestion

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 2add473fc168..9e15028af79c 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -720,6 +720,28 @@ static const uuid_t gen_media_event_uuid =
UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);

+const char *cxl_gem_print_valid(u8 *buf, int nr, u16 valid_flags, u8 channel,
+ u8 rank, u32 device, u8 *comp_id)
+{
+ char *str = buf;
+ int rc = 0;
+
+ if (valid_flags & CXL_GMER_VALID_CHANNEL)
+ rc = snprintf(str, nr, "channel=%u ", channel);
+
+ if (valid_flags & CXL_GMER_VALID_RANK)
+ rc = snprintf(str + rc, nr - rc, "rank=%u ", rank);
+
+ if (valid_flags & CXL_GMER_VALID_DEVICE)
+ rc = snprintf(str + rc, nr - rc, "device=%u ", device);
+
+ if (valid_flags & CXL_GMER_VALID_COMPONENT)
+ rc = snprintf(str + rc, nr - rc, "comp_id=%*ph ",
+ CXL_EVENT_GEN_MED_COMP_ID_SIZE, comp_id);
+
+ return str;
+}
+
static void cxl_trace_event_record(const char *dev_name,
enum cxl_event_log_type type,
struct cxl_get_event_payload *payload)
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 8fbd20d8a0b2..3d3bfef69d32 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -429,6 +429,13 @@ struct cxl_event_gen_media {
u8 reserved[0x2e];
} __packed;

+#define CXL_GMER_VALID_CHANNEL BIT(0)
+#define CXL_GMER_VALID_RANK BIT(1)
+#define CXL_GMER_VALID_DEVICE BIT(2)
+#define CXL_GMER_VALID_COMPONENT BIT(3)
+const char *cxl_gem_print_valid(u8 *buf, int nr, u16 valid_flags, u8 channel,
+ u8 rank, u32 device, u8 *comp_id);
+
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
index e3e11c9055ba..27bb790cb685 100644
--- a/include/trace/events/cxl.h
+++ b/include/trace/events/cxl.h
@@ -161,16 +161,7 @@ TRACE_EVENT(generic_event,
{ CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \
)

-#define CXL_GMER_VALID_CHANNEL BIT(0)
-#define CXL_GMER_VALID_RANK BIT(1)
-#define CXL_GMER_VALID_DEVICE BIT(2)
-#define CXL_GMER_VALID_COMPONENT BIT(3)
-#define show_valid_flags(flags) __print_flags(flags, "|", \
- { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
- { CXL_GMER_VALID_RANK, "RANK" }, \
- { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
- { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
-)
+#define CXL_VALID_PRINT_STR_LEN 512

TRACE_EVENT(general_media,

@@ -191,6 +182,7 @@ TRACE_EVENT(general_media,
__array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
__field(u16, validity_flags)
__field(u8, rank) /* Out of order to pack trace record */
+ __array(u8, str, CXL_VALID_PRINT_STR_LEN)
),

TP_fast_assign(
@@ -209,17 +201,17 @@ TRACE_EVENT(general_media,
__entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
),

- CXL_EVT_TP_printk("phys_addr=%llx volatile=%s desc='%s' type='%s' " \
- "trans_type='%s' channel=%u rank=%u device=%x comp_id=%s " \
- "valid_flags='%s'",
+ CXL_EVT_TP_printk("phys_addr=%llx volatile=%s desc='%s' type='%s' " \
+ "trans_type='%s' %s",
__entry->phys_addr & CXL_GMER_PHYS_ADDR_MASK,
(__entry->phys_addr & CXL_GMER_PHYS_ADDR_VOLATILE) ? "TRUE" : "FALSE",
show_event_desc_flags(__entry->descriptor),
show_mem_event_type(__entry->type),
show_trans_type(__entry->transaction_type),
- __entry->channel, __entry->rank, __entry->device,
- __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
- show_valid_flags(__entry->validity_flags)
+ cxl_gem_print_valid(__entry->str, CXL_VALID_PRINT_STR_LEN,
+ __entry->validity_flags, __entry->channel,
+ __entry->rank, __entry->device,
+ __entry->comp_id)
)
);

2022-10-15 11:33:19

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC V2 PATCH 05/11] cxl/mem: Trace General Media Event Record

On Mon, 10 Oct 2022 15:41:25 -0700
[email protected] wrote:

> +static void cxl_trace_event_record(const char *dev_name,
> + enum cxl_event_log_type type,
> + struct cxl_get_event_payload *payload)
> +{
> + uuid_t *id = &payload->record.hdr.id;
> +

Perhaps you want to add here:

if (!trace_cxl_general_media_enabled() && !trace_clx_generic_event_enabled())
return;

This way the below logic is only executed if either event is enabled.
The above uses static_branches, so if the architecture supports them,
they are not compare and branch, but jumps and/or nops.

-- Steve


> + if (uuid_equal(id, &gen_media_event_uuid)) {
> + struct cxl_event_gen_media *rec =
> + (struct cxl_event_gen_media *)&payload->record;
> +
> + trace_general_media(dev_name, type, rec);
> + return;
> + }
> +
> + /* For unknown record types print just the header */
> + trace_generic_event(dev_name, type, &payload->record);
> +}

2022-10-17 16:44:29

by Jonathan Cameron

[permalink] [raw]
Subject: Re: [RFC V2 PATCH 05/11] cxl/mem: Trace General Media Event Record

> >
> > > diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
> > > index 318ba5fe046e..82a8d3b750a2 100644
> > > --- a/include/trace/events/cxl.h
> > > +++ b/include/trace/events/cxl.h
> > > @@ -122,6 +122,114 @@ TRACE_EVENT(generic_event,
> >
> >
> > > +#define CXL_GMER_VALID_CHANNEL BIT(0)
> > > +#define CXL_GMER_VALID_RANK BIT(1)
> > > +#define CXL_GMER_VALID_DEVICE BIT(2)
> > > +#define CXL_GMER_VALID_COMPONENT BIT(3)
> > > +#define show_valid_flags(flags) __print_flags(flags, "|", \
> > > + { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
> > > + { CXL_GMER_VALID_RANK, "RANK" }, \
> > > + { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
> > > + { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
> > > +)
> >
> > I'd still rather we only had the TP_printk only print those parts that are valid
> > rather than the mess of having to go check the validity bit before deciding whether
> > to take notice of the field. But meh, not that important given thats
> > not the main intended way to consume this data.
> >
>
> Ok I spent some time really thinking about this and below is an attempt at
> that.
>
> However, I must be missing something in what you are proposing because I don't
> like having extra space in the trace buffer to print into like this and I
> can't figure out where else to put a print buffer.

Putting the space in the trace buffer definitely doesn't makes sense.
Ah. Looking back the CCIX code I assumed it was serialized so could use a
static buffer.

Looking at other similar cases though and we have a lot of use
of trace_seq_printf() e.g. libata_trace_parse_status() though note
there is some magic macro stuff in include/trace/events/libata.h
to tie that together.
https://elixir.bootlin.com/linux/latest/source/drivers/ata/libata-trace.c#L14

That seems to get you access to the actual buffer we are printing into
in similar cases.

>
> Also note that this will have no impact on most of the user space tools which
> use libtracefs as they will see all the fields and will need to do a similar
> decode.
>
> Do you really think this is worth the effort?

With the allocation problem, definitely not. With that solved, it's not a huge amount
of extra code. Also rather nicely 'documents' meaning of the valid bits.

I'm a kernel hacker. I like to not need much beyond echo and cat :)

Jonathan

>
> Ira
>
>
> commit 54c4ee67bcac6a38cbc9b0ea2ef952e197356dee
> Author: Ira Weiny <[email protected]>
> Date: Fri Oct 14 16:15:27 2022 -0700
>
> squash: Attempt to print only valid fields per Jonathan suggestion
>
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 2add473fc168..9e15028af79c 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -720,6 +720,28 @@ static const uuid_t gen_media_event_uuid =
> UUID_INIT(0xfbcd0a77, 0xc260, 0x417f,
> 0x85, 0xa9, 0x08, 0x8b, 0x16, 0x21, 0xeb, 0xa6);
>
> +const char *cxl_gem_print_valid(u8 *buf, int nr, u16 valid_flags, u8 channel,
> + u8 rank, u32 device, u8 *comp_id)
> +{
> + char *str = buf;
> + int rc = 0;
> +
> + if (valid_flags & CXL_GMER_VALID_CHANNEL)
> + rc = snprintf(str, nr, "channel=%u ", channel);
> +
> + if (valid_flags & CXL_GMER_VALID_RANK)
> + rc = snprintf(str + rc, nr - rc, "rank=%u ", rank);
> +
> + if (valid_flags & CXL_GMER_VALID_DEVICE)
> + rc = snprintf(str + rc, nr - rc, "device=%u ", device);
> +
> + if (valid_flags & CXL_GMER_VALID_COMPONENT)
> + rc = snprintf(str + rc, nr - rc, "comp_id=%*ph ",
> + CXL_EVENT_GEN_MED_COMP_ID_SIZE, comp_id);
> +
> + return str;
> +}
> +
> static void cxl_trace_event_record(const char *dev_name,
> enum cxl_event_log_type type,
> struct cxl_get_event_payload *payload)
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 8fbd20d8a0b2..3d3bfef69d32 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -429,6 +429,13 @@ struct cxl_event_gen_media {
> u8 reserved[0x2e];
> } __packed;
>
> +#define CXL_GMER_VALID_CHANNEL BIT(0)
> +#define CXL_GMER_VALID_RANK BIT(1)
> +#define CXL_GMER_VALID_DEVICE BIT(2)
> +#define CXL_GMER_VALID_COMPONENT BIT(3)
> +const char *cxl_gem_print_valid(u8 *buf, int nr, u16 valid_flags, u8 channel,
> + u8 rank, u32 device, u8 *comp_id);
> +
> struct cxl_mbox_get_partition_info {
> __le64 active_volatile_cap;
> __le64 active_persistent_cap;
> diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h
> index e3e11c9055ba..27bb790cb685 100644
> --- a/include/trace/events/cxl.h
> +++ b/include/trace/events/cxl.h
> @@ -161,16 +161,7 @@ TRACE_EVENT(generic_event,
> { CXL_GMER_TRANS_INTERNAL_MEDIA_MANAGEMENT, "Internal Media Management" } \
> )
>
> -#define CXL_GMER_VALID_CHANNEL BIT(0)
> -#define CXL_GMER_VALID_RANK BIT(1)
> -#define CXL_GMER_VALID_DEVICE BIT(2)
> -#define CXL_GMER_VALID_COMPONENT BIT(3)
> -#define show_valid_flags(flags) __print_flags(flags, "|", \
> - { CXL_GMER_VALID_CHANNEL, "CHANNEL" }, \
> - { CXL_GMER_VALID_RANK, "RANK" }, \
> - { CXL_GMER_VALID_DEVICE, "DEVICE" }, \
> - { CXL_GMER_VALID_COMPONENT, "COMPONENT" } \
> -)
> +#define CXL_VALID_PRINT_STR_LEN 512
>
> TRACE_EVENT(general_media,
>
> @@ -191,6 +182,7 @@ TRACE_EVENT(general_media,
> __array(u8, comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE)
> __field(u16, validity_flags)
> __field(u8, rank) /* Out of order to pack trace record */
> + __array(u8, str, CXL_VALID_PRINT_STR_LEN)
> ),
>
> TP_fast_assign(
> @@ -209,17 +201,17 @@ TRACE_EVENT(general_media,
> __entry->validity_flags = get_unaligned_le16(&rec->validity_flags);
> ),
>
> - CXL_EVT_TP_printk("phys_addr=%llx volatile=%s desc='%s' type='%s' " \
> - "trans_type='%s' channel=%u rank=%u device=%x comp_id=%s " \
> - "valid_flags='%s'",
> + CXL_EVT_TP_printk("phys_addr=%llx volatile=%s desc='%s' type='%s' " \
> + "trans_type='%s' %s",
> __entry->phys_addr & CXL_GMER_PHYS_ADDR_MASK,
> (__entry->phys_addr & CXL_GMER_PHYS_ADDR_VOLATILE) ? "TRUE" : "FALSE",
> show_event_desc_flags(__entry->descriptor),
> show_mem_event_type(__entry->type),
> show_trans_type(__entry->transaction_type),
> - __entry->channel, __entry->rank, __entry->device,
> - __print_hex(__entry->comp_id, CXL_EVENT_GEN_MED_COMP_ID_SIZE),
> - show_valid_flags(__entry->validity_flags)
> + cxl_gem_print_valid(__entry->str, CXL_VALID_PRINT_STR_LEN,
> + __entry->validity_flags, __entry->channel,
> + __entry->rank, __entry->device,
> + __entry->comp_id)
> )
> );
>
>

2022-10-17 17:46:02

by Steven Rostedt

[permalink] [raw]
Subject: Re: [RFC V2 PATCH 05/11] cxl/mem: Trace General Media Event Record

On Mon, 17 Oct 2022 17:37:17 +0100
Jonathan Cameron <[email protected]> wrote:

> Looking at other similar cases though and we have a lot of use
> of trace_seq_printf() e.g. libata_trace_parse_status() though note
> there is some magic macro stuff in include/trace/events/libata.h
> to tie that together.
> https://elixir.bootlin.com/linux/latest/source/drivers/ata/libata-trace.c#L14
>
> That seems to get you access to the actual buffer we are printing into
> in similar cases.

Looking at the code you linked to, I wonder why __print_flags() wasn't used?

For instance, you have:

const char *
libata_trace_parse_status(struct trace_seq *p, unsigned char status)
{
const char *ret = trace_seq_buffer_ptr(p);

trace_seq_printf(p, "{ ");
if (status & ATA_BUSY)
trace_seq_printf(p, "BUSY ");
if (status & ATA_DRDY)
trace_seq_printf(p, "DRDY ");
if (status & ATA_DF)
trace_seq_printf(p, "DF ");
if (status & ATA_DSC)
trace_seq_printf(p, "DSC ");
if (status & ATA_DRQ)
trace_seq_printf(p, "DRQ ");
if (status & ATA_CORR)
trace_seq_printf(p, "CORR ");
if (status & ATA_SENSE)
trace_seq_printf(p, "SENSE ");
if (status & ATA_ERR)
trace_seq_printf(p, "ERR ");
trace_seq_putc(p, '}');
trace_seq_putc(p, 0);

return ret;
}


Which is just a re-implementation of:

__print_flags(status, " ",
{ ATA_BUSY, "BUSY" },
{ ATA_DRDY, "DRDY" },
{ ATA_DF, "DF" },
{ ATA_DSC, "DSC" },
{ ATA_DRQ, "DRQ" },
{ ATA_CORR, "CORR" },
{ ATA_SENSE, "SENSE" },
{ ATA_ERR, "ERR" })


The major difference between the two, is that libtraceevent will be able to
parse the above and convert the status bits into strings, whereas using
libata_trace_parse_status() will just give you a parsing error.

That is, perf and trace-cmd will not be able to parse it unless you write a
separate plugin for libtraceevent to do it but that means you'll have
duplicate code.

I know you just want echo and cat, but that will still work, and this will
make it work for the tooling as well.

-- Steve

2022-10-18 10:13:48

by Jonathan Cameron

[permalink] [raw]
Subject: Re: [RFC V2 PATCH 05/11] cxl/mem: Trace General Media Event Record

On Mon, 17 Oct 2022 13:21:43 -0400
Steven Rostedt <[email protected]> wrote:

> On Mon, 17 Oct 2022 17:37:17 +0100
> Jonathan Cameron <[email protected]> wrote:
>
> > Looking at other similar cases though and we have a lot of use
> > of trace_seq_printf() e.g. libata_trace_parse_status() though note
> > there is some magic macro stuff in include/trace/events/libata.h
> > to tie that together.
> > https://elixir.bootlin.com/linux/latest/source/drivers/ata/libata-trace.c#L14
> >
> > That seems to get you access to the actual buffer we are printing into
> > in similar cases.
>
> Looking at the code you linked to, I wonder why __print_flags() wasn't used?
>
> For instance, you have:
>
> const char *
> libata_trace_parse_status(struct trace_seq *p, unsigned char status)
> {
> const char *ret = trace_seq_buffer_ptr(p);
>
> trace_seq_printf(p, "{ ");
> if (status & ATA_BUSY)
> trace_seq_printf(p, "BUSY ");
> if (status & ATA_DRDY)
> trace_seq_printf(p, "DRDY ");
> if (status & ATA_DF)
> trace_seq_printf(p, "DF ");
> if (status & ATA_DSC)
> trace_seq_printf(p, "DSC ");
> if (status & ATA_DRQ)
> trace_seq_printf(p, "DRQ ");
> if (status & ATA_CORR)
> trace_seq_printf(p, "CORR ");
> if (status & ATA_SENSE)
> trace_seq_printf(p, "SENSE ");
> if (status & ATA_ERR)
> trace_seq_printf(p, "ERR ");
> trace_seq_putc(p, '}');
> trace_seq_putc(p, 0);
>
> return ret;
> }
>
>
> Which is just a re-implementation of:
>
> __print_flags(status, " ",
> { ATA_BUSY, "BUSY" },
> { ATA_DRDY, "DRDY" },
> { ATA_DF, "DF" },
> { ATA_DSC, "DSC" },
> { ATA_DRQ, "DRQ" },
> { ATA_CORR, "CORR" },
> { ATA_SENSE, "SENSE" },
> { ATA_ERR, "ERR" })
>
>
> The major difference between the two, is that libtraceevent will be able to
> parse the above and convert the status bits into strings, whereas using
> libata_trace_parse_status() will just give you a parsing error.
>
> That is, perf and trace-cmd will not be able to parse it unless you write a
> separate plugin for libtraceevent to do it but that means you'll have
> duplicate code.
>
> I know you just want echo and cat, but that will still work, and this will
> make it work for the tooling as well.

Excellent point, though in the case we are interested in for CXL,
__print_flags() is not enough.

We have a mass of fields that only contain something useful to print if
the valid bits in a mask are set. I just pulled that example to
show how trace_seq_printf() could be used to achieve optional printing
as opposed to current situation where the reader of the print has
to interpret the mask to work out if fields contain anything useful.

To do something nice with them in perf (well probably ras-daemon in
this case) we'll have to parse the valid bits anyway so effectively
write such a plugin. There we need to do a bunch of mangling to get
the events stored in a DB anyway, so this isn't a huge overhead.

Jonathan


>
> -- Steve
>

2022-10-21 05:57:01

by Ira Weiny

[permalink] [raw]
Subject: Re: [RFC V2 PATCH 05/11] cxl/mem: Trace General Media Event Record

On Tue, Oct 18, 2022 at 10:46:36AM +0100, Jonathan Cameron wrote:
> On Mon, 17 Oct 2022 13:21:43 -0400
> Steven Rostedt <[email protected]> wrote:
>
> > On Mon, 17 Oct 2022 17:37:17 +0100
> > Jonathan Cameron <[email protected]> wrote:
> >
> > > Looking at other similar cases though and we have a lot of use
> > > of trace_seq_printf() e.g. libata_trace_parse_status() though note
> > > there is some magic macro stuff in include/trace/events/libata.h
> > > to tie that together.
> > > https://elixir.bootlin.com/linux/latest/source/drivers/ata/libata-trace.c#L14
> > >
> > > That seems to get you access to the actual buffer we are printing into
> > > in similar cases.
> >
> > Looking at the code you linked to, I wonder why __print_flags() wasn't used?
> >
> > For instance, you have:
> >
> > const char *
> > libata_trace_parse_status(struct trace_seq *p, unsigned char status)
> > {
> > const char *ret = trace_seq_buffer_ptr(p);
> >
> > trace_seq_printf(p, "{ ");
> > if (status & ATA_BUSY)
> > trace_seq_printf(p, "BUSY ");
> > if (status & ATA_DRDY)
> > trace_seq_printf(p, "DRDY ");
> > if (status & ATA_DF)
> > trace_seq_printf(p, "DF ");
> > if (status & ATA_DSC)
> > trace_seq_printf(p, "DSC ");
> > if (status & ATA_DRQ)
> > trace_seq_printf(p, "DRQ ");
> > if (status & ATA_CORR)
> > trace_seq_printf(p, "CORR ");
> > if (status & ATA_SENSE)
> > trace_seq_printf(p, "SENSE ");
> > if (status & ATA_ERR)
> > trace_seq_printf(p, "ERR ");
> > trace_seq_putc(p, '}');
> > trace_seq_putc(p, 0);
> >
> > return ret;
> > }
> >
> >
> > Which is just a re-implementation of:
> >
> > __print_flags(status, " ",
> > { ATA_BUSY, "BUSY" },
> > { ATA_DRDY, "DRDY" },
> > { ATA_DF, "DF" },
> > { ATA_DSC, "DSC" },
> > { ATA_DRQ, "DRQ" },
> > { ATA_CORR, "CORR" },
> > { ATA_SENSE, "SENSE" },
> > { ATA_ERR, "ERR" })
> >
> >
> > The major difference between the two, is that libtraceevent will be able to
> > parse the above and convert the status bits into strings, whereas using
> > libata_trace_parse_status() will just give you a parsing error.
> >
> > That is, perf and trace-cmd will not be able to parse it unless you write a
> > separate plugin for libtraceevent to do it but that means you'll have
> > duplicate code.
> >
> > I know you just want echo and cat, but that will still work, and this will
> > make it work for the tooling as well.
>
> Excellent point, though in the case we are interested in for CXL,
> __print_flags() is not enough.
>
> We have a mass of fields that only contain something useful to print if
> the valid bits in a mask are set. I just pulled that example to
> show how trace_seq_printf() could be used to achieve optional printing
> as opposed to current situation where the reader of the print has
> to interpret the mask to work out if fields contain anything useful.
>
> To do something nice with them in perf (well probably ras-daemon in
> this case) we'll have to parse the valid bits anyway so effectively
> write such a plugin. There we need to do a bunch of mangling to get
> the events stored in a DB anyway, so this isn't a huge overhead.

Given this information I think I'm going to punt on this and take your reviewed
by on the code as it is.

We can certainly try to change it later but for now I think it serves our
purpose. Better to focus on getting the code working with irq's.

Ira