This series adds decoding for the CXL Protocol Errors Common Platform
Error Record.
Smita Koralahalli (2):
efi/cper, cxl: Decode CXL Protocol Error Section
efi/cper, cxl: Decode CXL Error Log
drivers/firmware/efi/Makefile | 2 +-
drivers/firmware/efi/cper.c | 9 ++
drivers/firmware/efi/cper_cxl.c | 179 ++++++++++++++++++++++++++++++++
drivers/firmware/efi/cper_cxl.h | 66 ++++++++++++
include/linux/cxl_err.h | 22 ++++
5 files changed, 277 insertions(+), 1 deletion(-)
create mode 100644 drivers/firmware/efi/cper_cxl.c
create mode 100644 drivers/firmware/efi/cper_cxl.h
create mode 100644 include/linux/cxl_err.h
--
2.17.1
Add support for decoding CXL Protocol Error Section as defined in UEFI 2.10
Section N.2.13.
Do the section decoding in a new cper_cxl.c file. This new file will be
used in the future for more CXL CPERs decode support. Add this to the
existing UEFI_CPER config.
Signed-off-by: Smita Koralahalli <[email protected]>
---
v2:
Added all fields for decoding. Printed DVSEC and Capability ID.
Added additional CXL Agent Types based on UEFI 2.10.
Named the unnamed union to agent addr.
Changed field name from agent_addr -> rcrb_base_addr.
subsystem_device_id -> subsystem_id.
Commented why different union elements are relevant.
Provided other comments wherever necessary.
---
drivers/firmware/efi/Makefile | 2 +-
drivers/firmware/efi/cper.c | 9 ++
drivers/firmware/efi/cper_cxl.c | 152 ++++++++++++++++++++++++++++++++
drivers/firmware/efi/cper_cxl.h | 66 ++++++++++++++
4 files changed, 228 insertions(+), 1 deletion(-)
create mode 100644 drivers/firmware/efi/cper_cxl.c
create mode 100644 drivers/firmware/efi/cper_cxl.h
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index 8d151e332584..4f332de54173 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -19,7 +19,7 @@ endif
obj-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdtparams.o
obj-$(CONFIG_EFI_ESRT) += esrt.o
obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o
-obj-$(CONFIG_UEFI_CPER) += cper.o
+obj-$(CONFIG_UEFI_CPER) += cper.o cper_cxl.o
obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o
obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o
subdir-$(CONFIG_EFI_STUB) += libstub
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index e4e5ea7ce910..181deb9af527 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -24,6 +24,7 @@
#include <linux/bcd.h>
#include <acpi/ghes.h>
#include <ras/ras_event.h>
+#include "cper_cxl.h"
/*
* CPER record ID need to be unique even after reboot, because record
@@ -595,6 +596,14 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
cper_print_fw_err(newpfx, gdata, fw_err);
else
goto err_section_too_small;
+ } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
+ struct cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
+
+ printk("%ssection_type: CXL Protocol Error\n", newpfx);
+ if (gdata->error_data_length >= sizeof(*prot_err))
+ cper_print_prot_err(newpfx, prot_err);
+ else
+ goto err_section_too_small;
} else {
const void *err = acpi_hest_get_payload(gdata);
diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c
new file mode 100644
index 000000000000..6c94af234be9
--- /dev/null
+++ b/drivers/firmware/efi/cper_cxl.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UEFI Common Platform Error Record (CPER) support for CXL Section.
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Author: Smita Koralahalli <[email protected]>
+ */
+
+#include <linux/cper.h>
+#include "cper_cxl.h"
+
+#define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0)
+#define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1)
+#define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2)
+#define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3)
+#define PROT_ERR_VALID_CAPABILITY BIT_ULL(4)
+#define PROT_ERR_VALID_DVSEC BIT_ULL(5)
+
+static const char * const prot_err_agent_type_strs[] = {
+ "Restricted CXL Device",
+ "Restricted CXL Host Downstream Port",
+ "CXL Device",
+ "CXL Logical Device",
+ "CXL Fabric Manager managed Logical Device",
+ "CXL Root Port",
+ "CXL Downstream Switch Port",
+ "CXL Upstream Switch Port",
+};
+
+/*
+ * The layout of the enumeration and the values matches CXL Agent Type
+ * field in the UEFI 2.10 Section N.2.13,
+ */
+enum {
+ RCD, /* Restricted CXL Device */
+ RCH_DP, /* Restricted CXL Host Downstream Port */
+ DEVICE, /* CXL Device */
+ LD, /* CXL Logical Device */
+ FMLD, /* CXL Fabric Manager managed Logical Device */
+ RP, /* CXL Root Port */
+ DSP, /* CXL Downstream Switch Port */
+ USP, /* CXL Upstream Switch Port */
+};
+
+void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err)
+{
+ if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_TYPE)
+ pr_info("%s agent_type: %d, %s\n", pfx, prot_err->agent_type,
+ prot_err->agent_type < ARRAY_SIZE(prot_err_agent_type_strs)
+ ? prot_err_agent_type_strs[prot_err->agent_type]
+ : "unknown");
+
+ if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS) {
+ switch (prot_err->agent_type) {
+ /*
+ * According to UEFI 2.10 Section N.2.13, the term CXL Device
+ * is used to refer to Restricted CXL Device, CXL Device, CXL
+ * Logical Device or a CXL Fabric Manager Managed Logical
+ * Device.
+ */
+ case RCD:
+ case DEVICE:
+ case LD:
+ case FMLD:
+ case RP:
+ case DSP:
+ case USP:
+ pr_info("%s agent_address: %04x:%02x:%02x.%x\n",
+ pfx, prot_err->agent_addr.segment,
+ prot_err->agent_addr.bus,
+ prot_err->agent_addr.device,
+ prot_err->agent_addr.function);
+ break;
+ case RCH_DP:
+ pr_info("%s rcrb_base_address: 0x%016llx\n", pfx,
+ prot_err->agent_addr.rcrb_base_addr);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (prot_err->valid_bits & PROT_ERR_VALID_DEVICE_ID) {
+ const __u8 *class_code;
+
+ switch (prot_err->agent_type) {
+ case RCD:
+ case DEVICE:
+ case LD:
+ case FMLD:
+ case RP:
+ case DSP:
+ case USP:
+ pr_info("%s slot: %d\n", pfx,
+ prot_err->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
+ pr_info("%s vendor_id: 0x%04x, device_id: 0x%04x\n",
+ pfx, prot_err->device_id.vendor_id,
+ prot_err->device_id.device_id);
+ pr_info("%s sub_vendor_id: 0x%04x, sub_device_id: 0x%04x\n",
+ pfx, prot_err->device_id.subsystem_vendor_id,
+ prot_err->device_id.subsystem_id);
+ class_code = prot_err->device_id.class_code;
+ pr_info("%s class_code: %02x%02x\n", pfx,
+ class_code[1], class_code[0]);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER) {
+ switch (prot_err->agent_type) {
+ case RCD:
+ case DEVICE:
+ case LD:
+ case FMLD:
+ pr_info("%s lower_dw: 0x%08x, upper_dw: 0x%08x\n", pfx,
+ prot_err->dev_serial_num.lower_dw,
+ prot_err->dev_serial_num.upper_dw);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (prot_err->valid_bits & PROT_ERR_VALID_CAPABILITY) {
+ switch (prot_err->agent_type) {
+ case RCD:
+ case DEVICE:
+ case LD:
+ case FMLD:
+ case RP:
+ case DSP:
+ case USP:
+ print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4,
+ prot_err->capability,
+ sizeof(prot_err->capability), 0);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (prot_err->valid_bits & PROT_ERR_VALID_DVSEC) {
+ pr_info("%s DVSEC length: 0x%04x\n", pfx, prot_err->dvsec_len);
+
+ pr_info("%s CXL DVSEC:\n", pfx);
+ print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, (prot_err + 1),
+ prot_err->dvsec_len, 0);
+ }
+}
diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h
new file mode 100644
index 000000000000..86bfcf7909ec
--- /dev/null
+++ b/drivers/firmware/efi/cper_cxl.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * UEFI Common Platform Error Record (CPER) support for CXL Section.
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Author: Smita Koralahalli <[email protected]>
+ */
+
+#ifndef LINUX_CPER_CXL_H
+#define LINUX_CPER_CXL_H
+
+/* CXL Protocol Error Section */
+#define CPER_SEC_CXL_PROT_ERR \
+ GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \
+ 0x4B, 0x77, 0x10, 0x48)
+
+#pragma pack(1)
+
+/* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */
+struct cper_sec_prot_err {
+ u64 valid_bits;
+ u8 agent_type;
+ u8 reserved[7];
+
+ /*
+ * Except for RCH Downstream Port, all the remaining CXL Agent
+ * types are uniquely identified by the PCIe compatible SBDF number.
+ */
+ union {
+ u64 rcrb_base_addr;
+ struct {
+ u8 function;
+ u8 device;
+ u8 bus;
+ u16 segment;
+ u8 reserved_1[3];
+ };
+ } agent_addr;
+
+ struct {
+ u16 vendor_id;
+ u16 device_id;
+ u16 subsystem_vendor_id;
+ u16 subsystem_id;
+ u8 class_code[2];
+ u16 slot;
+ u8 reserved_1[4];
+ } device_id;
+
+ struct {
+ u32 lower_dw;
+ u32 upper_dw;
+ } dev_serial_num;
+
+ u8 capability[60];
+ u16 dvsec_len;
+ u16 err_len;
+ u8 reserved_2[4];
+};
+
+#pragma pack()
+
+void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err);
+
+#endif //__CPER_CXL_
--
2.17.1
On Fri, 28 Oct 2022 20:09:49 +0000
Smita Koralahalli <[email protected]> wrote:
> Add support for decoding CXL Protocol Error Section as defined in UEFI 2.10
> Section N.2.13.
>
> Do the section decoding in a new cper_cxl.c file. This new file will be
> used in the future for more CXL CPERs decode support. Add this to the
> existing UEFI_CPER config.
>
> Signed-off-by: Smita Koralahalli <[email protected]>
A few really trivial things inline.
Reviewed-by: Jonathan Cameron <[email protected]>
> ---
> v2:
> Added all fields for decoding. Printed DVSEC and Capability ID.
> Added additional CXL Agent Types based on UEFI 2.10.
> Named the unnamed union to agent addr.
> Changed field name from agent_addr -> rcrb_base_addr.
> subsystem_device_id -> subsystem_id.
> Commented why different union elements are relevant.
> Provided other comments wherever necessary.
> ---
> drivers/firmware/efi/Makefile | 2 +-
> drivers/firmware/efi/cper.c | 9 ++
> drivers/firmware/efi/cper_cxl.c | 152 ++++++++++++++++++++++++++++++++
> drivers/firmware/efi/cper_cxl.h | 66 ++++++++++++++
> 4 files changed, 228 insertions(+), 1 deletion(-)
> create mode 100644 drivers/firmware/efi/cper_cxl.c
> create mode 100644 drivers/firmware/efi/cper_cxl.h
>
> diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
> index 8d151e332584..4f332de54173 100644
> --- a/drivers/firmware/efi/Makefile
> +++ b/drivers/firmware/efi/Makefile
> @@ -19,7 +19,7 @@ endif
> obj-$(CONFIG_EFI_PARAMS_FROM_FDT) += fdtparams.o
> obj-$(CONFIG_EFI_ESRT) += esrt.o
> obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o
> -obj-$(CONFIG_UEFI_CPER) += cper.o
> +obj-$(CONFIG_UEFI_CPER) += cper.o cper_cxl.o
> obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o
> obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o
> subdir-$(CONFIG_EFI_STUB) += libstub
> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index e4e5ea7ce910..181deb9af527 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c
> @@ -24,6 +24,7 @@
> #include <linux/bcd.h>
> #include <acpi/ghes.h>
> #include <ras/ras_event.h>
> +#include "cper_cxl.h"
>
> /*
> * CPER record ID need to be unique even after reboot, because record
> @@ -595,6 +596,14 @@ cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata
> cper_print_fw_err(newpfx, gdata, fw_err);
> else
> goto err_section_too_small;
> + } else if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR)) {
> + struct cper_sec_prot_err *prot_err = acpi_hest_get_payload(gdata);
> +
> + printk("%ssection_type: CXL Protocol Error\n", newpfx);
> + if (gdata->error_data_length >= sizeof(*prot_err))
> + cper_print_prot_err(newpfx, prot_err);
> + else
> + goto err_section_too_small;
> } else {
> const void *err = acpi_hest_get_payload(gdata);
>
> diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c
> new file mode 100644
> index 000000000000..6c94af234be9
> --- /dev/null
> +++ b/drivers/firmware/efi/cper_cxl.c
> @@ -0,0 +1,152 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * UEFI Common Platform Error Record (CPER) support for CXL Section.
> + *
> + * Copyright (C) 2022 Advanced Micro Devices, Inc.
> + *
> + * Author: Smita Koralahalli <[email protected]>
> + */
> +
> +#include <linux/cper.h>
> +#include "cper_cxl.h"
> +
> +#define PROT_ERR_VALID_AGENT_TYPE BIT_ULL(0)
> +#define PROT_ERR_VALID_AGENT_ADDRESS BIT_ULL(1)
> +#define PROT_ERR_VALID_DEVICE_ID BIT_ULL(2)
> +#define PROT_ERR_VALID_SERIAL_NUMBER BIT_ULL(3)
> +#define PROT_ERR_VALID_CAPABILITY BIT_ULL(4)
> +#define PROT_ERR_VALID_DVSEC BIT_ULL(5)
Error log valid should probably be here. Bit 6.
We might not use it, but it's a bit odd to have all but one entry
from the spec defined.
> +
> +static const char * const prot_err_agent_type_strs[] = {
> + "Restricted CXL Device",
> + "Restricted CXL Host Downstream Port",
> + "CXL Device",
> + "CXL Logical Device",
> + "CXL Fabric Manager managed Logical Device",
> + "CXL Root Port",
> + "CXL Downstream Switch Port",
> + "CXL Upstream Switch Port",
> +};
> +
> +/*
> + * The layout of the enumeration and the values matches CXL Agent Type
> + * field in the UEFI 2.10 Section N.2.13,
> + */
> +enum {
> + RCD, /* Restricted CXL Device */
> + RCH_DP, /* Restricted CXL Host Downstream Port */
> + DEVICE, /* CXL Device */
> + LD, /* CXL Logical Device */
> + FMLD, /* CXL Fabric Manager managed Logical Device */
> + RP, /* CXL Root Port */
> + DSP, /* CXL Downstream Switch Port */
> + USP, /* CXL Upstream Switch Port */
> +};
I would move this above the prot_err_agent_type_strs and use
[RCD] = "Restricted CXL Device". Saves on the trivial effort
for a reviewer of checking the two match up.
Probably don't need the comments on the enum as well as the
strings that say the same thing...
> +
> +void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err)
> +{
> + if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_TYPE)
> + pr_info("%s agent_type: %d, %s\n", pfx, prot_err->agent_type,
> + prot_err->agent_type < ARRAY_SIZE(prot_err_agent_type_strs)
> + ? prot_err_agent_type_strs[prot_err->agent_type]
> + : "unknown");
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS) {
> + switch (prot_err->agent_type) {
> + /*
> + * According to UEFI 2.10 Section N.2.13, the term CXL Device
> + * is used to refer to Restricted CXL Device, CXL Device, CXL
> + * Logical Device or a CXL Fabric Manager Managed Logical
> + * Device.
> + */
> + case RCD:
> + case DEVICE:
> + case LD:
> + case FMLD:
> + case RP:
> + case DSP:
> + case USP:
> + pr_info("%s agent_address: %04x:%02x:%02x.%x\n",
> + pfx, prot_err->agent_addr.segment,
> + prot_err->agent_addr.bus,
> + prot_err->agent_addr.device,
> + prot_err->agent_addr.function);
> + break;
> + case RCH_DP:
> + pr_info("%s rcrb_base_address: 0x%016llx\n", pfx,
> + prot_err->agent_addr.rcrb_base_addr);
> + break;
> + default:
> + break;
> + }
> + }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_DEVICE_ID) {
> + const __u8 *class_code;
> +
> + switch (prot_err->agent_type) {
> + case RCD:
> + case DEVICE:
> + case LD:
> + case FMLD:
> + case RP:
> + case DSP:
> + case USP:
> + pr_info("%s slot: %d\n", pfx,
> + prot_err->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
Probably not in keeping with other CPER handling (because that's all ancient) but
I'm a great fan of FIELD_GET() and masks for cases like this.
I this particular case the slot field goes all they way to bit 15 so there
are no reserved bits up there, but I had to go check that which a FIELD_GET()/ mask
approach would have saved me doing.
> + pr_info("%s vendor_id: 0x%04x, device_id: 0x%04x\n",
> + pfx, prot_err->device_id.vendor_id,
> + prot_err->device_id.device_id);
> + pr_info("%s sub_vendor_id: 0x%04x, sub_device_id: 0x%04x\n",
> + pfx, prot_err->device_id.subsystem_vendor_id,
> + prot_err->device_id.subsystem_id);
> + class_code = prot_err->device_id.class_code;
> + pr_info("%s class_code: %02x%02x\n", pfx,
> + class_code[1], class_code[0]);
> + break;
> + default:
> + break;
> + }
> + }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER) {
> + switch (prot_err->agent_type) {
> + case RCD:
> + case DEVICE:
> + case LD:
> + case FMLD:
> + pr_info("%s lower_dw: 0x%08x, upper_dw: 0x%08x\n", pfx,
> + prot_err->dev_serial_num.lower_dw,
> + prot_err->dev_serial_num.upper_dw);
> + break;
> + default:
> + break;
> + }
> + }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_CAPABILITY) {
> + switch (prot_err->agent_type) {
> + case RCD:
> + case DEVICE:
> + case LD:
> + case FMLD:
> + case RP:
> + case DSP:
> + case USP:
> + print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4,
> + prot_err->capability,
> + sizeof(prot_err->capability), 0);
> + break;
> + default:
> + break;
> + }
> + }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_DVSEC) {
> + pr_info("%s DVSEC length: 0x%04x\n", pfx, prot_err->dvsec_len);
> +
> + pr_info("%s CXL DVSEC:\n", pfx);
> + print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, (prot_err + 1),
Excess brackets?
> + prot_err->dvsec_len, 0);
> + }
> +}
> diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h
> new file mode 100644
> index 000000000000..86bfcf7909ec
> --- /dev/null
> +++ b/drivers/firmware/efi/cper_cxl.h
> @@ -0,0 +1,66 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * UEFI Common Platform Error Record (CPER) support for CXL Section.
> + *
> + * Copyright (C) 2022 Advanced Micro Devices, Inc.
> + *
> + * Author: Smita Koralahalli <[email protected]>
> + */
> +
> +#ifndef LINUX_CPER_CXL_H
> +#define LINUX_CPER_CXL_H
> +
> +/* CXL Protocol Error Section */
> +#define CPER_SEC_CXL_PROT_ERR \
> + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \
> + 0x4B, 0x77, 0x10, 0x48)
> +
> +#pragma pack(1)
> +
> +/* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */
> +struct cper_sec_prot_err {
> + u64 valid_bits;
> + u8 agent_type;
> + u8 reserved[7];
> +
> + /*
> + * Except for RCH Downstream Port, all the remaining CXL Agent
> + * types are uniquely identified by the PCIe compatible SBDF number.
> + */
> + union {
> + u64 rcrb_base_addr;
> + struct {
> + u8 function;
> + u8 device;
> + u8 bus;
> + u16 segment;
> + u8 reserved_1[3];
> + };
> + } agent_addr;
> +
> + struct {
> + u16 vendor_id;
> + u16 device_id;
> + u16 subsystem_vendor_id;
> + u16 subsystem_id;
> + u8 class_code[2];
> + u16 slot;
> + u8 reserved_1[4];
> + } device_id;
> +
> + struct {
> + u32 lower_dw;
> + u32 upper_dw;
> + } dev_serial_num;
> +
> + u8 capability[60];
> + u16 dvsec_len;
> + u16 err_len;
> + u8 reserved_2[4];
> +};
> +
> +#pragma pack()
> +
> +void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err);
> +
> +#endif //__CPER_CXL_
On Fri, 28 Oct 2022 at 22:10, Smita Koralahalli
<[email protected]> wrote:
>
> This series adds decoding for the CXL Protocol Errors Common Platform
> Error Record.
>
> Smita Koralahalli (2):
> efi/cper, cxl: Decode CXL Protocol Error Section
> efi/cper, cxl: Decode CXL Error Log
>
I've queued these up now (with Jonathan's ack)
> drivers/firmware/efi/Makefile | 2 +-
> drivers/firmware/efi/cper.c | 9 ++
> drivers/firmware/efi/cper_cxl.c | 179 ++++++++++++++++++++++++++++++++
> drivers/firmware/efi/cper_cxl.h | 66 ++++++++++++
> include/linux/cxl_err.h | 22 ++++
> 5 files changed, 277 insertions(+), 1 deletion(-)
> create mode 100644 drivers/firmware/efi/cper_cxl.c
> create mode 100644 drivers/firmware/efi/cper_cxl.h
> create mode 100644 include/linux/cxl_err.h
>
> --
> 2.17.1
>
On Fri, 28 Oct 2022 20:09:49 +0000
Smita Koralahalli <[email protected]> wrote:
> Add support for decoding CXL Protocol Error Section as defined in UEFI 2.10
> Section N.2.13.
>
> Do the section decoding in a new cper_cxl.c file. This new file will be
> used in the future for more CXL CPERs decode support. Add this to the
> existing UEFI_CPER config.
>
> Signed-off-by: Smita Koralahalli <[email protected]>
Obviously very late with this comment, but I now have a test setup running
so a minor formatting issue became apparent that would be nice to tidy up
I think.
> +void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err)
> +{
> + if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_TYPE)
> + pr_info("%s agent_type: %d, %s\n", pfx, prot_err->agent_type,
> + prot_err->agent_type < ARRAY_SIZE(prot_err_agent_type_strs)
> + ? prot_err_agent_type_strs[prot_err->agent_type]
> + : "unknown");
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_AGENT_ADDRESS) {
> + switch (prot_err->agent_type) {
> + /*
> + * According to UEFI 2.10 Section N.2.13, the term CXL Device
> + * is used to refer to Restricted CXL Device, CXL Device, CXL
> + * Logical Device or a CXL Fabric Manager Managed Logical
> + * Device.
> + */
> + case RCD:
> + case DEVICE:
> + case LD:
> + case FMLD:
> + case RP:
> + case DSP:
> + case USP:
> + pr_info("%s agent_address: %04x:%02x:%02x.%x\n",
> + pfx, prot_err->agent_addr.segment,
> + prot_err->agent_addr.bus,
> + prot_err->agent_addr.device,
> + prot_err->agent_addr.function);
> + break;
> + case RCH_DP:
> + pr_info("%s rcrb_base_address: 0x%016llx\n", pfx,
> + prot_err->agent_addr.rcrb_base_addr);
> + break;
> + default:
> + break;
> + }
> + }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_DEVICE_ID) {
> + const __u8 *class_code;
> +
> + switch (prot_err->agent_type) {
> + case RCD:
> + case DEVICE:
> + case LD:
> + case FMLD:
> + case RP:
> + case DSP:
> + case USP:
> + pr_info("%s slot: %d\n", pfx,
> + prot_err->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
> + pr_info("%s vendor_id: 0x%04x, device_id: 0x%04x\n",
> + pfx, prot_err->device_id.vendor_id,
> + prot_err->device_id.device_id);
> + pr_info("%s sub_vendor_id: 0x%04x, sub_device_id: 0x%04x\n",
> + pfx, prot_err->device_id.subsystem_vendor_id,
> + prot_err->device_id.subsystem_id);
> + class_code = prot_err->device_id.class_code;
> + pr_info("%s class_code: %02x%02x\n", pfx,
> + class_code[1], class_code[0]);
> + break;
> + default:
> + break;
> + }
> + }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_SERIAL_NUMBER) {
> + switch (prot_err->agent_type) {
> + case RCD:
> + case DEVICE:
> + case LD:
> + case FMLD:
> + pr_info("%s lower_dw: 0x%08x, upper_dw: 0x%08x\n", pfx,
> + prot_err->dev_serial_num.lower_dw,
> + prot_err->dev_serial_num.upper_dw);
Doesn't say what it is the lower dw of. Good to add a mention of serial number.
> + break;
> + default:
> + break;
> + }
> + }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_CAPABILITY) {
> + switch (prot_err->agent_type) {
> + case RCD:
> + case DEVICE:
> + case LD:
> + case FMLD:
> + case RP:
> + case DSP:
> + case USP:
No heading for this. Which means the output just ends up with something like:
lower_dw: 0x00000004, upper_dw = 0x00000000
00000000: 00020010 00008000 0000000f 00000411
00000010: 00110000 00000000 00000000 00000000
etc So also wants a space before the hex dump or indeed greater indent once that's
added.
Similar formatting things apply to later changes to the message such as the header
log register in the next patch.
Whilst not a bug as such, pretty printing is always nice :)
Jonathan
> + print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4,
> + prot_err->capability,
> + sizeof(prot_err->capability), 0);
> + break;
> + default:
> + break;
> + }
> + }
> +
> + if (prot_err->valid_bits & PROT_ERR_VALID_DVSEC) {
> + pr_info("%s DVSEC length: 0x%04x\n", pfx, prot_err->dvsec_len);
> +
> + pr_info("%s CXL DVSEC:\n", pfx);
> + print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, (prot_err + 1),
> + prot_err->dvsec_len, 0);
> + }
> +}
> diff --git a/drivers/firmware/efi/cper_cxl.h b/drivers/firmware/efi/cper_cxl.h
> new file mode 100644
> index 000000000000..86bfcf7909ec
> --- /dev/null
> +++ b/drivers/firmware/efi/cper_cxl.h
> @@ -0,0 +1,66 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * UEFI Common Platform Error Record (CPER) support for CXL Section.
> + *
> + * Copyright (C) 2022 Advanced Micro Devices, Inc.
> + *
> + * Author: Smita Koralahalli <[email protected]>
> + */
> +
> +#ifndef LINUX_CPER_CXL_H
> +#define LINUX_CPER_CXL_H
> +
> +/* CXL Protocol Error Section */
> +#define CPER_SEC_CXL_PROT_ERR \
> + GUID_INIT(0x80B9EFB4, 0x52B5, 0x4DE3, 0xA7, 0x77, 0x68, 0x78, \
> + 0x4B, 0x77, 0x10, 0x48)
> +
> +#pragma pack(1)
> +
> +/* Compute Express Link Protocol Error Section, UEFI v2.10 sec N.2.13 */
> +struct cper_sec_prot_err {
> + u64 valid_bits;
> + u8 agent_type;
> + u8 reserved[7];
> +
> + /*
> + * Except for RCH Downstream Port, all the remaining CXL Agent
> + * types are uniquely identified by the PCIe compatible SBDF number.
> + */
> + union {
> + u64 rcrb_base_addr;
> + struct {
> + u8 function;
> + u8 device;
> + u8 bus;
> + u16 segment;
> + u8 reserved_1[3];
> + };
> + } agent_addr;
> +
> + struct {
> + u16 vendor_id;
> + u16 device_id;
> + u16 subsystem_vendor_id;
> + u16 subsystem_id;
> + u8 class_code[2];
> + u16 slot;
> + u8 reserved_1[4];
> + } device_id;
> +
> + struct {
> + u32 lower_dw;
> + u32 upper_dw;
> + } dev_serial_num;
> +
> + u8 capability[60];
> + u16 dvsec_len;
> + u16 err_len;
> + u8 reserved_2[4];
> +};
> +
> +#pragma pack()
> +
> +void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_err);
> +
> +#endif //__CPER_CXL_