2024-03-12 21:27:06

by Zaid Alali

[permalink] [raw]
Subject: [RFC PATCH 0/5] Enable EINJv2 Support

This patch set intends to enable EINJv2 support. The goal of this
update is to allow the driver to simultaneously support EINJ and EINJv2.
The implementation follows a proposed ACPI specs(1)(2) that enables the
driver to discover system capabilities through GET_ERROR_TYPE.

Proposed ACPI spces:
(1) https://bugzilla.tianocore.org/show_bug.cgi?id=4615
(2) https://bugzilla.tianocore.org/attachment.cgi?id=1446

Zaid Alali (5):
ACPI: APEI: EINJ: Enable the discovery of EINJv2 capabilities
ACPI: APEI: EINJ: Add einjv2 extension struct
ACPI: APEI: EINJ: Add debugfs files for EINJv2 support
ACPI: APEI: EINJ: Enable EINJv2 error injections
ACPI: APEI: EINJ: Update the documentation for EINJv2 support

.../firmware-guide/acpi/apei/einj.rst | 44 ++++-
drivers/acpi/apei/einj.c | 178 ++++++++++++++++--
include/acpi/actbl1.h | 1 +
3 files changed, 204 insertions(+), 19 deletions(-)

--
2.34.1



2024-03-12 21:27:15

by Zaid Alali

[permalink] [raw]
Subject: [RFC PATCH 1/5] ACPI: APEI: EINJ: Enable the discovery of EINJv2 capabilities

EINJv2 capabilities can be discovered by checking the return value
of get_error_type, where bit 30 set indicates EINJv2 support. This
commit enables the driver to show all supported error injections
for EINJ and EINJv2 at the same time.

Signed-off-by: Zaid Alali <[email protected]>
---
drivers/acpi/apei/einj.c | 37 ++++++++++++++++++++++++++++++-------
include/acpi/actbl1.h | 1 +
2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 89fb9331c611..90efbcbf6b54 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -32,6 +32,7 @@
#define SLEEP_UNIT_MAX 5000 /* 5ms */
/* Firmware should respond within 1 seconds */
#define FIRMWARE_TIMEOUT (1 * USEC_PER_SEC)
+#define ACPI65_EINJV2_SUPP BIT(30)
#define ACPI5_VENDOR_BIT BIT(31)
#define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \
ACPI_EINJ_MEMORY_UNCORRECTABLE | \
@@ -145,13 +146,13 @@ static void einj_exec_ctx_init(struct apei_exec_context *ctx)
EINJ_TAB_ENTRY(einj_tab), einj_tab->entries);
}

-static int __einj_get_available_error_type(u32 *type)
+static int __einj_get_available_error_type(u32 *type, int version)
{
struct apei_exec_context ctx;
int rc;

einj_exec_ctx_init(&ctx);
- rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE);
+ rc = apei_exec_run(&ctx, version);
if (rc)
return rc;
*type = apei_exec_ctx_get_output(&ctx);
@@ -160,12 +161,12 @@ static int __einj_get_available_error_type(u32 *type)
}

/* Get error injection capabilities of the platform */
-static int einj_get_available_error_type(u32 *type)
+static int einj_get_available_error_type(u32 *type, int version)
{
int rc;

mutex_lock(&einj_mutex);
- rc = __einj_get_available_error_type(type);
+ rc = __einj_get_available_error_type(type, version);
mutex_unlock(&einj_mutex);

return rc;
@@ -221,9 +222,14 @@ static void check_vendor_extension(u64 paddr,

static void *einj_get_parameter_address(void)
{
- int i;
+ int i, rc;
u64 pa_v4 = 0, pa_v5 = 0;
struct acpi_whea_header *entry;
+ u32 error_type = 0;
+
+ rc = einj_get_available_error_type(&error_type, ACPI_EINJ_GET_ERROR_TYPE);
+ if (rc)
+ return NULL;

entry = EINJ_TAB_ENTRY(einj_tab);
for (i = 0; i < einj_tab->entries; i++) {
@@ -615,19 +621,35 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
{ BIT(17), "CXL.mem Protocol Uncorrectable fatal" },
{ BIT(31), "Vendor Defined Error Types" },
};
+static struct { u32 mask; const char *str; } const einjv2_error_type_string[] = {
+ { BIT(0), "EINJV2 Processor Error" },
+ { BIT(1), "EINJV2 Memory Error" },
+ { BIT(2), "EINJV2 PCI Express Error" },
+};

static int available_error_type_show(struct seq_file *m, void *v)
{
int rc;
u32 error_type = 0;

- rc = einj_get_available_error_type(&error_type);
+ rc = einj_get_available_error_type(&error_type, ACPI_EINJ_GET_ERROR_TYPE);
if (rc)
return rc;
for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++)
if (error_type & einj_error_type_string[pos].mask)
seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
einj_error_type_string[pos].str);
+ if (error_type & ACPI65_EINJV2_SUPP) {
+ rc = einj_get_available_error_type(&error_type, ACPI_EINJV2_GET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ seq_printf(m, "====================\n");
+ for (int pos = 0; pos < ARRAY_SIZE(einjv2_error_type_string); pos++)
+ if (error_type & einjv2_error_type_string[pos].mask)
+ seq_printf(m, "0x%08x\t%s\n", einjv2_error_type_string[pos].mask,
+ einjv2_error_type_string[pos].str);
+
+ }

return 0;
}
@@ -662,7 +684,8 @@ static int error_type_set(void *data, u64 val)
if (tval & (tval - 1))
return -EINVAL;
if (!vendor) {
- rc = einj_get_available_error_type(&available_error_type);
+ rc = einj_get_available_error_type(&available_error_type,
+ ACPI_EINJ_GET_ERROR_TYPE);
if (rc)
return rc;
if (!(val & available_error_type))
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index a33375e055ad..a07d564b0590 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -1030,6 +1030,7 @@ enum acpi_einj_actions {
ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8,
ACPI_EINJ_GET_EXECUTE_TIMINGS = 9,
ACPI_EINJ_ACTION_RESERVED = 10, /* 10 and greater are reserved */
+ ACPI_EINJV2_GET_ERROR_TYPE = 0x11,
ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */
};

--
2.34.1


2024-03-12 21:28:25

by Zaid Alali

[permalink] [raw]
Subject: [RFC PATCH 2/5] ACPI: APEI: EINJ: Add einjv2 extension struct

Proposed ACPI specifications(1) enables EINJv2 by extending
set_error_type_with_address strcut. This commit adds einjv2
extension struct and EINJv2 error types to prepare the driver
for EINJv2 support.

(1) https://bugzilla.tianocore.org/show_bug.cgi?id=4615

Signed-off-by: Zaid Alali <[email protected]>
---
drivers/acpi/apei/einj.c | 26 +++++++++++++++++++++++++-
1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 90efbcbf6b54..119f7accd1c9 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -43,6 +43,28 @@
*/
static int acpi5;

+struct syndrome_array {
+ union {
+ u32 acpi_id;
+ u32 device_id;
+ u32 pcie_sbdf;
+ u8 fru_id[16];
+ } comp_id;
+ union {
+ u32 proc_synd;
+ u32 mem_synd;
+ u32 pcie_synd;
+ u8 vendor_synd[16];
+ } comp_synd;
+};
+
+struct einjv2_extension_struct {
+ u32 length;
+ u16 revision;
+ u16 component_arr_count;
+ struct syndrome_array component_arr[];
+} __packed;
+
struct set_error_type_with_address {
u32 type;
u32 vendor_extension;
@@ -51,7 +73,9 @@ struct set_error_type_with_address {
u64 memory_address;
u64 memory_address_range;
u32 pcie_sbdf;
-};
+ struct einjv2_extension_struct einjv2_struct;
+} __packed;
+
enum {
SETWA_FLAGS_APICID = 1,
SETWA_FLAGS_MEM = 2,
--
2.34.1


2024-03-12 21:28:38

by Zaid Alali

[permalink] [raw]
Subject: [RFC PATCH 3/5] ACPI: APEI: EINJ: Add debugfs files for EINJv2 support

EINJv2 enables users to inject errors to multiple components/
devices at the same time. This commit creates a debugfs blob
file to be used for reading the user input for component array.

Signed-off-by: Zaid Alali <[email protected]>
---
drivers/acpi/apei/einj.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)

diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index 119f7accd1c9..ceac53aa0d3f 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -101,6 +101,10 @@ static struct debugfs_blob_wrapper vendor_blob;
static struct debugfs_blob_wrapper vendor_errors;
static char vendor_dev[64];

+static struct debugfs_blob_wrapper einjv2_component_arr;
+static u64 component_count;
+static void *user_input;
+
/*
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
* EINJ table through an unpublished extension. Use with caution as
@@ -810,6 +814,8 @@ static int __init einj_init(void)

einj_param = einj_get_parameter_address();
if ((param_extension || acpi5) && einj_param) {
+ u32 error_type;
+
debugfs_create_x32("flags", S_IRUSR | S_IWUSR, einj_debug_dir,
&error_flags);
debugfs_create_x64("param1", S_IRUSR | S_IWUSR, einj_debug_dir,
@@ -820,6 +826,25 @@ static int __init einj_init(void)
&error_param3);
debugfs_create_x64("param4", S_IRUSR | S_IWUSR, einj_debug_dir,
&error_param4);
+
+ rc = einj_get_available_error_type(&error_type, ACPI_EINJ_GET_ERROR_TYPE);
+ if (rc)
+ return rc;
+
+ if (error_type & ACPI65_EINJV2_SUPP) {
+ debugfs_create_x64("einjv2_component_count", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &component_count);
+ user_input = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!user_input) {
+ rc = -ENOMEM;
+ goto err_release;
+ }
+ einjv2_component_arr.data = user_input;
+ einjv2_component_arr.size = PAGE_SIZE;
+ debugfs_create_blob("einjv2_component_array", S_IRUSR | S_IWUSR,
+ einj_debug_dir, &einjv2_component_arr);
+ }
+
debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
einj_debug_dir, &notrigger);
}
@@ -871,6 +896,7 @@ static void __exit einj_exit(void)
apei_resources_fini(&einj_resources);
debugfs_remove_recursive(einj_debug_dir);
acpi_put_table((struct acpi_table_header *)einj_tab);
+ kfree(user_input);
}

module_init(einj_init);
--
2.34.1


2024-03-12 21:29:01

by Zaid Alali

[permalink] [raw]
Subject: [RFC PATCH 5/5] ACPI: APEI: EINJ: Update the documentation for EINJv2 support

Add documentation for the proposed ACPI specs for EINJv2(1)(2)

(1)https://bugzilla.tianocore.org/show_bug.cgi?id=4615
(2)https://bugzilla.tianocore.org/attachment.cgi?id=1446

Signed-off-by: Zaid Alali <[email protected]>
---
.../firmware-guide/acpi/apei/einj.rst | 44 ++++++++++++++++++-
1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/apei/einj.rst b/Documentation/firmware-guide/acpi/apei/einj.rst
index d6b61d22f525..7e5c3f71ccd1 100644
--- a/Documentation/firmware-guide/acpi/apei/einj.rst
+++ b/Documentation/firmware-guide/acpi/apei/einj.rst
@@ -57,8 +57,18 @@ The following files belong to it:
0x00000800 Platform Uncorrectable fatal
================ ===================================

+ ================ ===================================
+ Error Type Value Error Description
+ ================ ===================================
+ 0x00000001 EINJV2 Processor Error
+ 0x00000002 EINJV2 Memory Error
+ 0x00000004 EINJV2 PCI Express Error
+ ================ ===================================
+
The format of the file contents are as above, except present are only
- the available error types.
+ the available error types. The available Error types are discovered by
+ calling GET_ERROR_TYPE command, and if bit 30 is set in the returned
+ value, then EINJv2 is supported by the system.

- error_type

@@ -81,9 +91,11 @@ The following files belong to it:
Bit 0
Processor APIC field valid (see param3 below).
Bit 1
- Memory address and mask valid (param1 and param2).
+ Memory address and range valid (param1 and param2).
Bit 2
PCIe (seg,bus,dev,fn) valid (see param4 below).
+ Bit 3
+ EINJv2 extension structure is valid

If set to zero, legacy behavior is mimicked where the type of
injection specifies just one bit set, and param1 is multiplexed.
@@ -118,6 +130,17 @@ The following files belong to it:
this actually works depends on what operations the BIOS actually
includes in the trigger phase.

+- einjv2_component_count
+
+ The value from this file is used to set the "Component Array Count"
+ field of EINJv2 Extension Structure.
+
+- einjv2_component_array
+ The contents of this file are used to set the "Component Array" field
+ of the EINJv2 Extension Structure. The expected format is hex values
+ for component id and syndrom seperated by space, and multiple
+ components are seperated by new line.
+
BIOS versions based on the ACPI 4.0 specification have limited options
in controlling where the errors are injected. Your BIOS may support an
extension (enabled with the param_extension=1 module parameter, or boot
@@ -172,6 +195,23 @@ An error injection example::
# echo 0x8 > error_type # Choose correctable memory error
# echo 1 > error_inject # Inject now

+An EINJv2 error injection example::
+
+ # cd /sys/kernel/debug/apei/einj
+ # cat available_error_type # See which errors can be injected
+ 0x00000002 Processor Uncorrectable non-fatal
+ 0x00000008 Memory Correctable
+ 0x00000010 Memory Uncorrectable non-fatal
+ ==================
+ 0x00000001 EINJV2 Processor Error
+ 0x00000002 EINJV2 Memory Error
+
+ # echo 0x12345000 > param1 # Set memory address for injection
+ # echo 0xfffffffffffff000 > param2 # Range - anywhere in this page
+ # echo 0x2 > error_type # Choose EINJv2 memory error
+ # echo 0x8 > flags # set flags to indicate EINJv2
+ # echo 1 > error_inject # Inject now
+
You should see something like this in dmesg::

[22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR
--
2.34.1


2024-03-12 21:30:37

by Zaid Alali

[permalink] [raw]
Subject: [RFC PATCH 4/5] ACPI: APEI: EINJ: Enable EINJv2 error injections

This commit enable the driver to inject EINJv2 type errors.
The component array values are parsed from user_input and expected
to contain hex values for component id and syndrom seperated by
space, and multiple components are separated by new line.

for example:
component_id1 component_syndrom1
component_id2 component_syndrom2
:
component_id(n) component_syndrom(n)

Signed-off-by: Zaid Alali <[email protected]>
---
drivers/acpi/apei/einj.c | 89 ++++++++++++++++++++++++++++++++++++----
1 file changed, 80 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
index ceac53aa0d3f..9e31bf707ced 100644
--- a/drivers/acpi/apei/einj.c
+++ b/drivers/acpi/apei/einj.c
@@ -80,6 +80,13 @@ enum {
SETWA_FLAGS_APICID = 1,
SETWA_FLAGS_MEM = 2,
SETWA_FLAGS_PCIE_SBDF = 4,
+ SETWA_FLAGS_EINJV2 = 8,
+};
+
+enum {
+ EINJV2_PROCESSOR_ERROR = 0x1,
+ EINJV2_MEMORY_ERROR = 0x2,
+ EINJV2_PCIE_ERROR = 0x4,
};

/*
@@ -104,6 +111,7 @@ static char vendor_dev[64];
static struct debugfs_blob_wrapper einjv2_component_arr;
static u64 component_count;
static void *user_input;
+static int nr_components;

/*
* Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
@@ -275,11 +283,20 @@ static void *einj_get_parameter_address(void)
}
if (pa_v5) {
struct set_error_type_with_address *v5param;
-
v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
if (v5param) {
+ int offset, len;
+
acpi5 = 1;
check_vendor_extension(pa_v5, v5param);
+ if (error_type & ACPI65_EINJV2_SUPP) {
+ len = v5param->einjv2_struct.length;
+ offset = offsetof(struct einjv2_extension_struct, component_arr);
+ nr_components = (len-offset)/32;
+ acpi_os_unmap_iomem(v5param, sizeof(*v5param));
+ v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param) + (
+ (nr_components) * sizeof(struct syndrome_array)));
+ }
return v5param;
}
}
@@ -485,10 +502,47 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
v5param->flags = vendor_flags;
} else if (flags) {
v5param->flags = flags;
- v5param->memory_address = param1;
- v5param->memory_address_range = param2;
- v5param->apicid = param3;
- v5param->pcie_sbdf = param4;
+ if (flags & SETWA_FLAGS_MEM) {
+ v5param->memory_address = param1;
+ v5param->memory_address_range = param2;
+ }
+ if (flags & SETWA_FLAGS_EINJV2) {
+ if (component_count > nr_components)
+ return -EINVAL;
+
+ v5param->einjv2_struct.component_arr_count = component_count;
+ int count = 0, bytes_read, pos = 0;
+ unsigned int comp, synd;
+ struct syndrome_array *component_arr;
+
+ component_arr = v5param->einjv2_struct.component_arr;
+ while (sscanf(user_input+pos, "%x %x\n%n", &comp, &synd,
+ &bytes_read) == 2) {
+ count++;
+ pos += bytes_read;
+ if (count > component_count)
+ return -EINVAL;
+
+ switch (type) {
+ case EINJV2_PROCESSOR_ERROR:
+ component_arr[count-1].comp_id.acpi_id = comp;
+ component_arr[count-1].comp_synd.proc_synd = synd;
+ break;
+ case EINJV2_MEMORY_ERROR:
+ component_arr[count-1].comp_id.device_id = comp;
+ component_arr[count-1].comp_synd.mem_synd = synd;
+ break;
+ case EINJV2_PCIE_ERROR:
+ component_arr[count-1].comp_id.pcie_sbdf = comp;
+ component_arr[count-1].comp_synd.pcie_synd = synd;
+ break;
+ }
+ }
+
+ } else {
+ v5param->apicid = param3;
+ v5param->pcie_sbdf = param4;
+ }
} else {
switch (type) {
case ACPI_EINJ_PROCESSOR_CORRECTABLE:
@@ -572,9 +626,19 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,

/* If user manually set "flags", make sure it is legal */
if (flags && (flags &
- ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
+ ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF|SETWA_FLAGS_EINJV2)))
return -EINVAL;

+ /*check if type is a valid EINJv2 error type*/
+ if (flags & SETWA_FLAGS_EINJV2) {
+ u32 error_type;
+
+ rc = einj_get_available_error_type(&error_type, ACPI_EINJV2_GET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ if (!(type & error_type))
+ return -EINVAL;
+ }
/*
* We need extra sanity checks for memory errors.
* Other types leap directly to injection.
@@ -694,7 +758,7 @@ static int error_type_get(void *data, u64 *val)
static int error_type_set(void *data, u64 val)
{
int rc;
- u32 available_error_type = 0;
+ u32 available_error_type = 0, available_error_type_v2 = 0;
u32 tval, vendor;

/* Only low 32 bits for error type are valid */
@@ -716,7 +780,13 @@ static int error_type_set(void *data, u64 val)
ACPI_EINJ_GET_ERROR_TYPE);
if (rc)
return rc;
- if (!(val & available_error_type))
+ if (available_error_type & ACPI65_EINJV2_SUPP) {
+ rc = einj_get_available_error_type(&available_error_type_v2,
+ ACPI_EINJV2_GET_ERROR_TYPE);
+ if (rc)
+ return rc;
+ }
+ if (!(val & (available_error_type | available_error_type_v2)))
return -EINVAL;
}
error_type = val;
@@ -886,7 +956,8 @@ static void __exit einj_exit(void)
sizeof(struct set_error_type_with_address) :
sizeof(struct einj_parameter);

- acpi_os_unmap_iomem(einj_param, size);
+ acpi_os_unmap_iomem(einj_param,
+ size+((nr_components) * sizeof(struct syndrome_array)));
if (vendor_errors.size)
acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size);
}
--
2.34.1


2024-05-14 18:44:29

by John Allen

[permalink] [raw]
Subject: Re: [RFC PATCH 3/5] ACPI: APEI: EINJ: Add debugfs files for EINJv2 support

On Tue, Mar 12, 2024 at 02:26:24PM -0700, Zaid Alali wrote:
> EINJv2 enables users to inject errors to multiple components/
> devices at the same time. This commit creates a debugfs blob

Drop "This commit" and write this using imperative mood (as a command).
For example, "Create a debugfs blob file to be used for reading the user
input for the component array".

> file to be used for reading the user input for component array.
>
> Signed-off-by: Zaid Alali <[email protected]>
> ---
> drivers/acpi/apei/einj.c | 26 ++++++++++++++++++++++++++
> 1 file changed, 26 insertions(+)
>
> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
> index 119f7accd1c9..ceac53aa0d3f 100644
> --- a/drivers/acpi/apei/einj.c
> +++ b/drivers/acpi/apei/einj.c
> @@ -101,6 +101,10 @@ static struct debugfs_blob_wrapper vendor_blob;
> static struct debugfs_blob_wrapper vendor_errors;
> static char vendor_dev[64];
>
> +static struct debugfs_blob_wrapper einjv2_component_arr;
> +static u64 component_count;
> +static void *user_input;
> +
> /*
> * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
> * EINJ table through an unpublished extension. Use with caution as
> @@ -810,6 +814,8 @@ static int __init einj_init(void)
>
> einj_param = einj_get_parameter_address();
> if ((param_extension || acpi5) && einj_param) {
> + u32 error_type;
> +
> debugfs_create_x32("flags", S_IRUSR | S_IWUSR, einj_debug_dir,
> &error_flags);
> debugfs_create_x64("param1", S_IRUSR | S_IWUSR, einj_debug_dir,
> @@ -820,6 +826,25 @@ static int __init einj_init(void)
> &error_param3);
> debugfs_create_x64("param4", S_IRUSR | S_IWUSR, einj_debug_dir,
> &error_param4);
> +
> + rc = einj_get_available_error_type(&error_type, ACPI_EINJ_GET_ERROR_TYPE);
> + if (rc)
> + return rc;
> +
> + if (error_type & ACPI65_EINJV2_SUPP) {
> + debugfs_create_x64("einjv2_component_count", S_IRUSR | S_IWUSR,
> + einj_debug_dir, &component_count);
> + user_input = kzalloc(PAGE_SIZE, GFP_KERNEL);

What is the reason for a PAGE_SIZE allocation here?

I would guess that a typical user will only supply a couple entries in
the component array. If this is x86 and PAGE_SIZE is 4k, that's probably
fine, but IIUC, ARM can have up to 64k pages which seems like a lot more
than is needed here. I would think that since this is architecture
independent ACPI code, we want to avoid using something architecture
dependent like page size here anyway.

I also think that while 4k may be fine (and usually overkill) in most
cases, it's much smaller than the maximum possible number of entries.
While uncommon, we might want to allow for a larger allocation while
still keeping the default allocation small. Maybe a module parameter
could be used to allocate a bigger file if needed?

Thanks,
John

> + if (!user_input) {
> + rc = -ENOMEM;
> + goto err_release;
> + }
> + einjv2_component_arr.data = user_input;
> + einjv2_component_arr.size = PAGE_SIZE;
> + debugfs_create_blob("einjv2_component_array", S_IRUSR | S_IWUSR,
> + einj_debug_dir, &einjv2_component_arr);
> + }
> +
> debugfs_create_x32("notrigger", S_IRUSR | S_IWUSR,
> einj_debug_dir, &notrigger);
> }
> @@ -871,6 +896,7 @@ static void __exit einj_exit(void)
> apei_resources_fini(&einj_resources);
> debugfs_remove_recursive(einj_debug_dir);
> acpi_put_table((struct acpi_table_header *)einj_tab);
> + kfree(user_input);
> }
>
> module_init(einj_init);
> --
> 2.34.1
>

2024-05-14 19:11:17

by John Allen

[permalink] [raw]
Subject: Re: [RFC PATCH 1/5] ACPI: APEI: EINJ: Enable the discovery of EINJv2 capabilities

On Tue, Mar 12, 2024 at 02:26:22PM -0700, Zaid Alali wrote:
> EINJv2 capabilities can be discovered by checking the return value
> of get_error_type, where bit 30 set indicates EINJv2 support. This
> commit enables the driver to show all supported error injections

Drop "This commit" and write this using imperative mood (as a command).
For this one, "Enable the driver to show all supported error injections
for EINJ and EINJv2 at the same time".

> for EINJ and EINJv2 at the same time.
>
> Signed-off-by: Zaid Alali <[email protected]>
> ---
> drivers/acpi/apei/einj.c | 37 ++++++++++++++++++++++++++++++-------
> include/acpi/actbl1.h | 1 +
> 2 files changed, 31 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
> index 89fb9331c611..90efbcbf6b54 100644
> --- a/drivers/acpi/apei/einj.c
> +++ b/drivers/acpi/apei/einj.c
> @@ -32,6 +32,7 @@
> #define SLEEP_UNIT_MAX 5000 /* 5ms */
> /* Firmware should respond within 1 seconds */
> #define FIRMWARE_TIMEOUT (1 * USEC_PER_SEC)
> +#define ACPI65_EINJV2_SUPP BIT(30)
> #define ACPI5_VENDOR_BIT BIT(31)
> #define MEM_ERROR_MASK (ACPI_EINJ_MEMORY_CORRECTABLE | \
> ACPI_EINJ_MEMORY_UNCORRECTABLE | \
> @@ -145,13 +146,13 @@ static void einj_exec_ctx_init(struct apei_exec_context *ctx)
> EINJ_TAB_ENTRY(einj_tab), einj_tab->entries);
> }
>
> -static int __einj_get_available_error_type(u32 *type)
> +static int __einj_get_available_error_type(u32 *type, int version)
> {
> struct apei_exec_context ctx;
> int rc;
>
> einj_exec_ctx_init(&ctx);
> - rc = apei_exec_run(&ctx, ACPI_EINJ_GET_ERROR_TYPE);
> + rc = apei_exec_run(&ctx, version);
> if (rc)
> return rc;
> *type = apei_exec_ctx_get_output(&ctx);
> @@ -160,12 +161,12 @@ static int __einj_get_available_error_type(u32 *type)
> }
>
> /* Get error injection capabilities of the platform */
> -static int einj_get_available_error_type(u32 *type)
> +static int einj_get_available_error_type(u32 *type, int version)
> {
> int rc;
>
> mutex_lock(&einj_mutex);
> - rc = __einj_get_available_error_type(type);
> + rc = __einj_get_available_error_type(type, version);
> mutex_unlock(&einj_mutex);
>
> return rc;
> @@ -221,9 +222,14 @@ static void check_vendor_extension(u64 paddr,
>
> static void *einj_get_parameter_address(void)
> {
> - int i;
> + int i, rc;
> u64 pa_v4 = 0, pa_v5 = 0;
> struct acpi_whea_header *entry;
> + u32 error_type = 0;
> +
> + rc = einj_get_available_error_type(&error_type, ACPI_EINJ_GET_ERROR_TYPE);
> + if (rc)
> + return NULL;
>
> entry = EINJ_TAB_ENTRY(einj_tab);
> for (i = 0; i < einj_tab->entries; i++) {
> @@ -615,19 +621,35 @@ static struct { u32 mask; const char *str; } const einj_error_type_string[] = {
> { BIT(17), "CXL.mem Protocol Uncorrectable fatal" },
> { BIT(31), "Vendor Defined Error Types" },
> };
> +static struct { u32 mask; const char *str; } const einjv2_error_type_string[] = {
> + { BIT(0), "EINJV2 Processor Error" },
> + { BIT(1), "EINJV2 Memory Error" },
> + { BIT(2), "EINJV2 PCI Express Error" },
> +};
>
> static int available_error_type_show(struct seq_file *m, void *v)
> {
> int rc;
> u32 error_type = 0;
>
> - rc = einj_get_available_error_type(&error_type);
> + rc = einj_get_available_error_type(&error_type, ACPI_EINJ_GET_ERROR_TYPE);
> if (rc)
> return rc;
> for (int pos = 0; pos < ARRAY_SIZE(einj_error_type_string); pos++)
> if (error_type & einj_error_type_string[pos].mask)
> seq_printf(m, "0x%08x\t%s\n", einj_error_type_string[pos].mask,
> einj_error_type_string[pos].str);
> + if (error_type & ACPI65_EINJV2_SUPP) {
> + rc = einj_get_available_error_type(&error_type, ACPI_EINJV2_GET_ERROR_TYPE);
> + if (rc)
> + return rc;
> + seq_printf(m, "====================\n");

Seems like if we're going to visually split the EINJ and EINJv2 cases,
rather than just splitting them with the above line, it might be better
to be more descriptive. For example:

# cat available_error_type
EINJ error types:
0x00000002 Processor Uncorrectable non-fatal
0x00000008 Memory Correctable
0x00000010 Memory Uncorrectable non-fatal
EINJv2 error types:
0x00000001 EINJV2 Processor Error
0x00000002 EINJV2 Memory Error

> + for (int pos = 0; pos < ARRAY_SIZE(einjv2_error_type_string); pos++)
> + if (error_type & einjv2_error_type_string[pos].mask)
> + seq_printf(m, "0x%08x\t%s\n", einjv2_error_type_string[pos].mask,
> + einjv2_error_type_string[pos].str);

When you break a long function call like this, the second line should
align below the first character after the parenthesis. For example:

seq_printf(m, "0x%08x\t%s\n", einjv2_error_type_string[pos].mask,
einjv2_error_type_string[pos].str);

There are a few other places in the series where alignment should be
fixed in this way as well.

Thanks,
John

> +
> + }
>
> return 0;
> }
> @@ -662,7 +684,8 @@ static int error_type_set(void *data, u64 val)
> if (tval & (tval - 1))
> return -EINVAL;
> if (!vendor) {
> - rc = einj_get_available_error_type(&available_error_type);
> + rc = einj_get_available_error_type(&available_error_type,
> + ACPI_EINJ_GET_ERROR_TYPE);
> if (rc)
> return rc;
> if (!(val & available_error_type))
> diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
> index a33375e055ad..a07d564b0590 100644
> --- a/include/acpi/actbl1.h
> +++ b/include/acpi/actbl1.h
> @@ -1030,6 +1030,7 @@ enum acpi_einj_actions {
> ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS = 8,
> ACPI_EINJ_GET_EXECUTE_TIMINGS = 9,
> ACPI_EINJ_ACTION_RESERVED = 10, /* 10 and greater are reserved */
> + ACPI_EINJV2_GET_ERROR_TYPE = 0x11,
> ACPI_EINJ_TRIGGER_ERROR = 0xFF /* Except for this value */
> };
>
> --
> 2.34.1
>

2024-05-14 19:35:35

by John Allen

[permalink] [raw]
Subject: Re: [RFC PATCH 4/5] ACPI: APEI: EINJ: Enable EINJv2 error injections

On Tue, Mar 12, 2024 at 02:26:25PM -0700, Zaid Alali wrote:
> This commit enable the driver to inject EINJv2 type errors.

Same with other commits, commit descriptions should be in imperative
mood. For this one I might say something like, "Support injecting EINJv2
type errors." or something along those lines.

> The component array values are parsed from user_input and expected
> to contain hex values for component id and syndrom seperated by

s/syndrom/syndrome

> space, and multiple components are separated by new line.
>
> for example:
> component_id1 component_syndrom1

s/syndrom/syndrome

Same for the below lines.

> component_id2 component_syndrom2
> :
> component_id(n) component_syndrom(n)

This interface seems a bit clunky, but I can't think of a way a better
way to do it right now. Can you provide an example of how a user would
write to the component array file? This would be a good addition to the
example in the documentation provided in patch 5/5.

>
> Signed-off-by: Zaid Alali <[email protected]>
> ---
> drivers/acpi/apei/einj.c | 89 ++++++++++++++++++++++++++++++++++++----
> 1 file changed, 80 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c
> index ceac53aa0d3f..9e31bf707ced 100644
> --- a/drivers/acpi/apei/einj.c
> +++ b/drivers/acpi/apei/einj.c
> @@ -80,6 +80,13 @@ enum {
> SETWA_FLAGS_APICID = 1,
> SETWA_FLAGS_MEM = 2,
> SETWA_FLAGS_PCIE_SBDF = 4,
> + SETWA_FLAGS_EINJV2 = 8,
> +};
> +
> +enum {
> + EINJV2_PROCESSOR_ERROR = 0x1,
> + EINJV2_MEMORY_ERROR = 0x2,
> + EINJV2_PCIE_ERROR = 0x4,
> };
>
> /*
> @@ -104,6 +111,7 @@ static char vendor_dev[64];
> static struct debugfs_blob_wrapper einjv2_component_arr;
> static u64 component_count;
> static void *user_input;
> +static int nr_components;
>
> /*
> * Some BIOSes allow parameters to the SET_ERROR_TYPE entries in the
> @@ -275,11 +283,20 @@ static void *einj_get_parameter_address(void)
> }
> if (pa_v5) {
> struct set_error_type_with_address *v5param;
> -
> v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param));
> if (v5param) {
> + int offset, len;
> +
> acpi5 = 1;
> check_vendor_extension(pa_v5, v5param);
> + if (error_type & ACPI65_EINJV2_SUPP) {
> + len = v5param->einjv2_struct.length;
> + offset = offsetof(struct einjv2_extension_struct, component_arr);
> + nr_components = (len-offset)/32;

Binary operators like '-' and '/' should have a single space on either
side:
nr_components = (len - offset) / 32;

Some places have this right, but there are a number of other places in
the series that should fix this as well.

> + acpi_os_unmap_iomem(v5param, sizeof(*v5param));
> + v5param = acpi_os_map_iomem(pa_v5, sizeof(*v5param) + (
> + (nr_components) * sizeof(struct syndrome_array)));
> + }
> return v5param;
> }
> }
> @@ -485,10 +502,47 @@ static int __einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
> v5param->flags = vendor_flags;
> } else if (flags) {
> v5param->flags = flags;
> - v5param->memory_address = param1;
> - v5param->memory_address_range = param2;
> - v5param->apicid = param3;
> - v5param->pcie_sbdf = param4;
> + if (flags & SETWA_FLAGS_MEM) {
> + v5param->memory_address = param1;
> + v5param->memory_address_range = param2;
> + }
> + if (flags & SETWA_FLAGS_EINJV2) {
> + if (component_count > nr_components)
> + return -EINVAL;
> +
> + v5param->einjv2_struct.component_arr_count = component_count;
> + int count = 0, bytes_read, pos = 0;
> + unsigned int comp, synd;
> + struct syndrome_array *component_arr;
> +
> + component_arr = v5param->einjv2_struct.component_arr;
> + while (sscanf(user_input+pos, "%x %x\n%n", &comp, &synd,
> + &bytes_read) == 2) {
> + count++;
> + pos += bytes_read;
> + if (count > component_count)
> + return -EINVAL;
> +
> + switch (type) {
> + case EINJV2_PROCESSOR_ERROR:
> + component_arr[count-1].comp_id.acpi_id = comp;
> + component_arr[count-1].comp_synd.proc_synd = synd;
> + break;
> + case EINJV2_MEMORY_ERROR:
> + component_arr[count-1].comp_id.device_id = comp;
> + component_arr[count-1].comp_synd.mem_synd = synd;
> + break;
> + case EINJV2_PCIE_ERROR:
> + component_arr[count-1].comp_id.pcie_sbdf = comp;
> + component_arr[count-1].comp_synd.pcie_synd = synd;
> + break;

Nesting is getting pretty deep here. A separate function for parsing the
component array could improve readability.

> + }
> + }
> +

Remove unneeded new line above.

Thanks,
John

> + } else {
> + v5param->apicid = param3;
> + v5param->pcie_sbdf = param4;
> + }
> } else {
> switch (type) {
> case ACPI_EINJ_PROCESSOR_CORRECTABLE:
> @@ -572,9 +626,19 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2,
>
> /* If user manually set "flags", make sure it is legal */
> if (flags && (flags &
> - ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF)))
> + ~(SETWA_FLAGS_APICID|SETWA_FLAGS_MEM|SETWA_FLAGS_PCIE_SBDF|SETWA_FLAGS_EINJV2)))
> return -EINVAL;
>
> + /*check if type is a valid EINJv2 error type*/
> + if (flags & SETWA_FLAGS_EINJV2) {
> + u32 error_type;
> +
> + rc = einj_get_available_error_type(&error_type, ACPI_EINJV2_GET_ERROR_TYPE);
> + if (rc)
> + return rc;
> + if (!(type & error_type))
> + return -EINVAL;
> + }
> /*
> * We need extra sanity checks for memory errors.
> * Other types leap directly to injection.
> @@ -694,7 +758,7 @@ static int error_type_get(void *data, u64 *val)
> static int error_type_set(void *data, u64 val)
> {
> int rc;
> - u32 available_error_type = 0;
> + u32 available_error_type = 0, available_error_type_v2 = 0;
> u32 tval, vendor;
>
> /* Only low 32 bits for error type are valid */
> @@ -716,7 +780,13 @@ static int error_type_set(void *data, u64 val)
> ACPI_EINJ_GET_ERROR_TYPE);
> if (rc)
> return rc;
> - if (!(val & available_error_type))
> + if (available_error_type & ACPI65_EINJV2_SUPP) {
> + rc = einj_get_available_error_type(&available_error_type_v2,
> + ACPI_EINJV2_GET_ERROR_TYPE);
> + if (rc)
> + return rc;
> + }
> + if (!(val & (available_error_type | available_error_type_v2)))
> return -EINVAL;
> }
> error_type = val;
> @@ -886,7 +956,8 @@ static void __exit einj_exit(void)
> sizeof(struct set_error_type_with_address) :
> sizeof(struct einj_parameter);
>
> - acpi_os_unmap_iomem(einj_param, size);
> + acpi_os_unmap_iomem(einj_param,
> + size+((nr_components) * sizeof(struct syndrome_array)));
> if (vendor_errors.size)
> acpi_os_unmap_memory(vendor_errors.data, vendor_errors.size);
> }
> --
> 2.34.1
>