To support GHES_ASSIST on Machine Check Architecture (MCA) error sources,
a set of GHES structures is provided by the system firmware for each MCA
error source. Each of these sets consists of a GHES structure for each MCA
bank on each logical CPU, with all structures of a set sharing a common
Related Source ID, equal to the Source ID of one of the MCA error source
structures.[1] On SOCs with large core counts, this typically equates to
tens of thousands of GHES_ASSIST structures for MCA under
"/sys/bus/platform/drivers/GHES".
Support for GHES_ASSIST, however, hasn't been implemented in the kernel. As
such, the information provided through these structures is not consumed by
Linux. Moreover, these GHES_ASSIST structures for MCA, which are supposed
to provide supplemental information in the context of an error reported by
hardware, are set up as independent error sources by the kernel during HEST
initialization.
Additionally, if the Type field of the Notification structure, associated
with these GHES_ASSIST structures for MCA, is set to Polled, the kernel
sets up a timer for each individual structure. The duration of the timer
is derived from the Poll Interval field of the Notification structure. On
SOCs with high core counts, this will result in tens of thousands of
timers expiring periodically causing unnecessary preemptions and wastage
of CPU cycles. The problem will particularly intensify if Poll Interval
duration is not sufficiently high.
Since GHES_ASSIST support is not present in kernel, skip initialization
of GHES_ASSIST structures for MCA to eliminate their performance impact.
[1] ACPI specification 6.5, section 18.7
Signed-off-by: Avadhut Naik <[email protected]>
---
drivers/acpi/apei/hest.c | 49 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 49 insertions(+)
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 6aef1ee5e1bd..03cb0ece4235 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hest_disable);
static struct acpi_table_hest *__read_mostly hest_tab;
+/*
+ * Since GHES_ASSIST is not supported, skip initialization
+ * of GHES_ASSIST structures for MCA.
+ * During HEST parsing, detected MCA error sources are cached.
+ * Flags and Source Id fields from these cached values are
+ * then referred to determine if the encountered GHES_ASSIST
+ * structure should be initialized.
+ */
+static struct {
+ struct acpi_hest_ia_corrected *cmc;
+ struct acpi_hest_ia_machine_check *mc;
+ struct acpi_hest_ia_deferred_check *dmc;
+} mces;
+
static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
[ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
[ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
@@ -70,22 +84,53 @@ static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
len = sizeof(*cmc) + cmc->num_hardware_banks *
sizeof(struct acpi_hest_ia_error_bank);
+ mces.cmc = cmc;
} else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
struct acpi_hest_ia_machine_check *mc;
mc = (struct acpi_hest_ia_machine_check *)hest_hdr;
len = sizeof(*mc) + mc->num_hardware_banks *
sizeof(struct acpi_hest_ia_error_bank);
+ mces.mc = mc;
} else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) {
struct acpi_hest_ia_deferred_check *mc;
mc = (struct acpi_hest_ia_deferred_check *)hest_hdr;
len = sizeof(*mc) + mc->num_hardware_banks *
sizeof(struct acpi_hest_ia_error_bank);
+ mces.dmc = mc;
}
BUG_ON(len == -1);
return len;
};
+/* GHES and GHESv2 structures share the same format, starting from
+ * Source Id and ending in Error Status Block Length (inclusive).
+ */
+static bool is_ghes_assist_struct(struct acpi_hest_header *hest_hdr)
+{
+ struct acpi_hest_generic *ghes;
+ u16 related_source_id;
+
+ if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
+ hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
+ return false;
+
+ ghes = (struct acpi_hest_generic *)hest_hdr;
+ related_source_id = ghes->related_source_id;
+
+ if (mces.cmc && mces.cmc->flags & ACPI_HEST_GHES_ASSIST &&
+ related_source_id == mces.cmc->header.source_id)
+ return true;
+ else if (mces.dmc && mces.dmc->flags & ACPI_HEST_GHES_ASSIST &&
+ related_source_id == mces.dmc->header.source_id)
+ return true;
+ else if (mces.mc && mces.mc->flags & ACPI_HEST_GHES_ASSIST &&
+ related_source_id == mces.mc->header.source_id)
+ return true;
+
+ return false;
+}
+
typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
static int apei_hest_parse(apei_hest_func_t func, void *data)
@@ -113,6 +158,10 @@ static int apei_hest_parse(apei_hest_func_t func, void *data)
hest_hdr->source_id);
return -EINVAL;
}
+ if (is_ghes_assist_struct(hest_hdr)) {
+ hest_hdr = (void *)hest_hdr + len;
+ continue;
+ }
rc = func(hest_hdr, data);
if (rc)
--
2.34.1
On 9/28/23 2:42 PM, Avadhut Naik wrote:
> To support GHES_ASSIST on Machine Check Architecture (MCA) error sources,
> a set of GHES structures is provided by the system firmware for each MCA
> error source. Each of these sets consists of a GHES structure for each MCA
> bank on each logical CPU, with all structures of a set sharing a common
> Related Source ID, equal to the Source ID of one of the MCA error source
> structures.[1] On SOCs with large core counts, this typically equates to
> tens of thousands of GHES_ASSIST structures for MCA under
> "/sys/bus/platform/drivers/GHES".
>
> Support for GHES_ASSIST however, hasn't been implemented in the kernel. As
> such, the information provided through these structures is not consumed by
> Linux. Moreover, these GHES_ASSIST structures for MCA, which are supposed
> to provide supplemental information in context of an error reported by
> hardware, are setup as independent error sources by the kernel during HEST
> initialization.
>
> Additionally, if the Type field of the Notification structure, associated
> with these GHES_ASSIST structures for MCA, is set to Polled, the kernel
> sets up a timer for each individual structure. The duration of the timer
> is derived from the Poll Interval field of the Notification structure. On
> SOCs with high core counts, this will result in tens of thousands of
> timers expiring periodically causing unnecessary preemptions and wastage
> of CPU cycles. The problem will particularly intensify if Poll Interval
> duration is not sufficiently high.
>
> Since GHES_ASSIST support is not present in kernel, skip initialization
> of GHES_ASSIST structures for MCA to eliminate their performance impact.
>
> [1] ACPI specification 6.5, section 18.7
>
> Signed-off-by: Avadhut Naik <[email protected]>
> ---
> drivers/acpi/apei/hest.c | 49 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 49 insertions(+)
>
> diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
> index 6aef1ee5e1bd..03cb0ece4235 100644
> --- a/drivers/acpi/apei/hest.c
> +++ b/drivers/acpi/apei/hest.c
> @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hest_disable);
>
> static struct acpi_table_hest *__read_mostly hest_tab;
>
> +/*
> + * Since GHES_ASSIST is not supported, skip initialization
> + * of GHES_ASSIST structures for MCA.
> + * During HEST parsing, detected MCA error sources are cached.
> + * Flags and Source Id fields from these cached values are
> + * then referred to determine if the encountered GHES_ASSIST
> + * structure should be initialized.
> + */
> +static struct {
> + struct acpi_hest_ia_corrected *cmc;
> + struct acpi_hest_ia_machine_check *mc;
> + struct acpi_hest_ia_deferred_check *dmc;
> +} mces;
> +
> static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
> [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
> [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
> @@ -70,22 +84,53 @@ static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
> cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
> len = sizeof(*cmc) + cmc->num_hardware_banks *
> sizeof(struct acpi_hest_ia_error_bank);
> + mces.cmc = cmc;
> } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
> struct acpi_hest_ia_machine_check *mc;
> mc = (struct acpi_hest_ia_machine_check *)hest_hdr;
> len = sizeof(*mc) + mc->num_hardware_banks *
> sizeof(struct acpi_hest_ia_error_bank);
> + mces.mc = mc;
> } else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) {
> struct acpi_hest_ia_deferred_check *mc;
> mc = (struct acpi_hest_ia_deferred_check *)hest_hdr;
> len = sizeof(*mc) + mc->num_hardware_banks *
> sizeof(struct acpi_hest_ia_error_bank);
> + mces.dmc = mc;
> }
> BUG_ON(len == -1);
>
> return len;
> };
>
> +/* GHES and GHESv2 structures share the same format, starting from
Please follow the general kernel multi-line comment style here.
> + * Source Id and ending in Error Status Block Length (inclusive).
> + */
> +static bool is_ghes_assist_struct(struct acpi_hest_header *hest_hdr)
> +{
> + struct acpi_hest_generic *ghes;
> + u16 related_source_id;
> +
> + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
> + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
> + return false;
> +
> + ghes = (struct acpi_hest_generic *)hest_hdr;
> + related_source_id = ghes->related_source_id;
> +
> + if (mces.cmc && mces.cmc->flags & ACPI_HEST_GHES_ASSIST &&
> + related_source_id == mces.cmc->header.source_id)
IMO, there should be parentheses around statements with multiple terms
to improve clarity.
if (mces.cmc && (mces.cmc->flags & ACPI_HEST_GHES_ASSIST) &&
(related_source_id == mces.cmc->header.source_id))
> + return true;
> + else if (mces.dmc && mces.dmc->flags & ACPI_HEST_GHES_ASSIST &&
The "else" is not needed, because of the return.
IOW, use "else if" when you want to skip a check if the previous "if"
was true. Here the check is already skipped if the previous "if" was
true and did a return.
Also, the order of the types is "cmc, mc, dmc" in the struct definition
and hest_esrc_len(). But here it is "cmc, dmc, mc". This is just a minor
nit about consistency in ordering.
> + related_source_id == mces.dmc->header.source_id)
> + return true;
> + else if (mces.mc && mces.mc->flags & ACPI_HEST_GHES_ASSIST &&
> + related_source_id == mces.mc->header.source_id)
> + return true;
> +
> + return false;
> +}
> +
> typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
>
> static int apei_hest_parse(apei_hest_func_t func, void *data)
> @@ -113,6 +158,10 @@ static int apei_hest_parse(apei_hest_func_t func, void *data)
> hest_hdr->source_id);
> return -EINVAL;
> }
Newline here please.
> + if (is_ghes_assist_struct(hest_hdr)) {
> + hest_hdr = (void *)hest_hdr + len;
> + continue;
> + }
>
> rc = func(hest_hdr, data);
> if (rc)
Besides the minor formatting comments, this looks good to me.
Reviewed-by: Yazen Ghannam <[email protected]>
Thanks,
Yazen
Hi,
On 9/29/2023 09:40, Yazen Ghannam wrote:
> On 9/28/23 2:42 PM, Avadhut Naik wrote:
>> To support GHES_ASSIST on Machine Check Architecture (MCA) error sources,
>> a set of GHES structures is provided by the system firmware for each MCA
>> error source. Each of these sets consists of a GHES structure for each MCA
>> bank on each logical CPU, with all structures of a set sharing a common
>> Related Source ID, equal to the Source ID of one of the MCA error source
>> structures.[1] On SOCs with large core counts, this typically equates to
>> tens of thousands of GHES_ASSIST structures for MCA under
>> "/sys/bus/platform/drivers/GHES".
>>
>> Support for GHES_ASSIST however, hasn't been implemented in the kernel. As
>> such, the information provided through these structures is not consumed by
>> Linux. Moreover, these GHES_ASSIST structures for MCA, which are supposed
>> to provide supplemental information in context of an error reported by
>> hardware, are setup as independent error sources by the kernel during HEST
>> initialization.
>>
>> Additionally, if the Type field of the Notification structure, associated
>> with these GHES_ASSIST structures for MCA, is set to Polled, the kernel
>> sets up a timer for each individual structure. The duration of the timer
>> is derived from the Poll Interval field of the Notification structure. On
>> SOCs with high core counts, this will result in tens of thousands of
>> timers expiring periodically causing unnecessary preemptions and wastage
>> of CPU cycles. The problem will particularly intensify if Poll Interval
>> duration is not sufficiently high.
>>
>> Since GHES_ASSIST support is not present in kernel, skip initialization
>> of GHES_ASSIST structures for MCA to eliminate their performance impact.
>>
>> [1] ACPI specification 6.5, section 18.7
>>
>> Signed-off-by: Avadhut Naik <[email protected]>
>> ---
>> drivers/acpi/apei/hest.c | 49 ++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 49 insertions(+)
>>
>> diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
>> index 6aef1ee5e1bd..03cb0ece4235 100644
>> --- a/drivers/acpi/apei/hest.c
>> +++ b/drivers/acpi/apei/hest.c
>> @@ -37,6 +37,20 @@ EXPORT_SYMBOL_GPL(hest_disable);
>>
>> static struct acpi_table_hest *__read_mostly hest_tab;
>>
>> +/*
>> + * Since GHES_ASSIST is not supported, skip initialization
>> + * of GHES_ASSIST structures for MCA.
>> + * During HEST parsing, detected MCA error sources are cached.
>> + * Flags and Source Id fields from these cached values are
>> + * then referred to determine if the encountered GHES_ASSIST
>> + * structure should be initialized.
>> + */
>> +static struct {
>> + struct acpi_hest_ia_corrected *cmc;
>> + struct acpi_hest_ia_machine_check *mc;
>> + struct acpi_hest_ia_deferred_check *dmc;
>> +} mces;
>> +
>> static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = {
>> [ACPI_HEST_TYPE_IA32_CHECK] = -1, /* need further calculation */
>> [ACPI_HEST_TYPE_IA32_CORRECTED_CHECK] = -1,
>> @@ -70,22 +84,53 @@ static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
>> cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
>> len = sizeof(*cmc) + cmc->num_hardware_banks *
>> sizeof(struct acpi_hest_ia_error_bank);
>> + mces.cmc = cmc;
>> } else if (hest_type == ACPI_HEST_TYPE_IA32_CHECK) {
>> struct acpi_hest_ia_machine_check *mc;
>> mc = (struct acpi_hest_ia_machine_check *)hest_hdr;
>> len = sizeof(*mc) + mc->num_hardware_banks *
>> sizeof(struct acpi_hest_ia_error_bank);
>> + mces.mc = mc;
>> } else if (hest_type == ACPI_HEST_TYPE_IA32_DEFERRED_CHECK) {
>> struct acpi_hest_ia_deferred_check *mc;
>> mc = (struct acpi_hest_ia_deferred_check *)hest_hdr;
>> len = sizeof(*mc) + mc->num_hardware_banks *
>> sizeof(struct acpi_hest_ia_error_bank);
>> + mces.dmc = mc;
>> }
>> BUG_ON(len == -1);
>>
>> return len;
>> };
>>
>> +/* GHES and GHESv2 structures share the same format, starting from
>
> Please follow the general kernel multi-line comment style here.
>
Will do.
>> + * Source Id and ending in Error Status Block Length (inclusive).
>> + */
>> +static bool is_ghes_assist_struct(struct acpi_hest_header *hest_hdr)
>> +{
>> + struct acpi_hest_generic *ghes;
>> + u16 related_source_id;
>> +
>> + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
>> + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
>> + return false;
>> +
>> + ghes = (struct acpi_hest_generic *)hest_hdr;
>> + related_source_id = ghes->related_source_id;
>> +
>> + if (mces.cmc && mces.cmc->flags & ACPI_HEST_GHES_ASSIST &&
>> + related_source_id == mces.cmc->header.source_id)
>
> IMO, there should be parentheses around statements with multiple terms
> to improve clarity.
>
Had tried that but reverted since checkpatch flags them as CHECK. Something
like below:
CHECK: Unnecessary parentheses around 'related_source_id == mces.cmc->header.source_id'
#107: FILE: drivers/acpi/apei/hest.c:122:
+ if (mces.cmc && (mces.cmc->flags & ACPI_HEST_GHES_ASSIST) &&
+ (related_source_id == mces.cmc->header.source_id))
> if (mces.cmc && (mces.cmc->flags & ACPI_HEST_GHES_ASSIST) &&
> (related_source_id == mces.cmc->header.source_id))
>
>> + return true;
>> + else if (mces.dmc && mces.dmc->flags & ACPI_HEST_GHES_ASSIST &&
>
> The "else" is not needed, because of the return.
>
> IOW, use "else if" when you want to skip a check if the previous "if"
> was true. Here the check is already skipped if the previous "if" was
> true and did a return.
>
> Also, the order of the types is "cmc, mc, dmc" in the struct definition
> and hest_esrc_len(). But here it is "cmc, dmc, mc". This is just a minor
> nit about consistency in ordering.
>
Understood.
Initially, was using booleans to know which conditions are true and
return appropriately at the end of the function. Hence, the else ifs.
Missed removing them. Will do so.
Will also change the order to match struct definition and hest_esrc_len().
>> + related_source_id == mces.dmc->header.source_id)
>> + return true;
>> + else if (mces.mc && mces.mc->flags & ACPI_HEST_GHES_ASSIST &&
>> + related_source_id == mces.mc->header.source_id)
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data);
>>
>> static int apei_hest_parse(apei_hest_func_t func, void *data)
>> @@ -113,6 +158,10 @@ static int apei_hest_parse(apei_hest_func_t func, void *data)
>> hest_hdr->source_id);
>> return -EINVAL;
>> }
>
> Newline here please.
>
Will do.
>> + if (is_ghes_assist_struct(hest_hdr)) {
>> + hest_hdr = (void *)hest_hdr + len;
>> + continue;
>> + }
>>
>> rc = func(hest_hdr, data);
>> if (rc)
>
> Besides the minor formatting comments, this looks good to me.
>
> Reviewed-by: Yazen Ghannam <[email protected]>
>
> Thanks,
> Yazen
--
Thanks,
Avadhut Naik