From: Shengwei Luo <[email protected]>
The ARM processor error section includes several ARM processor error
information structures, several ARM processor context information
structures and several vendor-specific error information structures.
In addition to these, the event carries an error severity and a CPU
logical index. Report all of this information to userspace via the
perf interface.
Original-Author: Jason Tian <[email protected]>
Signed-off-by: Shengwei Luo <[email protected]>
---
v1->v2: Cleaned up ci warnings
---
drivers/acpi/apei/ghes.c | 3 +--
drivers/ras/ras.c | 46 ++++++++++++++++++++++++++++++++++++--
include/linux/ras.h | 15 +++++++++++--
include/ras/ras_event.h | 48 +++++++++++++++++++++++++++++++++++-----
4 files changed, 101 insertions(+), 11 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c5c9acc6254..f824c26057b1 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -490,9 +490,8 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
int sec_sev, i;
char *p;
- log_arm_hw_error(err);
-
sec_sev = ghes_severity(gdata->error_severity);
+ log_arm_hw_error(err, sec_sev);
if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
return false;
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 95540ea8dd9d..2a7f424d59b9 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -21,9 +21,51 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
}
-void log_arm_hw_error(struct cper_sec_proc_arm *err)
+void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
{
- trace_arm_event(err);
+ u32 pei_len;
+ u32 ctx_len = 0;
+ s32 vsei_len;
+ u8 *pei_err;
+ u8 *ctx_err;
+ u8 *ven_err_data;
+ struct cper_arm_err_info *err_info;
+ struct cper_arm_ctx_info *ctx_info;
+ int n, sz;
+ int cpu;
+
+ pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
+ pei_err = (u8 *)err + sizeof(struct cper_sec_proc_arm);
+
+ err_info = (struct cper_arm_err_info *)(err + 1);
+ ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
+ ctx_err = (u8 *)ctx_info;
+ for (n = 0; n < err->context_info_num; n++) {
+ sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
+ ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
+ ctx_len += sz;
+ }
+
+ vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) +
+ pei_len + ctx_len);
+ if (vsei_len < 0) {
+ pr_warn(FW_BUG
+ "section length: %d\n", err->section_length);
+ pr_warn(FW_BUG
+ "section length is too small\n");
+ pr_warn(FW_BUG
+ "firmware-generated error record is incorrect\n");
+ vsei_len = 0;
+ }
+ ven_err_data = (u8 *)ctx_info;
+
+ cpu = GET_LOGICAL_INDEX(err->mpidr);
+ /* when return value is invalid, set cpu index to -1 */
+ if (cpu < 0)
+ cpu = -1;
+
+ trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
+ ven_err_data, (u32)vsei_len, sev, cpu);
}
static int __init ras_init(void)
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 1f4048bf2674..4529775374d0 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -24,7 +24,7 @@ int __init parse_cec_param(char *str);
void log_non_standard_event(const guid_t *sec_type,
const guid_t *fru_id, const char *fru_text,
const u8 sev, const u8 *err, const u32 len);
-void log_arm_hw_error(struct cper_sec_proc_arm *err);
+void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev);
#else
static inline void
log_non_standard_event(const guid_t *sec_type,
@@ -32,7 +32,18 @@ log_non_standard_event(const guid_t *sec_type,
const u8 sev, const u8 *err, const u32 len)
{ return; }
static inline void
-log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
+log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
#endif
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+#include <asm/smp_plat.h>
+/*
+ * Include ARM specific SMP header which provides a function mapping mpidr to
+ * cpu logical index.
+ */
+#define GET_LOGICAL_INDEX(mpidr) get_logical_index(mpidr & MPIDR_HWID_BITMASK)
+#else
+#define GET_LOGICAL_INDEX(mpidr) -EINVAL
+#endif /* CONFIG_ARM || CONFIG_ARM64 */
+
#endif /* __RAS_H__ */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index d0337a41141c..92cfb61bdb20 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -168,11 +168,24 @@ TRACE_EVENT(mc_event,
* This event is generated when hardware detects an ARM processor error
* has occurred. UEFI 2.6 spec section N.2.4.4.
*/
+#define APEIL "ARM Processor Err Info data len"
+#define APEID "ARM Processor Err Info raw data"
+#define APECIL "ARM Processor Err Context Info data len"
+#define APECID "ARM Processor Err Context Info raw data"
+#define VSEIL "Vendor Specific Err Info data len"
+#define VSEID "Vendor Specific Err Info raw data"
TRACE_EVENT(arm_event,
- TP_PROTO(const struct cper_sec_proc_arm *proc),
+ TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err,
+ const u32 pei_len,
+ const u8 *ctx_err,
+ const u32 ctx_len,
+ const u8 *oem,
+ const u32 oem_len,
+ u8 sev,
+ int cpu),
- TP_ARGS(proc),
+ TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev, cpu),
TP_STRUCT__entry(
__field(u64, mpidr)
@@ -180,6 +193,14 @@ TRACE_EVENT(arm_event,
__field(u32, running_state)
__field(u32, psci_state)
__field(u8, affinity)
+ __field(u32, pei_len)
+ __dynamic_array(u8, buf, pei_len)
+ __field(u32, ctx_len)
+ __dynamic_array(u8, buf1, ctx_len)
+ __field(u32, oem_len)
+ __dynamic_array(u8, buf2, oem_len)
+ __field(u8, sev)
+ __field(int, cpu)
),
TP_fast_assign(
@@ -199,12 +220,29 @@ TRACE_EVENT(arm_event,
__entry->running_state = ~0;
__entry->psci_state = ~0;
}
+ __entry->pei_len = pei_len;
+ memcpy(__get_dynamic_array(buf), pei_err, pei_len);
+ __entry->ctx_len = ctx_len;
+ memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len);
+ __entry->oem_len = oem_len;
+ memcpy(__get_dynamic_array(buf2), oem, oem_len);
+ __entry->sev = sev;
+ __entry->cpu = cpu;
),
- TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
- "running state: %d; PSCI state: %d",
+ TP_printk("cpu: %d; error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+ "running state: %d; PSCI state: %d; "
+ "%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s",
+ __entry->cpu,
+ __entry->sev,
__entry->affinity, __entry->mpidr, __entry->midr,
- __entry->running_state, __entry->psci_state)
+ __entry->running_state, __entry->psci_state,
+ APEIL, __entry->pei_len, APEID,
+ __print_hex(__get_dynamic_array(buf), __entry->pei_len),
+ APECIL, __entry->ctx_len, APECID,
+ __print_hex(__get_dynamic_array(buf1), __entry->ctx_len),
+ VSEIL, __entry->oem_len, VSEID,
+ __print_hex(__get_dynamic_array(buf2), __entry->oem_len))
);
/*
--
2.27.0
On January 26, 2022 3:09:06 AM UTC, [email protected] wrote:
>From: Shengwei Luo <[email protected]>
>
>The ARM processor error section includes several ARM processor error
>information, several ARM processor context information and several
>vendor specific error information structures. In addition to these
>info, there are error severity and cpu logical index about the event.
>Report all of these information to userspace via perf i/f.
A patch's commit message needs to explain why a change is being made, not what is being changed. The "what" I can see from the diff most of the time.
--
Sent from a small device: formatting sux and brevity is inevitable.
On Wed, Jan 26, 2022 at 11:09:06AM +0800, [email protected] wrote:
> From: Shengwei Luo <[email protected]>
>
> The ARM processor error section includes several ARM processor error
> information, several ARM processor context information and several
> vendor specific error information structures. In addition to these
> info, there are error severity and cpu logical index about the event.
> Report all of these information to userspace via perf i/f.
>
> Original-Author: Jason Tian <[email protected]>
> Signed-off-by: Shengwei Luo <[email protected]>
In addition to Boris's comments, "Original-Author" is not a proper tag, and so
this patch cannot be accepted as-is.
Please see:
https://www.kernel.org/doc/html/v4.17/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
Either:
* The original patch had a "Signed-off-by" tag, which you should have kept
as-is, and added your own.
* The original patch did not have a "Signed-off-by" tag, and we cannot accept
the patch.
It would be good to provide a link to the prior patch, ideally via
lore.kernel.org. For reference, this patch is:
https://lore.kernel.org/lkml/[email protected]/
It would also be good to Cc the relevant arm maintainers here (e.g. James
Morse), since they're more likely to know about the arm side of things.
Thanks,
Mark.
> ---
> v1->v2: Cleaned up ci warnings
> ---
> drivers/acpi/apei/ghes.c | 3 +--
> drivers/ras/ras.c | 46 ++++++++++++++++++++++++++++++++++++--
> include/linux/ras.h | 15 +++++++++++--
> include/ras/ras_event.h | 48 +++++++++++++++++++++++++++++++++++-----
> 4 files changed, 101 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0c5c9acc6254..f824c26057b1 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -490,9 +490,8 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
> int sec_sev, i;
> char *p;
>
> - log_arm_hw_error(err);
> -
> sec_sev = ghes_severity(gdata->error_severity);
> + log_arm_hw_error(err, sec_sev);
> if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
> return false;
>
> diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
> index 95540ea8dd9d..2a7f424d59b9 100644
> --- a/drivers/ras/ras.c
> +++ b/drivers/ras/ras.c
> @@ -21,9 +21,51 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
> trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
> }
>
> -void log_arm_hw_error(struct cper_sec_proc_arm *err)
> +void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
> {
> - trace_arm_event(err);
> + u32 pei_len;
> + u32 ctx_len = 0;
> + s32 vsei_len;
> + u8 *pei_err;
> + u8 *ctx_err;
> + u8 *ven_err_data;
> + struct cper_arm_err_info *err_info;
> + struct cper_arm_ctx_info *ctx_info;
> + int n, sz;
> + int cpu;
> +
> + pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
> + pei_err = (u8 *)err + sizeof(struct cper_sec_proc_arm);
> +
> + err_info = (struct cper_arm_err_info *)(err + 1);
> + ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
> + ctx_err = (u8 *)ctx_info;
> + for (n = 0; n < err->context_info_num; n++) {
> + sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
> + ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
> + ctx_len += sz;
> + }
> +
> + vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) +
> + pei_len + ctx_len);
> + if (vsei_len < 0) {
> + pr_warn(FW_BUG
> + "section length: %d\n", err->section_length);
> + pr_warn(FW_BUG
> + "section length is too small\n");
> + pr_warn(FW_BUG
> + "firmware-generated error record is incorrect\n");
> + vsei_len = 0;
> + }
> + ven_err_data = (u8 *)ctx_info;
> +
> + cpu = GET_LOGICAL_INDEX(err->mpidr);
> + /* when return value is invalid, set cpu index to -1 */
> + if (cpu < 0)
> + cpu = -1;
> +
> + trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
> + ven_err_data, (u32)vsei_len, sev, cpu);
> }
>
> static int __init ras_init(void)
> diff --git a/include/linux/ras.h b/include/linux/ras.h
> index 1f4048bf2674..4529775374d0 100644
> --- a/include/linux/ras.h
> +++ b/include/linux/ras.h
> @@ -24,7 +24,7 @@ int __init parse_cec_param(char *str);
> void log_non_standard_event(const guid_t *sec_type,
> const guid_t *fru_id, const char *fru_text,
> const u8 sev, const u8 *err, const u32 len);
> -void log_arm_hw_error(struct cper_sec_proc_arm *err);
> +void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev);
> #else
> static inline void
> log_non_standard_event(const guid_t *sec_type,
> @@ -32,7 +32,18 @@ log_non_standard_event(const guid_t *sec_type,
> const u8 sev, const u8 *err, const u32 len)
> { return; }
> static inline void
> -log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
> +log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
> #endif
>
> +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
> +#include <asm/smp_plat.h>
> +/*
> + * Include ARM specific SMP header which provides a function mapping mpidr to
> + * cpu logical index.
> + */
> +#define GET_LOGICAL_INDEX(mpidr) get_logical_index(mpidr & MPIDR_HWID_BITMASK)
> +#else
> +#define GET_LOGICAL_INDEX(mpidr) -EINVAL
> +#endif /* CONFIG_ARM || CONFIG_ARM64 */
> +
> #endif /* __RAS_H__ */
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index d0337a41141c..92cfb61bdb20 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -168,11 +168,24 @@ TRACE_EVENT(mc_event,
> * This event is generated when hardware detects an ARM processor error
> * has occurred. UEFI 2.6 spec section N.2.4.4.
> */
> +#define APEIL "ARM Processor Err Info data len"
> +#define APEID "ARM Processor Err Info raw data"
> +#define APECIL "ARM Processor Err Context Info data len"
> +#define APECID "ARM Processor Err Context Info raw data"
> +#define VSEIL "Vendor Specific Err Info data len"
> +#define VSEID "Vendor Specific Err Info raw data"
> TRACE_EVENT(arm_event,
>
> - TP_PROTO(const struct cper_sec_proc_arm *proc),
> + TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err,
> + const u32 pei_len,
> + const u8 *ctx_err,
> + const u32 ctx_len,
> + const u8 *oem,
> + const u32 oem_len,
> + u8 sev,
> + int cpu),
>
> - TP_ARGS(proc),
> + TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev, cpu),
>
> TP_STRUCT__entry(
> __field(u64, mpidr)
> @@ -180,6 +193,14 @@ TRACE_EVENT(arm_event,
> __field(u32, running_state)
> __field(u32, psci_state)
> __field(u8, affinity)
> + __field(u32, pei_len)
> + __dynamic_array(u8, buf, pei_len)
> + __field(u32, ctx_len)
> + __dynamic_array(u8, buf1, ctx_len)
> + __field(u32, oem_len)
> + __dynamic_array(u8, buf2, oem_len)
> + __field(u8, sev)
> + __field(int, cpu)
> ),
>
> TP_fast_assign(
> @@ -199,12 +220,29 @@ TRACE_EVENT(arm_event,
> __entry->running_state = ~0;
> __entry->psci_state = ~0;
> }
> + __entry->pei_len = pei_len;
> + memcpy(__get_dynamic_array(buf), pei_err, pei_len);
> + __entry->ctx_len = ctx_len;
> + memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len);
> + __entry->oem_len = oem_len;
> + memcpy(__get_dynamic_array(buf2), oem, oem_len);
> + __entry->sev = sev;
> + __entry->cpu = cpu;
> ),
>
> - TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
> - "running state: %d; PSCI state: %d",
> + TP_printk("cpu: %d; error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
> + "running state: %d; PSCI state: %d; "
> + "%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s",
> + __entry->cpu,
> + __entry->sev,
> __entry->affinity, __entry->mpidr, __entry->midr,
> - __entry->running_state, __entry->psci_state)
> + __entry->running_state, __entry->psci_state,
> + APEIL, __entry->pei_len, APEID,
> + __print_hex(__get_dynamic_array(buf), __entry->pei_len),
> + APECIL, __entry->ctx_len, APECID,
> + __print_hex(__get_dynamic_array(buf1), __entry->ctx_len),
> + VSEIL, __entry->oem_len, VSEID,
> + __print_hex(__get_dynamic_array(buf2), __entry->oem_len))
> );
>
> /*
> --
> 2.27.0
>
On Mon, Feb 07, 2022 at 09:41:50AM +0800, lostway wrote:
> <html>
> <head>
> <meta http-equiv='Content-Type' content='text/html; charset=UTF-8'>
> </head>
> <body>
> <style>
> font{
> line-height: 1.6;
> }
> ul,ol{
> padding-left: 20px;
> list-style-position: inside;
> }
> </style>
> <div style = 'font-family:微软雅黑,Verdana,"Microsoft Yahei",SimSun,sans-serif;font-size:14px; line-height:1.6;'>
...
You need to fix your mail client not to send html crap:
Documentation/process/email-clients.rst
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette