CPER records describing a firmware-first error are identified by GUID.
The ghes driver currently logs, but ignores any unknown CPER records.
This prevents describing errors that can't be represented by a standard
entry, that would otherwise allow a driver to recover from an error.
The UEFI spec calls these 'Non-standard Section Body' (N.2.3 of
version 2.8).
Add a notifier chain for these non-standard/vendor-records. Callers
must identify their type of records by GUID.
Record data is copied to memory from the ghes_estatus_pool to allow
us to keep it until after the notifier has run.
Signed-off-by: Shiju Jose <[email protected]>
[ Removed kfifo and ghes_gdata_pool. Expanded commit message ]
Signed-off-by: James Morse <[email protected]>
---
drivers/acpi/apei/ghes.c | 63 ++++++++++++++++++++++++++++++++++++++++
include/acpi/ghes.h | 27 +++++++++++++++++
2 files changed, 90 insertions(+)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 81bf71b10d44..99df00f64306 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -79,6 +79,12 @@
((struct acpi_hest_generic_status *) \
((struct ghes_estatus_node *)(estatus_node) + 1))
+#define GHES_VENDOR_ENTRY_LEN(gdata_len) \
+ (sizeof(struct ghes_vendor_record_entry) + (gdata_len))
+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \
+ ((struct acpi_hest_generic_data *) \
+ ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
+
/*
* NMI-like notifications vary by architecture, before the compiler can prune
* unused static functions it needs a value for these enums.
@@ -123,6 +129,12 @@ static DEFINE_MUTEX(ghes_list_mutex);
*/
static DEFINE_SPINLOCK(ghes_notify_lock_irq);
+struct ghes_vendor_record_entry {
+ struct work_struct work;
+ int error_severity;
+ char vendor_record[];
+};
+
static struct gen_pool *ghes_estatus_pool;
static unsigned long ghes_estatus_pool_size_request;
@@ -511,6 +523,56 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
#endif
}
+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
+
+int ghes_register_vendor_record_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
+
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
+
+static void ghes_vendor_record_work_func(struct work_struct *work)
+{
+ struct ghes_vendor_record_entry *entry;
+ struct acpi_hest_generic_data *gdata;
+ u32 len;
+
+ entry = container_of(work, struct ghes_vendor_record_entry, work);
+ gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+
+ blocking_notifier_call_chain(&vendor_record_notify_list,
+ entry->error_severity, gdata);
+
+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+ gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len);
+}
+
+static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
+ int sev)
+{
+ struct acpi_hest_generic_data *copied_gdata;
+ struct ghes_vendor_record_entry *entry;
+ u32 len;
+
+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
+ entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
+ if (!entry)
+ return;
+
+ copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
+ memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
+ entry->error_severity = sev;
+
+ INIT_WORK(&entry->work, ghes_vendor_record_work_func);
+ schedule_work(&entry->work);
+}
+
static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -549,6 +611,7 @@ static bool ghes_do_proc(struct ghes *ghes,
} else {
void *err = acpi_hest_get_payload(gdata);
+ ghes_defer_non_standard_event(gdata, sev);
log_non_standard_event(sec_type, fru_id, fru_text,
sec_sev, err,
gdata->error_data_length);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 517a5231cc1b..ae0e8847fdd5 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -53,6 +53,33 @@ enum {
GHES_SEV_PANIC = 0x3,
};
+#ifdef CONFIG_ACPI_APEI_GHES
+/**
+ * ghes_register_vendor_record_notifier - register a notifier for vendor
+ * records that the kernel would otherwise ignore.
+ * @nb: pointer to the notifier_block structure of the event handler.
+ *
+ * return 0 : SUCCESS, non-zero : FAIL
+ */
+int ghes_register_vendor_record_notifier(struct notifier_block *nb);
+
+/**
+ * ghes_unregister_vendor_record_notifier - unregister the previously
+ * registered vendor record notifier.
+ * @nb: pointer to the notifier_block structure of the vendor record handler.
+ */
+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
+#else
+static inline int ghes_register_vendor_record_notifier(struct notifier_block *nb)
+{
+ return -ENODEV;
+}
+
+static inline void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
+{
+}
+#endif
+
int ghes_estatus_pool_init(int num_ghes);
/* From drivers/edac/ghes_edac.c */
--
2.17.1
Hi Rafael, Hi James,
Could you help to merge this patch? I have added and tested all of the suggestions from James.
Thanks,
Shiju
>-----Original Message-----
>From: [email protected] [mailto:linux-pci-
>[email protected]] On Behalf Of Shiju Jose
>Sent: 22 June 2020 13:05
>To: [email protected]; [email protected]; linux-
>[email protected]; [email protected]; [email protected];
>[email protected]; [email protected]; [email protected];
>[email protected]; [email protected];
>[email protected]; [email protected];
>Wangkefeng (OS Kernel Lab) <[email protected]>;
>[email protected]
>Cc: Linuxarm <[email protected]>; yangyicong
><[email protected]>; Jonathan Cameron
><[email protected]>; tanxiaofei <[email protected]>
>Subject: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown
>(vendor) CPER records
>
>CPER records describing a firmware-first error are identified by GUID.
>The ghes driver currently logs, but ignores any unknown CPER records.
>This prevents describing errors that can't be represented by a standard entry,
>that would otherwise allow a driver to recover from an error.
>The UEFI spec calls these 'Non-standard Section Body' (N.2.3 of version 2.8).
>
>Add a notifier chain for these non-standard/vendor-records. Callers must
>identify their type of records by GUID.
>
>Record data is copied to memory from the ghes_estatus_pool to allow us to
>keep it until after the notifier has run.
>
>Signed-off-by: Shiju Jose <[email protected]> [ Removed kfifo and
>ghes_gdata_pool. Expanded commit message ]
>Signed-off-by: James Morse <[email protected]>
>---
> drivers/acpi/apei/ghes.c | 63
>++++++++++++++++++++++++++++++++++++++++
> include/acpi/ghes.h | 27 +++++++++++++++++
> 2 files changed, 90 insertions(+)
>
>diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
>81bf71b10d44..99df00f64306 100644
>--- a/drivers/acpi/apei/ghes.c
>+++ b/drivers/acpi/apei/ghes.c
>@@ -79,6 +79,12 @@
> ((struct acpi_hest_generic_status *) \
> ((struct ghes_estatus_node *)(estatus_node) + 1))
>
>+#define GHES_VENDOR_ENTRY_LEN(gdata_len) \
>+ (sizeof(struct ghes_vendor_record_entry) + (gdata_len))
>+#define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \
>+ ((struct acpi_hest_generic_data *) \
>+ ((struct ghes_vendor_record_entry *)(vendor_entry) + 1))
>+
> /*
> * NMI-like notifications vary by architecture, before the compiler can prune
> * unused static functions it needs a value for these enums.
>@@ -123,6 +129,12 @@ static DEFINE_MUTEX(ghes_list_mutex);
> */
> static DEFINE_SPINLOCK(ghes_notify_lock_irq);
>
>+struct ghes_vendor_record_entry {
>+ struct work_struct work;
>+ int error_severity;
>+ char vendor_record[];
>+};
>+
> static struct gen_pool *ghes_estatus_pool; static unsigned long
>ghes_estatus_pool_size_request;
>
>@@ -511,6 +523,56 @@ static void ghes_handle_aer(struct
>acpi_hest_generic_data *gdata) #endif }
>
>+static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list);
>+
>+int ghes_register_vendor_record_notifier(struct notifier_block *nb) {
>+ return blocking_notifier_chain_register(&vendor_record_notify_list,
>+nb); } EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier);
>+
>+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb)
>+{
>+ blocking_notifier_chain_unregister(&vendor_record_notify_list, nb);
>}
>+EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier);
>+
>+static void ghes_vendor_record_work_func(struct work_struct *work) {
>+ struct ghes_vendor_record_entry *entry;
>+ struct acpi_hest_generic_data *gdata;
>+ u32 len;
>+
>+ entry = container_of(work, struct ghes_vendor_record_entry, work);
>+ gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
>+
>+ blocking_notifier_call_chain(&vendor_record_notify_list,
>+ entry->error_severity, gdata);
>+
>+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
>+ gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); }
>+
>+static void ghes_defer_non_standard_event(struct acpi_hest_generic_data
>*gdata,
>+ int sev)
>+{
>+ struct acpi_hest_generic_data *copied_gdata;
>+ struct ghes_vendor_record_entry *entry;
>+ u32 len;
>+
>+ len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata));
>+ entry = (void *)gen_pool_alloc(ghes_estatus_pool, len);
>+ if (!entry)
>+ return;
>+
>+ copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry);
>+ memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata));
>+ entry->error_severity = sev;
>+
>+ INIT_WORK(&entry->work, ghes_vendor_record_work_func);
>+ schedule_work(&entry->work);
>+}
>+
> static bool ghes_do_proc(struct ghes *ghes,
> const struct acpi_hest_generic_status *estatus) {
>@@ -549,6 +611,7 @@ static bool ghes_do_proc(struct ghes *ghes,
> } else {
> void *err = acpi_hest_get_payload(gdata);
>
>+ ghes_defer_non_standard_event(gdata, sev);
> log_non_standard_event(sec_type, fru_id, fru_text,
> sec_sev, err,
> gdata->error_data_length);
>diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index
>517a5231cc1b..ae0e8847fdd5 100644
>--- a/include/acpi/ghes.h
>+++ b/include/acpi/ghes.h
>@@ -53,6 +53,33 @@ enum {
> GHES_SEV_PANIC = 0x3,
> };
>
>+#ifdef CONFIG_ACPI_APEI_GHES
>+/**
>+ * ghes_register_vendor_record_notifier - register a notifier for
>+vendor
>+ * records that the kernel would otherwise ignore.
>+ * @nb: pointer to the notifier_block structure of the event handler.
>+ *
>+ * return 0 : SUCCESS, non-zero : FAIL
>+ */
>+int ghes_register_vendor_record_notifier(struct notifier_block *nb);
>+
>+/**
>+ * ghes_unregister_vendor_record_notifier - unregister the previously
>+ * registered vendor record notifier.
>+ * @nb: pointer to the notifier_block structure of the vendor record
>handler.
>+ */
>+void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
>+#else static inline int ghes_register_vendor_record_notifier(struct
>+notifier_block *nb) {
>+ return -ENODEV;
>+}
>+
>+static inline void ghes_unregister_vendor_record_notifier(struct
>+notifier_block *nb) { } #endif
>+
> int ghes_estatus_pool_init(int num_ghes);
>
> /* From drivers/edac/ghes_edac.c */
>--
>2.17.1
>
On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <[email protected]> wrote:
>
> Hi Rafael, Hi James,
>
> Can you help to merge this patch because I added and tested all the suggestions from James.
I could apply the [1/2] in principle, but I need an ACK for the [2/2]
from the PCI side.
That said, it looks like the [1/2] is James' patch that you are
sending with some changes made by you.
In that case the ordering of the S-o-b tags under it should be
different (the S-o-b from James, the what-you-have-changed line and the
S-o-b from you) and also the From: tag should point to James.
Thanks!
Hi Rafael,
>-----Original Message-----
>From: Rafael J. Wysocki [mailto:[email protected]]
>Sent: 13 July 2020 12:18
>To: Shiju Jose <[email protected]>
>Cc: [email protected]; [email protected]; linux-
>[email protected]; [email protected]; [email protected];
>[email protected]; [email protected]; [email protected];
>[email protected]; [email protected];
>[email protected]; [email protected];
>Wangkefeng (OS Kernel Lab) <[email protected]>;
>[email protected]; Linuxarm <[email protected]>; yangyicong
><[email protected]>; Jonathan Cameron
><[email protected]>; tanxiaofei <[email protected]>
>Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown
>(vendor) CPER records
>
>On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <[email protected]>
>wrote:
>>
>> Hi Rafael, Hi James,
>>
>> Can you help to merge this patch because I added and tested all the
>suggestions from James.
>
>I could apply the [1/2] in principle, but I need an ACK for the [2/2] from the
>PCI side.
>
>That said, it looks like the [1/2] is a James' patch that you are sending with
>some changes made by you.
James added the following changes on top of my original patch (V10):
[ Removed kfifo and ghes_gdata_pool. Expanded commit message ]
I was confused about how the S-o-b tag should be added for James's changes in the V11 patch I posted.
>
>In that case the ordering of the S-o-b tags under it should be different (the S-
>o-b from James, the what-you-have-change line and the S-o-b from you) and
>also the From: tag should point to James.
>
>Thanks!
Thanks,
Shiju
On Mon, Jul 13, 2020 at 3:33 PM Shiju Jose <[email protected]> wrote:
>
> Hi Rafael,
>
> >-----Original Message-----
> >From: Rafael J. Wysocki [mailto:[email protected]]
> >Sent: 13 July 2020 12:18
> >To: Shiju Jose <[email protected]>
> >Cc: [email protected]; [email protected]; linux-
> >[email protected]; [email protected]; [email protected];
> >[email protected]; [email protected]; [email protected];
> >[email protected]; [email protected];
> >[email protected]; [email protected];
> >Wangkefeng (OS Kernel Lab) <[email protected]>;
> >[email protected]; Linuxarm <[email protected]>; yangyicong
> ><[email protected]>; Jonathan Cameron
> ><[email protected]>; tanxiaofei <[email protected]>
> >Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown
> >(vendor) CPER records
> >
> >On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <[email protected]>
> >wrote:
> >>
> >> Hi Rafael, Hi James,
> >>
> >> Can you help to merge this patch because I added and tested all the
> >suggestions from James.
> >
> >I could apply the [1/2] in principle, but I need an ACK for the [2/2] from the
> >PCI side.
> >
> >That said, it looks like the [1/2] is a James' patch that you are sending with
> >some changes made by you.
> James added following changes on top of the original patch(V10) by me,
> [ Removed kfifo and ghes_gdata_pool. Expanded commit message ]
> I had confusion how the S-o-b tag to be added for James's changes in the V11 patch posted.
So James should have sent the patch with his S-o-b under it.
You cannot add S-o-b for somebody else to any patches. You can only
add your S-o-b to somebody else's patch if you have made any changes
on top of the original.
In case you want to make a record of somebody else's contribution to
your patch, you can use the Co-developed-by tag.
Thanks!
Hi Rafael,
>-----Original Message-----
>From: [email protected] [mailto:linux-pci-
>[email protected]] On Behalf Of Rafael J. Wysocki
>Sent: 13 July 2020 14:38
>To: Shiju Jose <[email protected]>
>Cc: Rafael J. Wysocki <[email protected]>; [email protected]; linux-
>[email protected]; [email protected]; [email protected];
>[email protected]; [email protected]; [email protected];
>[email protected]; [email protected]; [email protected];
>[email protected]; [email protected];
>Wangkefeng (OS Kernel Lab) <[email protected]>;
>[email protected]; Linuxarm <[email protected]>; yangyicong
><[email protected]>; Jonathan Cameron
><[email protected]>; tanxiaofei <[email protected]>
>Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for unknown
>(vendor) CPER records
>
>On Mon, Jul 13, 2020 at 3:33 PM Shiju Jose <[email protected]> wrote:
>>
>> Hi Rafael,
>>
>> >-----Original Message-----
>> >From: Rafael J. Wysocki [mailto:[email protected]]
>> >Sent: 13 July 2020 12:18
>> >To: Shiju Jose <[email protected]>
>> >Cc: [email protected]; [email protected]; linux-
>> >[email protected]; [email protected]; [email protected];
>> >[email protected]; [email protected]; [email protected];
>> >[email protected]; [email protected];
>> >[email protected]; [email protected];
>> >Wangkefeng (OS Kernel Lab) <[email protected]>;
>> >[email protected]; Linuxarm <[email protected]>; yangyicong
>> ><[email protected]>; Jonathan Cameron
>> ><[email protected]>; tanxiaofei <[email protected]>
>> >Subject: Re: [PATCH v11 1/2] ACPI / APEI: Add a notifier chain for
>> >unknown
>> >(vendor) CPER records
>> >
>> >On Mon, Jul 13, 2020 at 10:35 AM Shiju Jose <[email protected]>
>> >wrote:
>> >>
>> >> Hi Rafael, Hi James,
>> >>
>> >> Can you help to merge this patch because I added and tested all the
>> >suggestions from James.
>> >
>> >I could apply the [1/2] in principle, but I need an ACK for the [2/2]
>> >from the PCI side.
>> >
>> >That said, it looks like the [1/2] is a James' patch that you are
>> >sending with some changes made by you.
>> James added following changes on top of the original patch(V10) by me,
>> [ Removed kfifo and ghes_gdata_pool. Expanded commit message ] I had
>> confusion how the S-o-b tag to be added for James's changes in the V11
>patch posted.
>
>So James should have sent the patch with his S-o-b under it.
>
>You cannot add S-o-b for somebody else to any patches. You can only add
>your S-o-b to somebody else's patch if you have made any changes on top of
>the original.
>
>In case you want to make a record of somebody else's contribution to your
>patch, you can use the Co-developed-by tag.
OK. I will resend the patch with the Co-developed-by tag.
>
>Thanks!
Thanks,
Shiju