Commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in
apci_init()") introduced a bug that ghes_edac_register() would be invoked
before edac_init(). Because at that time, the bus "edac" hadn't been even
registered, this created sysfs /devices/mc0 instead of
/sys/devices/system/edac/mc/mc0 on an Ampere eMag server.
To remove the dependency of ghes_edac on ghes, make it a proper module. Use
a list to save the probing devices in ghes_probe(), and defer the
ghes_edac_register() to module_init() of the new ghes_edac module by
iterating over the devices list.
Co-developed-by: Borislav Petkov <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Jia He <[email protected]>
Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()")
Cc: [email protected]
Cc: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 35 ++++++++++++++++++++++--
drivers/edac/Kconfig | 4 +--
drivers/edac/ghes_edac.c | 59 +++++++++++++++++++++++++---------------
include/acpi/ghes.h | 23 ++++------------
4 files changed, 77 insertions(+), 44 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 8cb65f757d06..9c52183e3ad9 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -120,6 +120,9 @@ module_param_named(disable, ghes_disable, bool, 0);
static LIST_HEAD(ghes_hed);
static DEFINE_MUTEX(ghes_list_mutex);
+static LIST_HEAD(ghes_devs);
+static DEFINE_MUTEX(ghes_devs_mutex);
+
/*
* Because the memory area used to transfer hardware error information
* from BIOS to Linux can be determined only in NMI, IRQ or timer
@@ -1378,7 +1381,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
platform_set_drvdata(ghes_dev, ghes);
- ghes_edac_register(ghes, &ghes_dev->dev);
+ ghes->dev = &ghes_dev->dev;
+
+ mutex_lock(&ghes_devs_mutex);
+ list_add_tail(&ghes->elist, &ghes_devs);
+ mutex_unlock(&ghes_devs_mutex);
/* Handle any pending errors right away */
spin_lock_irqsave(&ghes_notify_lock_irq, flags);
@@ -1442,7 +1449,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
ghes_fini(ghes);
- ghes_edac_unregister(ghes);
+ mutex_lock(&ghes_devs_mutex);
+ list_del_rcu(&ghes->elist);
+ mutex_unlock(&ghes_devs_mutex);
kfree(ghes);
@@ -1500,6 +1509,28 @@ void __init acpi_ghes_init(void)
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
}
+/*
+ * Known x86 systems that prefer GHES error reporting:
+ */
+static struct acpi_platform_list plat_list[] = {
+ {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions},
+ { } /* End */
+};
+
+struct list_head *ghes_get_devices(bool force)
+{
+ int idx = -1;
+
+ if (IS_ENABLED(CONFIG_X86)) {
+ idx = acpi_match_platform_list(plat_list);
+ if (idx < 0 && !force)
+ return NULL;
+ }
+
+ return &ghes_devs;
+}
+EXPORT_SYMBOL_GPL(ghes_get_devices);
+
void ghes_register_report_chain(struct notifier_block *nb)
{
atomic_notifier_chain_register(&ghes_report_chain, nb);
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 17562cf1fe97..df45db81858b 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -53,8 +53,8 @@ config EDAC_DECODE_MCE
has been initialized.
config EDAC_GHES
- bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
- depends on ACPI_APEI_GHES && (EDAC=y)
+ tristate "Output ACPI APEI/GHES BIOS detected errors via EDAC"
+ depends on ACPI_APEI_GHES
select UEFI_CPER
help
Not all machines support hardware-driven error report. Some of those
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 7b8d56a769f6..bb3ea42ba70b 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -60,6 +60,8 @@ module_param(force_load, bool, 0);
static bool system_scanned;
+static struct list_head *ghes_devs;
+
/* Memory Device - Type 17 of SMBIOS spec */
struct memdev_dmi_entry {
u8 type;
@@ -387,34 +389,15 @@ static struct notifier_block ghes_edac_mem_err_nb = {
.priority = 0,
};
-/*
- * Known systems that are safe to enable this module.
- */
-static struct acpi_platform_list plat_list[] = {
- {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions},
- { } /* End */
-};
-
-int ghes_edac_register(struct ghes *ghes, struct device *dev)
+static int ghes_edac_register(struct device *dev)
{
bool fake = false;
struct mem_ctl_info *mci;
struct ghes_pvt *pvt;
struct edac_mc_layer layers[1];
unsigned long flags;
- int idx = -1;
int rc = 0;
- if (IS_ENABLED(CONFIG_X86)) {
- /* Check if safe to enable on this system */
- idx = acpi_match_platform_list(plat_list);
- if (!force_load && idx < 0)
- return -ENODEV;
- } else {
- force_load = true;
- idx = 0;
- }
-
/* finish another registration/unregistration instance first */
mutex_lock(&ghes_reg_mutex);
@@ -458,7 +441,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
pr_info("work on such system. Use this driver with caution\n");
- } else if (idx < 0) {
+ } else if (force_load) {
pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
@@ -530,7 +513,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev)
return rc;
}
-void ghes_edac_unregister(struct ghes *ghes)
+static void ghes_edac_unregister(struct ghes *ghes)
{
struct mem_ctl_info *mci;
unsigned long flags;
@@ -566,3 +549,35 @@ void ghes_edac_unregister(struct ghes *ghes)
unlock:
mutex_unlock(&ghes_reg_mutex);
}
+
+static int __init ghes_edac_init(void)
+{
+ struct ghes *g, *g_tmp;
+
+ if (!IS_ENABLED(CONFIG_X86))
+ force_load = true;
+
+ ghes_devs = ghes_get_devices(force_load);
+ if (!ghes_devs)
+ return -ENODEV;
+
+ list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) {
+ ghes_edac_register(g->dev);
+ }
+
+ return 0;
+}
+module_init(ghes_edac_init);
+
+static void __exit ghes_edac_exit(void)
+{
+ struct ghes *g, *g_tmp;
+
+ list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) {
+ ghes_edac_unregister(g);
+ }
+}
+module_exit(ghes_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Output ACPI APEI/GHES BIOS detected errors module via EDAC");
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 5cbd38b6e4e1..150c0b9500d6 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -27,6 +27,8 @@ struct ghes {
struct timer_list timer;
unsigned int irq;
};
+ struct device *dev;
+ struct list_head elist;
};
struct ghes_estatus_node {
@@ -69,28 +71,13 @@ int ghes_register_vendor_record_notifier(struct notifier_block *nb);
* @nb: pointer to the notifier_block structure of the vendor record handler.
*/
void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
+struct list_head *ghes_get_devices(bool force);
+#else
+static inline struct list_head *ghes_get_devices(bool force) { return NULL; }
#endif
int ghes_estatus_pool_init(int num_ghes);
-/* From drivers/edac/ghes_edac.c */
-
-#ifdef CONFIG_EDAC_GHES
-int ghes_edac_register(struct ghes *ghes, struct device *dev);
-
-void ghes_edac_unregister(struct ghes *ghes);
-
-#else
-static inline int ghes_edac_register(struct ghes *ghes, struct device *dev)
-{
- return -ENODEV;
-}
-
-static inline void ghes_edac_unregister(struct ghes *ghes)
-{
-}
-#endif
-
static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
{
return gdata->revision >> 8;
--
2.25.1
On Mon, Aug 22, 2022 at 03:40:42PM +0000, Jia He wrote:
> Commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in
> apci_init()") introduced a bug that ghes_edac_register() would be invoked
> before edac_init(). Because at that time, the bus "edac" hadn't been even
> registered, this created sysfs /devices/mc0 instead of
> /sys/devices/system/edac/mc/mc0 on an Ampere eMag server.
>
> To remove the dependency of ghes_edac on ghes, make it a proper module. Use
> a list to save the probing devices in ghes_probe(), and defer the
> ghes_edac_register() to module_init() of the new ghes_edac module by
> iterating over the devices list.
>
> Co-developed-by: Borislav Petkov <[email protected]>
> Signed-off-by: Borislav Petkov <[email protected]>
> Signed-off-by: Jia He <[email protected]>
> Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()")
> Cc: [email protected]
Why is this marked for stable?
The prerequisite patches are needed too. I guess this needs to be
communicated to stable folks somehow by doing
Cc: [email protected] # needs commits X, Y, ...
but I guess the committer needs to do that because only at commit time
will X and Y be known...
So, is there any particular reason why this should be in stable?
> @@ -1442,7 +1449,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
>
> ghes_fini(ghes);
>
> - ghes_edac_unregister(ghes);
> + mutex_lock(&ghes_devs_mutex);
> + list_del_rcu(&ghes->elist);
Is that list RCU-protected?
> + mutex_unlock(&ghes_devs_mutex);
>
> kfree(ghes);
...
> @@ -566,3 +549,35 @@ void ghes_edac_unregister(struct ghes *ghes)
> unlock:
> mutex_unlock(&ghes_reg_mutex);
> }
> +
> +static int __init ghes_edac_init(void)
> +{
> + struct ghes *g, *g_tmp;
> +
> + if (!IS_ENABLED(CONFIG_X86))
> + force_load = true;
No, this is not how this works.
> + ghes_devs = ghes_get_devices(force_load);
> + if (!ghes_devs)
> + return -ENODEV;
You simply need to check force_load here.
> + list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) {
> + ghes_edac_register(g->dev);
> + }
> +
> + return 0;
> +}
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Hi Borislav
> -----Original Message-----
> On Mon, Aug 22, 2022 at 03:40:42PM +0000, Jia He wrote:
> > Commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in
> > apci_init()") introduced a bug that ghes_edac_register() would be
> > invoked before edac_init(). Because at that time, the bus "edac"
> > hadn't been even registered, this created sysfs /devices/mc0 instead
> > of
> > /sys/devices/system/edac/mc/mc0 on an Ampere eMag server.
> >
> > To remove the dependency of ghes_edac on ghes, make it a proper
> > module. Use a list to save the probing devices in ghes_probe(), and
> > defer the
> > ghes_edac_register() to module_init() of the new ghes_edac module by
> > iterating over the devices list.
> >
> > Co-developed-by: Borislav Petkov <[email protected]>
> > Signed-off-by: Borislav Petkov <[email protected]>
> > Signed-off-by: Jia He <[email protected]>
> > Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in
> > apci_init()")
> > Cc: [email protected]
>
> Why is this marked for stable?
>
> The prerequisite patches are needed too. I guess this needs to be
> communicated to stable folks somehow by doing
>
> Cc: [email protected] # needs commits X, Y, ...
>
> but I guess the committer needs to do that because only at commit time will X
> and Y be known...
>
> So, is there any particular reason why this should be in stable?
Okay, I am fine with removing the stable line if dc4e8c07e9e2 will not be included in
any stable tree branch.
>
> > @@ -1442,7 +1449,9 @@ static int ghes_remove(struct platform_device
> > *ghes_dev)
> >
> > ghes_fini(ghes);
> >
> > - ghes_edac_unregister(ghes);
> > + mutex_lock(&ghes_devs_mutex);
> > + list_del_rcu(&ghes->elist);
>
> Is that list RCU-protected?
No, I will remove the "rcu" suffix since I use list_add_tail.
>
> > + mutex_unlock(&ghes_devs_mutex);
> >
> > kfree(ghes);
>
> ...
>
> > @@ -566,3 +549,35 @@ void ghes_edac_unregister(struct ghes *ghes)
> > unlock:
> > mutex_unlock(&ghes_reg_mutex);
> > }
> > +
> > +static int __init ghes_edac_init(void) {
> > + struct ghes *g, *g_tmp;
> > +
> > + if (!IS_ENABLED(CONFIG_X86))
> > + force_load = true;
>
> No, this is not how this works.
>
> > + ghes_devs = ghes_get_devices(force_load);
> > + if (!ghes_devs)
> > + return -ENODEV;
>
> You simply need to check force_load here.
>
Okay, hence should I export the *ghes_devs* in ghes?
--
Cheers,
Justin (Jia He)
On Thursday, August 25, 2022 6:21 AM, Justin He wrote:
> > > @@ -566,3 +549,35 @@ void ghes_edac_unregister(struct ghes *ghes)
> > > unlock:
> > > mutex_unlock(&ghes_reg_mutex);
> > > }
> > > +
> > > +static int __init ghes_edac_init(void) {
> > > + struct ghes *g, *g_tmp;
> > > +
> > > + if (!IS_ENABLED(CONFIG_X86))
> > > + force_load = true;
> >
> > No, this is not how this works.
> >
> > > + ghes_devs = ghes_get_devices(force_load);
> > > + if (!ghes_devs)
> > > + return -ENODEV;
> >
> > You simply need to check force_load here.
> >
>
> Okay, hence should I export the *ghes_devs* in ghes?
It does not matter. This series then moves the force_load check
to ghes_edac_preferred(). It is confusing for reviewers...
Please divide patches based on the final design.
Toshi
> -----Original Message-----
> From: Jia He <[email protected]>
> Sent: Monday, August 22, 2022 10:41 AM
> Subject: [RESEND PATCH v3 3/9] EDAC/ghes: Make ghes_edac a proper module to
> remove the dependency on ghes
1. I suggest adding:
MODULE_ALIAS("acpi*")
so udev will automatically load the module on any system with ACPI.
> drivers/edac/Kconfig
> config EDAC_GHES
> + tristate "Output ACPI APEI/GHES BIOS detected errors via EDAC"
2. I suggest:
tristate "APEI (ACPI Platform Error Interfaces) GHES (Generic Hardware Error Source)"
That's in a menu of EDAC drivers, so no suffix is needed.
3. The Kconfig help text needs some updates, since the drivers are now ordering
themselves to avoid race conditions.
Current:
Not all machines support hardware-driven error report. Some of those
provide a BIOS-driven error report mechanism via ACPI, using the
APEI/GHES driver. By enabling this option, the error reports provided
by GHES are sent to userspace via the EDAC API.
When this option is enabled, it will disable the hardware-driven
mechanisms, if a GHES BIOS is detected, entering into the
"Firmware First" mode.
It should be noticed that keeping both GHES and a hardware-driven
error mechanism won't work well, as BIOS will race with OS, while
reading the error registers. So, if you want to not use "Firmware
first" GHES error mechanism, you should disable GHES either at
compilation time or by passing "ghes.disable=1" Kernel parameter
at boot time.
In doubt, say 'Y'.
Suggestion:
Support for error detection and correction based on APEI (ACPI Platform
Error Interfaces), which allows system firmware to report hardware errors
via the HEST (Hardware Error Source Table) using GHES (Generic Hardware
Error Source) records. Some systems perform "firmware first" processing
of errors before reporting them.
This module is supported in systems supporting GHES. If the architecture
is x86, the module only loads if the platform is listed in a known-good
platform list (see drivers/edac/ghes_edac.c) or if ghes.force_load=1
is specified on the kernel command line).
4. In the help text for each module that looks for GHES and refuses to load
(e.g., EDAC_AMD64), add a sentence:
This module does not load on a system supporting ACPI GHES.
> drivers/acpi/apei/ghes.c
> +MODULE_DESCRIPTION("Output ACPI APEI/GHES BIOS detected errors module via EDAC");
5. I suggest:
MODULE_DESCRIPTION("APEI (ACPI Platform Error Interfaces) GHES (Generic Hardware Error Source) EDAC driver")
On Fri, Aug 26, 2022 at 10:42:13PM +0000, Elliott, Robert (Servers) wrote:
> 4. In the help text for each module that looks for GHES and refuses to load
> (e.g., EDAC_AMD64), add a sentence:
>
> This module does not load on a system supporting ACPI GHES.
It is not "system supporting ACPI GHES." - it is on a system which is
*known* to have a more or less tested GHES implementation. The notoriety
of firmware RAS brokenness is well known.
So please stop this - there's a world outside HP BIOS.
None of this is needed for this patchset.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Sat, Aug 27, 2022 at 07:22:48AM +0200, Borislav Petkov wrote:
> On Fri, Aug 26, 2022 at 10:42:13PM +0000, Elliott, Robert (Servers) wrote:
> > 4. In the help text for each module that looks for GHES and refuses to load
> > (e.g., EDAC_AMD64), add a sentence:
> >
> > This module does not load on a system supporting ACPI GHES.
>
> It is not "system supporting ACPI GHES." - it is on a system which is
> *known* to have a more or less tested GHES implementation. The notoriety
> of firmware RAS brokenness is well known.
>
> So please stop this - there's a world outside HP BIOS.
>
> None of this is needed for this patchset.
>
GHES can be used for more than just memory errors. There are platforms where
memory errors are handled through the OS MCA, and PCIe AER errors are handled
through the FW, for example.
Is the HPE Server platform guaranteed to always provide memory errors through
GHES regardless of CPU vendor/architecture?
Thanks,
Yazen
On Mon, Aug 29, 2022 at 03:59:28PM +0000, Yazen Ghannam wrote:
> GHES can be used for more than just memory errors. There are platforms where
> memory errors are handled through the OS MCA, and PCIe AER errors are handled
> through the FW, for example.
>
> Is the HPE Server platform guaranteed to always provide memory errors through
> GHES regardless of CPU vendor/architecture?
/me looks in the direction of HPE folks...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Monday, August 29, 2022 2:39 PM, Borislav Petkov wrote:
> On Mon, Aug 29, 2022 at 03:59:28PM +0000, Yazen Ghannam wrote:
> > GHES can be used for more than just memory errors. There are platforms where
> > memory errors are handled through the OS MCA, and PCIe AER errors are handled
> > through the FW, for example.
> >
> > Is the HPE Server platform guaranteed to always provide memory errors through
> > GHES regardless of CPU vendor/architecture?
>
> /me looks in the direction of HPE folks...
The HPE platforms enabled by the platform check are guaranteed to be operating
in FW First mode, which FW decides which error to report to the OS via GHES or
other means. This may include multiple CPU vendors/architecture.
On such platforms, for instance, FW does not report corrected errors to the OS
since FW manages the threshold & FRU notification. Chipset-specific edac drivers,
designed for OS First mode, is not necessary on such platforms. Disabling such OS
First edac driver is achieved by enabling ghes_edac as well.
OS MCA is still used for uncorrected errors, such as SRAR (software recoverable
action required) which requires recovery action synchronous to the execution via
MCE signalling.
Toshi