2021-11-26 07:06:34

by Shuai Xue

[permalink] [raw]
Subject: [RFC PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
while SDEI is not ready until a subsys_initcall_sync().

The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
apei_sdei_unregister_ghes) to register or unregister event callback for
dispatcher in firmware. When the GHES driver probing, it registers the
corresponding callback according to the notification type specified by
GHES. If the GHES notification type is SDEI, the GHES driver will call
apei_sdei_register_ghes to register event call.

When the firmware emits an event, it migrates the handling of the event
into the kernel at the registered entry-point __sdei_asm_handler. And
finally, the kernel will call the registered event callback and return
status_code to indicate the status of event handling. SDEI_EV_FAILED
indicates that the kernel failed to handle the event.

Consequently, when an error occurs during kernel booting, the kernel is
unable to handle and report errors until the GHES driver is initialized by
device_initcall(), in which the event callback is registered. All errors
that occurred before GHES initialization are missed and there is no chance
to report and find them again.

From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
the estatus memory pool. On the other hand, ghes_init() relies on
sdei_init() to detect the SDEI version and the framework for registering
and unregistering events. By the way, I don't figure out why acpi_hest_init
is called in acpi_pci_root_init, it don't rely on any other thing. May it
could be moved further, following acpi_iort_init in acpi_init.

sdei_init() relies on ACPI table which is initialized subsys_initcall():
acpi_init(), acpi_bus_init(), acpi_load_tables(), acpi_tb_laod_namespace().
May it should be also moved further, after acpi_load_tables.

In this patch, move sdei_init and ghes_init as far ahead as possible, right
after acpi_hest_init().

Signed-off-by: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 18 ++++++++----------
drivers/acpi/pci_root.c | 5 ++++-
drivers/firmware/arm_sdei.c | 13 ++-----------
include/acpi/apei.h | 2 ++
include/linux/arm_sdei.h | 2 ++
5 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c8330ed1ffd..b11e46fb4b3d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,27 +1457,26 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};

-static int __init ghes_init(void)
+void __init ghes_init(void)
{
int rc;

if (acpi_disabled)
- return -ENODEV;
+ return;

switch (hest_disable) {
case HEST_NOT_FOUND:
- return -ENODEV;
+ pr_info(GHES_PFX "HEST is not found!\n");
+ return;
case HEST_DISABLED:
pr_info(GHES_PFX "HEST is not enabled!\n");
- return -EINVAL;
+ return;
default:
break;
}

- if (ghes_disable) {
+ if (ghes_disable)
pr_info(GHES_PFX "GHES is not enabled!\n");
- return -EINVAL;
- }

ghes_nmi_init_cxt();

@@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

- return 0;
+ return;
err:
- return rc;
+ ghes_disable = 1;
}
-device_initcall(ghes_init);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index ab2f7dfb0c44..1260bb556184 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -23,7 +23,7 @@
#include <linux/dmi.h>
#include <linux/platform_data/x86/apple.h>
#include <acpi/apei.h> /* for acpi_hest_init() */
-
+#include <linux/arm_sdei.h> /* for sdei_init() */
#include "internal.h"

#define ACPI_PCI_ROOT_CLASS "pci_bridge"
@@ -946,6 +946,9 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
void __init acpi_pci_root_init(void)
{
acpi_hest_init();
+ sdei_init();
+ ghes_init();
+
if (acpi_pci_disabled)
return;

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f9..1e1a51510e83 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
return true;
}

-static int __init sdei_init(void)
+void __init sdei_init(void)
{
struct platform_device *pdev;
int ret;

ret = platform_driver_register(&sdei_driver);
if (ret || !sdei_present_acpi())
- return ret;
+ return;

pdev = platform_device_register_simple(sdei_driver.driver.name,
0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
pr_info("Failed to register ACPI:SDEI platform device %d\n",
ret);
}
-
- return ret;
}

-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
int sdei_event_handler(struct pt_regs *regs,
struct sdei_registered_event *arg)
{
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae..7dbd6363fda7 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,8 +27,10 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
+void __init ghes_init(void);
#else
#define ghes_disable 1
+static inline void ghes_init(void) { return; }
#endif

#ifdef CONFIG_ACPI_APEI
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d..9c987188b692 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
/* For use by arch code when CPU hotplug notifiers are not appropriate. */
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { return ; }
#endif /* CONFIG_ARM_SDE_INTERFACE */


--
2.20.1.12.g72788fdb



2021-12-02 12:50:59

by Shuai Xue

[permalink] [raw]
Subject: Re: [RFC PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

Hi, Bjron,

By any chance, could you help review this patch? Any comment are welcomed.

Regards,
Shuai

On 2021/11/26 PM3:04, Shuai Xue wrote:
> On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
> while SDEI is not ready until a subsys_initcall_sync().
>
> The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
> apei_sdei_unregister_ghes) to register or unregister event callback for
> dispatcher in firmware. When the GHES driver probing, it registers the
> corresponding callback according to the notification type specified by
> GHES. If the GHES notification type is SDEI, the GHES driver will call
> apei_sdei_register_ghes to register event call.
>
> When the firmware emits an event, it migrates the handling of the event
> into the kernel at the registered entry-point __sdei_asm_handler. And
> finally, the kernel will call the registered event callback and return
> status_code to indicate the status of event handling. SDEI_EV_FAILED
> indicates that the kernel failed to handle the event.
>
> Consequently, when an error occurs during kernel booting, the kernel is
> unable to handle and report errors until the GHES driver is initialized by
> device_initcall(), in which the event callback is registered. All errors
> that occurred before GHES initialization are missed and there is no chance
> to report and find them again.
>
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and the framework for registering
> and unregistering events. By the way, I don't figure out why acpi_hest_init
> is called in acpi_pci_root_init, it don't rely on any other thing. May it
> could be moved further, following acpi_iort_init in acpi_init.
>
> sdei_init() relies on ACPI table which is initialized subsys_initcall():
> acpi_init(), acpi_bus_init(), acpi_load_tables(), acpi_tb_laod_namespace().
> May it should be also moved further, after acpi_load_tables.
>
> In this patch, move sdei_init and ghes_init as far ahead as possible, right
> after acpi_hest_init().
>
> Signed-off-by: Shuai Xue <[email protected]>
> ---
> drivers/acpi/apei/ghes.c | 18 ++++++++----------
> drivers/acpi/pci_root.c | 5 ++++-
> drivers/firmware/arm_sdei.c | 13 ++-----------
> include/acpi/apei.h | 2 ++
> include/linux/arm_sdei.h | 2 ++
> 5 files changed, 18 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0c8330ed1ffd..b11e46fb4b3d 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -1457,27 +1457,26 @@ static struct platform_driver ghes_platform_driver = {
> .remove = ghes_remove,
> };
>
> -static int __init ghes_init(void)
> +void __init ghes_init(void)
> {
> int rc;
>
> if (acpi_disabled)
> - return -ENODEV;
> + return;
>
> switch (hest_disable) {
> case HEST_NOT_FOUND:
> - return -ENODEV;
> + pr_info(GHES_PFX "HEST is not found!\n");
> + return;
> case HEST_DISABLED:
> pr_info(GHES_PFX "HEST is not enabled!\n");
> - return -EINVAL;
> + return;
> default:
> break;
> }
>
> - if (ghes_disable) {
> + if (ghes_disable)
> pr_info(GHES_PFX "GHES is not enabled!\n");
> - return -EINVAL;
> - }
>
> ghes_nmi_init_cxt();
>
> @@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
> else
> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>
> - return 0;
> + return;
> err:
> - return rc;
> + ghes_disable = 1;
> }
> -device_initcall(ghes_init);
> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> index ab2f7dfb0c44..1260bb556184 100644
> --- a/drivers/acpi/pci_root.c
> +++ b/drivers/acpi/pci_root.c
> @@ -23,7 +23,7 @@
> #include <linux/dmi.h>
> #include <linux/platform_data/x86/apple.h>
> #include <acpi/apei.h> /* for acpi_hest_init() */
> -
> +#include <linux/arm_sdei.h> /* for sdei_init() */
> #include "internal.h"
>
> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> @@ -946,6 +946,9 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
> void __init acpi_pci_root_init(void)
> {
> acpi_hest_init();
> + sdei_init();
> + ghes_init();
> +
> if (acpi_pci_disabled)
> return;
>
> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
> index a7e762c352f9..1e1a51510e83 100644
> --- a/drivers/firmware/arm_sdei.c
> +++ b/drivers/firmware/arm_sdei.c
> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
> return true;
> }
>
> -static int __init sdei_init(void)
> +void __init sdei_init(void)
> {
> struct platform_device *pdev;
> int ret;
>
> ret = platform_driver_register(&sdei_driver);
> if (ret || !sdei_present_acpi())
> - return ret;
> + return;
>
> pdev = platform_device_register_simple(sdei_driver.driver.name,
> 0, NULL, 0);
> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
> pr_info("Failed to register ACPI:SDEI platform device %d\n",
> ret);
> }
> -
> - return ret;
> }
>
> -/*
> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
> - * by device_initcall(). We want to be called in the middle.
> - */
> -subsys_initcall_sync(sdei_init);
> -
> int sdei_event_handler(struct pt_regs *regs,
> struct sdei_registered_event *arg)
> {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index ece0a8af2bae..7dbd6363fda7 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -27,8 +27,10 @@ extern int hest_disable;
> extern int erst_disable;
> #ifdef CONFIG_ACPI_APEI_GHES
> extern bool ghes_disable;
> +void __init ghes_init(void);
> #else
> #define ghes_disable 1
> +static inline void ghes_init(void) { return; }
> #endif
>
> #ifdef CONFIG_ACPI_APEI
> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
> index 0a241c5c911d..9c987188b692 100644
> --- a/include/linux/arm_sdei.h
> +++ b/include/linux/arm_sdei.h
> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
> int sdei_mask_local_cpu(void);
> int sdei_unmask_local_cpu(void);
> +void __init sdei_init(void);
> #else
> static inline int sdei_mask_local_cpu(void) { return 0; }
> static inline int sdei_unmask_local_cpu(void) { return 0; }
> +static inline void sdei_init(void) { return ; }
> #endif /* CONFIG_ARM_SDE_INTERFACE */
>
>
>

2021-12-16 13:35:05

by Shuai Xue

[permalink] [raw]
Subject: [RESEND PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
while SDEI is not ready until a subsys_initcall_sync().

The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
apei_sdei_unregister_ghes) to register or unregister event callback for
dispatcher in firmware. When the GHES driver probing, it registers the
corresponding callback according to the notification type specified by
GHES. If the GHES notification type is SDEI, the GHES driver will call
apei_sdei_register_ghes to register event call.

When the firmware emits an event, it migrates the handling of the event
into the kernel at the registered entry-point __sdei_asm_handler. And
finally, the kernel will call the registered event callback and return
status_code to indicate the status of event handling. SDEI_EV_FAILED
indicates that the kernel failed to handle the event.

Consequently, when an error occurs during kernel booting, the kernel is
unable to handle and report errors until the GHES driver is initialized by
device_initcall(), in which the event callback is registered. All errors
that occurred before GHES initialization are missed and there is no chance
to report and find them again.

From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
the estatus memory pool. On the other hand, ghes_init() relies on
sdei_init() to detect the SDEI version and the framework for registering
and unregistering events. By the way, I don't figure out why acpi_hest_init
is called in acpi_pci_root_init, it don't rely on any other thing. May it
could be moved further, following acpi_iort_init in acpi_init.

sdei_init() relies on ACPI table which is initialized subsys_initcall():
acpi_init(), acpi_bus_init(), acpi_load_tables(), acpi_tb_laod_namespace().
May it should be also moved further, after acpi_load_tables.

In this patch, move sdei_init and ghes_init as far ahead as possible, right
after acpi_hest_init().

Signed-off-by: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 18 ++++++++----------
drivers/acpi/pci_root.c | 5 ++++-
drivers/firmware/arm_sdei.c | 13 ++-----------
include/acpi/apei.h | 2 ++
include/linux/arm_sdei.h | 2 ++
5 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c8330ed1ffd..b11e46fb4b3d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,27 +1457,26 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};

-static int __init ghes_init(void)
+void __init ghes_init(void)
{
int rc;

if (acpi_disabled)
- return -ENODEV;
+ return;

switch (hest_disable) {
case HEST_NOT_FOUND:
- return -ENODEV;
+ pr_info(GHES_PFX "HEST is not found!\n");
+ return;
case HEST_DISABLED:
pr_info(GHES_PFX "HEST is not enabled!\n");
- return -EINVAL;
+ return;
default:
break;
}

- if (ghes_disable) {
+ if (ghes_disable)
pr_info(GHES_PFX "GHES is not enabled!\n");
- return -EINVAL;
- }

ghes_nmi_init_cxt();

@@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

- return 0;
+ return;
err:
- return rc;
+ ghes_disable = 1;
}
-device_initcall(ghes_init);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index ab2f7dfb0c44..1260bb556184 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -23,7 +23,7 @@
#include <linux/dmi.h>
#include <linux/platform_data/x86/apple.h>
#include <acpi/apei.h> /* for acpi_hest_init() */
-
+#include <linux/arm_sdei.h> /* for sdei_init() */
#include "internal.h"

#define ACPI_PCI_ROOT_CLASS "pci_bridge"
@@ -946,6 +946,9 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
void __init acpi_pci_root_init(void)
{
acpi_hest_init();
+ sdei_init();
+ ghes_init();
+
if (acpi_pci_disabled)
return;

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f9..1e1a51510e83 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
return true;
}

-static int __init sdei_init(void)
+void __init sdei_init(void)
{
struct platform_device *pdev;
int ret;

ret = platform_driver_register(&sdei_driver);
if (ret || !sdei_present_acpi())
- return ret;
+ return;

pdev = platform_device_register_simple(sdei_driver.driver.name,
0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
pr_info("Failed to register ACPI:SDEI platform device %d\n",
ret);
}
-
- return ret;
}

-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
int sdei_event_handler(struct pt_regs *regs,
struct sdei_registered_event *arg)
{
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae..7dbd6363fda7 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,8 +27,10 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
+void __init ghes_init(void);
#else
#define ghes_disable 1
+static inline void ghes_init(void) { return; }
#endif

#ifdef CONFIG_ACPI_APEI
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d..9c987188b692 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
/* For use by arch code when CPU hotplug notifiers are not appropriate. */
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { return ; }
#endif /* CONFIG_ARM_SDE_INTERFACE */


--
2.20.1.12.g72788fdb


2021-12-17 18:18:28

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

On Thu, Dec 16, 2021 at 2:35 PM Shuai Xue <[email protected]> wrote:
>
> On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
> while SDEI is not ready until a subsys_initcall_sync().
>
> The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
> apei_sdei_unregister_ghes) to register or unregister event callback for
> dispatcher in firmware. When the GHES driver probing, it registers the
> corresponding callback according to the notification type specified by
> GHES. If the GHES notification type is SDEI, the GHES driver will call
> apei_sdei_register_ghes to register event call.
>
> When the firmware emits an event, it migrates the handling of the event
> into the kernel at the registered entry-point __sdei_asm_handler. And
> finally, the kernel will call the registered event callback and return
> status_code to indicate the status of event handling. SDEI_EV_FAILED
> indicates that the kernel failed to handle the event.
>
> Consequently, when an error occurs during kernel booting, the kernel is
> unable to handle and report errors until the GHES driver is initialized by
> device_initcall(), in which the event callback is registered. All errors
> that occurred before GHES initialization are missed and there is no chance
> to report and find them again.
>
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and the framework for registering
> and unregistering events. By the way, I don't figure out why acpi_hest_init
> is called in acpi_pci_root_init, it don't rely on any other thing. May it
> could be moved further, following acpi_iort_init in acpi_init.
>
> sdei_init() relies on ACPI table which is initialized subsys_initcall():
> acpi_init(), acpi_bus_init(), acpi_load_tables(), acpi_tb_laod_namespace().
> May it should be also moved further, after acpi_load_tables.
>
> In this patch, move sdei_init and ghes_init as far ahead as possible, right
> after acpi_hest_init().
>
> Signed-off-by: Shuai Xue <[email protected]>

This needs ACKs from Bjorn and Boris/Tony/James/.

> ---
> drivers/acpi/apei/ghes.c | 18 ++++++++----------
> drivers/acpi/pci_root.c | 5 ++++-
> drivers/firmware/arm_sdei.c | 13 ++-----------
> include/acpi/apei.h | 2 ++
> include/linux/arm_sdei.h | 2 ++
> 5 files changed, 18 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0c8330ed1ffd..b11e46fb4b3d 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -1457,27 +1457,26 @@ static struct platform_driver ghes_platform_driver = {
> .remove = ghes_remove,
> };
>
> -static int __init ghes_init(void)
> +void __init ghes_init(void)
> {
> int rc;
>
> if (acpi_disabled)
> - return -ENODEV;
> + return;
>
> switch (hest_disable) {
> case HEST_NOT_FOUND:
> - return -ENODEV;
> + pr_info(GHES_PFX "HEST is not found!\n");
> + return;
> case HEST_DISABLED:
> pr_info(GHES_PFX "HEST is not enabled!\n");
> - return -EINVAL;
> + return;
> default:
> break;
> }
>
> - if (ghes_disable) {
> + if (ghes_disable)
> pr_info(GHES_PFX "GHES is not enabled!\n");
> - return -EINVAL;
> - }
>
> ghes_nmi_init_cxt();
>
> @@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
> else
> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>
> - return 0;
> + return;
> err:
> - return rc;
> + ghes_disable = 1;
> }
> -device_initcall(ghes_init);
> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> index ab2f7dfb0c44..1260bb556184 100644
> --- a/drivers/acpi/pci_root.c
> +++ b/drivers/acpi/pci_root.c
> @@ -23,7 +23,7 @@
> #include <linux/dmi.h>
> #include <linux/platform_data/x86/apple.h>
> #include <acpi/apei.h> /* for acpi_hest_init() */
> -
> +#include <linux/arm_sdei.h> /* for sdei_init() */
> #include "internal.h"
>
> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> @@ -946,6 +946,9 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
> void __init acpi_pci_root_init(void)
> {
> acpi_hest_init();
> + sdei_init();
> + ghes_init();
> +
> if (acpi_pci_disabled)
> return;
>
> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
> index a7e762c352f9..1e1a51510e83 100644
> --- a/drivers/firmware/arm_sdei.c
> +++ b/drivers/firmware/arm_sdei.c
> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
> return true;
> }
>
> -static int __init sdei_init(void)
> +void __init sdei_init(void)
> {
> struct platform_device *pdev;
> int ret;
>
> ret = platform_driver_register(&sdei_driver);
> if (ret || !sdei_present_acpi())
> - return ret;
> + return;
>
> pdev = platform_device_register_simple(sdei_driver.driver.name,
> 0, NULL, 0);
> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
> pr_info("Failed to register ACPI:SDEI platform device %d\n",
> ret);
> }
> -
> - return ret;
> }
>
> -/*
> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
> - * by device_initcall(). We want to be called in the middle.
> - */
> -subsys_initcall_sync(sdei_init);
> -
> int sdei_event_handler(struct pt_regs *regs,
> struct sdei_registered_event *arg)
> {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index ece0a8af2bae..7dbd6363fda7 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -27,8 +27,10 @@ extern int hest_disable;
> extern int erst_disable;
> #ifdef CONFIG_ACPI_APEI_GHES
> extern bool ghes_disable;
> +void __init ghes_init(void);
> #else
> #define ghes_disable 1
> +static inline void ghes_init(void) { return; }
> #endif
>
> #ifdef CONFIG_ACPI_APEI
> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
> index 0a241c5c911d..9c987188b692 100644
> --- a/include/linux/arm_sdei.h
> +++ b/include/linux/arm_sdei.h
> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
> int sdei_mask_local_cpu(void);
> int sdei_unmask_local_cpu(void);
> +void __init sdei_init(void);
> #else
> static inline int sdei_mask_local_cpu(void) { return 0; }
> static inline int sdei_unmask_local_cpu(void) { return 0; }
> +static inline void sdei_init(void) { return ; }
> #endif /* CONFIG_ARM_SDE_INTERFACE */
>
>
> --
> 2.20.1.12.g72788fdb
>

2021-12-19 04:04:45

by Shuai Xue

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

Hi Rafeal,

Thank you for your reply.

On 2021/12/18 AM2:17, Rafael J. Wysocki wrote:
> On Thu, Dec 16, 2021 at 2:35 PM Shuai Xue <[email protected]> wrote:
>>
>> On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
>> while SDEI is not ready until a subsys_initcall_sync().
>>
>> The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
>> apei_sdei_unregister_ghes) to register or unregister event callback for
>> dispatcher in firmware. When the GHES driver probing, it registers the
>> corresponding callback according to the notification type specified by
>> GHES. If the GHES notification type is SDEI, the GHES driver will call
>> apei_sdei_register_ghes to register event call.
>>
>> When the firmware emits an event, it migrates the handling of the event
>> into the kernel at the registered entry-point __sdei_asm_handler. And
>> finally, the kernel will call the registered event callback and return
>> status_code to indicate the status of event handling. SDEI_EV_FAILED
>> indicates that the kernel failed to handle the event.
>>
>> Consequently, when an error occurs during kernel booting, the kernel is
>> unable to handle and report errors until the GHES driver is initialized by
>> device_initcall(), in which the event callback is registered. All errors
>> that occurred before GHES initialization are missed and there is no chance
>> to report and find them again.
>>
>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>> the estatus memory pool. On the other hand, ghes_init() relies on
>> sdei_init() to detect the SDEI version and the framework for registering
>> and unregistering events. By the way, I don't figure out why acpi_hest_init
>> is called in acpi_pci_root_init, it don't rely on any other thing. May it
>> could be moved further, following acpi_iort_init in acpi_init.
>>
>> sdei_init() relies on ACPI table which is initialized subsys_initcall():
>> acpi_init(), acpi_bus_init(), acpi_load_tables(), acpi_tb_laod_namespace().
>> May it should be also moved further, after acpi_load_tables.
>>
>> In this patch, move sdei_init and ghes_init as far ahead as possible, right
>> after acpi_hest_init().
>>
>> Signed-off-by: Shuai Xue <[email protected]>
>
> This needs ACKs from Bjorn and Boris/Tony/James/.

OK, I will keep an eye on their response.
I am wondering that do you have any comments for revision?

Thank you.

Best Regrads,
Shuai

>> ---
>> drivers/acpi/apei/ghes.c | 18 ++++++++----------
>> drivers/acpi/pci_root.c | 5 ++++-
>> drivers/firmware/arm_sdei.c | 13 ++-----------
>> include/acpi/apei.h | 2 ++
>> include/linux/arm_sdei.h | 2 ++
>> 5 files changed, 18 insertions(+), 22 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 0c8330ed1ffd..b11e46fb4b3d 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -1457,27 +1457,26 @@ static struct platform_driver ghes_platform_driver = {
>> .remove = ghes_remove,
>> };
>>
>> -static int __init ghes_init(void)
>> +void __init ghes_init(void)
>> {
>> int rc;
>>
>> if (acpi_disabled)
>> - return -ENODEV;
>> + return;
>>
>> switch (hest_disable) {
>> case HEST_NOT_FOUND:
>> - return -ENODEV;
>> + pr_info(GHES_PFX "HEST is not found!\n");
>> + return;
>> case HEST_DISABLED:
>> pr_info(GHES_PFX "HEST is not enabled!\n");
>> - return -EINVAL;
>> + return;
>> default:
>> break;
>> }
>>
>> - if (ghes_disable) {
>> + if (ghes_disable)
>> pr_info(GHES_PFX "GHES is not enabled!\n");
>> - return -EINVAL;
>> - }
>>
>> ghes_nmi_init_cxt();
>>
>> @@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
>> else
>> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>>
>> - return 0;
>> + return;
>> err:
>> - return rc;
>> + ghes_disable = 1;
>> }
>> -device_initcall(ghes_init);
>> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
>> index ab2f7dfb0c44..1260bb556184 100644
>> --- a/drivers/acpi/pci_root.c
>> +++ b/drivers/acpi/pci_root.c
>> @@ -23,7 +23,7 @@
>> #include <linux/dmi.h>
>> #include <linux/platform_data/x86/apple.h>
>> #include <acpi/apei.h> /* for acpi_hest_init() */
>> -
>> +#include <linux/arm_sdei.h> /* for sdei_init() */
>> #include "internal.h"
>>
>> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
>> @@ -946,6 +946,9 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>> void __init acpi_pci_root_init(void)
>> {
>> acpi_hest_init();
>> + sdei_init();
>> + ghes_init();
>> +
>> if (acpi_pci_disabled)
>> return;
>>
>> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
>> index a7e762c352f9..1e1a51510e83 100644
>> --- a/drivers/firmware/arm_sdei.c
>> +++ b/drivers/firmware/arm_sdei.c
>> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
>> return true;
>> }
>>
>> -static int __init sdei_init(void)
>> +void __init sdei_init(void)
>> {
>> struct platform_device *pdev;
>> int ret;
>>
>> ret = platform_driver_register(&sdei_driver);
>> if (ret || !sdei_present_acpi())
>> - return ret;
>> + return;
>>
>> pdev = platform_device_register_simple(sdei_driver.driver.name,
>> 0, NULL, 0);
>> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
>> pr_info("Failed to register ACPI:SDEI platform device %d\n",
>> ret);
>> }
>> -
>> - return ret;
>> }
>>
>> -/*
>> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
>> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
>> - * by device_initcall(). We want to be called in the middle.
>> - */
>> -subsys_initcall_sync(sdei_init);
>> -
>> int sdei_event_handler(struct pt_regs *regs,
>> struct sdei_registered_event *arg)
>> {
>> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
>> index ece0a8af2bae..7dbd6363fda7 100644
>> --- a/include/acpi/apei.h
>> +++ b/include/acpi/apei.h
>> @@ -27,8 +27,10 @@ extern int hest_disable;
>> extern int erst_disable;
>> #ifdef CONFIG_ACPI_APEI_GHES
>> extern bool ghes_disable;
>> +void __init ghes_init(void);
>> #else
>> #define ghes_disable 1
>> +static inline void ghes_init(void) { return; }
>> #endif
>>
>> #ifdef CONFIG_ACPI_APEI
>> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
>> index 0a241c5c911d..9c987188b692 100644
>> --- a/include/linux/arm_sdei.h
>> +++ b/include/linux/arm_sdei.h
>> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
>> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
>> int sdei_mask_local_cpu(void);
>> int sdei_unmask_local_cpu(void);
>> +void __init sdei_init(void);
>> #else
>> static inline int sdei_mask_local_cpu(void) { return 0; }
>> static inline int sdei_unmask_local_cpu(void) { return 0; }
>> +static inline void sdei_init(void) { return ; }
>> #endif /* CONFIG_ARM_SDE_INTERFACE */
>>
>>
>> --
>> 2.20.1.12.g72788fdb
>>

2021-12-21 23:17:08

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

On Thu, Dec 16, 2021 at 09:34:56PM +0800, Shuai Xue wrote:
> On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
> while SDEI is not ready until a subsys_initcall_sync().
>
> The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
> apei_sdei_unregister_ghes) to register or unregister event callback for
> dispatcher in firmware. When the GHES driver probing, it registers the
> corresponding callback according to the notification type specified by
> GHES. If the GHES notification type is SDEI, the GHES driver will call
> apei_sdei_register_ghes to register event call.
>
> When the firmware emits an event, it migrates the handling of the event
> into the kernel at the registered entry-point __sdei_asm_handler. And
> finally, the kernel will call the registered event callback and return
> status_code to indicate the status of event handling. SDEI_EV_FAILED
> indicates that the kernel failed to handle the event.
>
> Consequently, when an error occurs during kernel booting, the kernel is
> unable to handle and report errors until the GHES driver is initialized by
> device_initcall(), in which the event callback is registered. All errors
> that occurred before GHES initialization are missed and there is no chance
> to report and find them again.
>
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and the framework for registering
> and unregistering events.

> By the way, I don't figure out why acpi_hest_init is called in
> acpi_pci_root_init, it don't rely on any other thing. May it could
> be moved further, following acpi_iort_init in acpi_init.

I think you should drop the "By the way ..." text or move it after the
"---" at the bottom of your commit log. It doesn't help understand
this patch.

> sdei_init() relies on ACPI table which is initialized
> subsys_initcall(): acpi_init(), acpi_bus_init(), acpi_load_tables(),
> acpi_tb_laod_namespace(). May it should be also moved further,
> after acpi_load_tables.

This text also doesn't seem relevant to this patch.

> In this patch, move sdei_init and ghes_init as far ahead as
> possible, right after acpi_hest_init().

I'm having a hard time figuring out the reason for this patch.

Apparently the relevant parts are sdei_init() and ghes_init().
Today they are executed in that order:

subsys_initcall_sync(sdei_init);
device_initcall(ghes_init);

After this patch, they would be executed in the same order, but called
explicitly instead of as initcalls:

acpi_pci_root_init()
{
acpi_hest_init();
sdei_init();
ghes_init();
...

Explicit calls are certainly better than initcalls, but that doesn't
seem to be the reason for this patch.

Does this patch fix a bug? If so, what is the bug?

You say that currently "errors that occur before GHES initialization
are missed". Isn't that still true after this patch? Does this patch
merely reduce the time before GHES initialization? If so, I'm
dubious, because we have to tolerate an arbitrary amount of time
there.

s/acpi_tb_laod_namespace/acpi_tb_load_namespace/

You use "()" after function names sometimes, but not always. Please
do it consistently.

> -device_initcall(ghes_init);

> void __init acpi_pci_root_init(void)
> {
> acpi_hest_init();
> + sdei_init();
> + ghes_init();

What's the connection between PCI, SDEI, and GHES? As far as I can
tell, SDEI and GHES are not PCI-specific, so it doesn't seem like they
should be initialized here in acpi_pci_root_init().

> -subsys_initcall_sync(sdei_init);

2021-12-23 08:11:24

by Shuai Xue

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

Hi, Bjorn,

Thank you for your comments.

在 2021/12/22 AM7:17, Bjorn Helgaas 写道:
> On Thu, Dec 16, 2021 at 09:34:56PM +0800, Shuai Xue wrote:
>> On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
>> while SDEI is not ready until a subsys_initcall_sync().
>>
>> The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
>> apei_sdei_unregister_ghes) to register or unregister event callback for
>> dispatcher in firmware. When the GHES driver probing, it registers the
>> corresponding callback according to the notification type specified by
>> GHES. If the GHES notification type is SDEI, the GHES driver will call
>> apei_sdei_register_ghes to register event call.
>>
>> When the firmware emits an event, it migrates the handling of the event
>> into the kernel at the registered entry-point __sdei_asm_handler. And
>> finally, the kernel will call the registered event callback and return
>> status_code to indicate the status of event handling. SDEI_EV_FAILED
>> indicates that the kernel failed to handle the event.
>>
>> Consequently, when an error occurs during kernel booting, the kernel is
>> unable to handle and report errors until the GHES driver is initialized by
>> device_initcall(), in which the event callback is registered. All errors
>> that occurred before GHES initialization are missed and there is no chance
>> to report and find them again.
>>
>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>> the estatus memory pool. On the other hand, ghes_init() relies on
>> sdei_init() to detect the SDEI version and the framework for registering
>> and unregistering events.
>
>> By the way, I don't figure out why acpi_hest_init is called in
>> acpi_pci_root_init, it don't rely on any other thing. May it could
>> be moved further, following acpi_iort_init in acpi_init.
>
> I think you should drop the "By the way ..." text or move it after the
> "---" at the bottom of your commit log. It doesn't help understand
> this patch.

I will fix it in next version.

>> sdei_init() relies on ACPI table which is initialized
>> subsys_initcall(): acpi_init(), acpi_bus_init(), acpi_load_tables(),
>> acpi_tb_laod_namespace(). May it should be also moved further,
>> after acpi_load_tables.
>
> This text also doesn't seem relevant to this patch.

I will delete it in next version.

>> In this patch, move sdei_init and ghes_init as far ahead as
>> possible, right after acpi_hest_init().
>
> I'm having a hard time figuring out the reason for this patch.
>
> Apparently the relevant parts are sdei_init() and ghes_init().
> Today they are executed in that order:
>
> subsys_initcall_sync(sdei_init);
> device_initcall(ghes_init);
>
> After this patch, they would be executed in the same order, but called
> explicitly instead of as initcalls:
>
> acpi_pci_root_init()
> {
> acpi_hest_init();
> sdei_init();
> ghes_init();
> ...
>
> Explicit calls are certainly better than initcalls, but that doesn't
> seem to be the reason for this patch.
>
> Does this patch fix a bug? If so, what is the bug?

Yes. When the kernel booting, the console logs many times from firmware
before GHES drivers init:

Trip in MM PCIe RAS handle(Intr:910)
Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
Find RP(98:1.0)
--Walk dev(98:1.0) CE:0 UCE:4000
...
ERROR: sdei_dispatch_event(32a) ret:-1
--handler(910) end

This is because the callback function has not been registered yet.
Previously reported errors will be overwritten by new ones. Therefore,
all errors that occurred before GHES initialization are missed
and there is no chance to report and find them again.


> You say that currently "errors that occur before GHES initialization
> are missed". Isn't that still true after this patch? Does this patch
> merely reduce the time before GHES initialization? If so, I'm
> dubious, because we have to tolerate an arbitrary amount of time
> there.
After this patch, there are still errors missing. As you mentioned,
we have to tolerate it until the software reporting capability is built.

Yes, this patch merely reduce the time before GHES initialization. The boot
dmesg before this patch:

[ 3.688586] HEST: Table parsing has been initialized.
...
[ 33.204340] calling sdei_init+0x0/0x120 @ 1
[ 33.208645] sdei: SDEIv1.0 (0x0) detected in firmware.
...
[ 36.005390] calling ghes_init+0x0/0x11c @ 1
[ 36.190021] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.


After this patch, the boot dmesg like bellow:

[ 3.688664] HEST: Table parsing has been initialized.
[ 3.688691] sdei: SDEIv1.0 (0x0) detected in firmware.
[ 3.694557] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.

As we can see, the initialization of GHES is advanced by 33 seconds.
So, in my opinion, this patch is necessary, right?
(It should be noted that the effect of optimization varies with the platform.)

> s/acpi_tb_laod_namespace/acpi_tb_load_namespace/

> You use "()" after function names sometimes, but not always. Please
> do it consistently.

Thank you for pointing this out. I will fix it in next version.


>> -device_initcall(ghes_init);
>
>> void __init acpi_pci_root_init(void)
>> {
>> acpi_hest_init();
>> + sdei_init();
>> + ghes_init();
>
> What's the connection between PCI, SDEI, and GHES? As far as I can
> tell, SDEI and GHES are not PCI-specific, so it doesn't seem like they
> should be initialized here in acpi_pci_root_init().

The only reason is that acpi_hest_init() is initialized here.

From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
the estatus memory pool. On the other hand, ghes_init() relies on
sdei_init() to detect the SDEI version and the framework for registering
and unregistering events. The dependencies are as follows

ghes_init() => acpi_hest_init()
ghes_init() => sdei_init()

I don't figure out why acpi_hest_init() is called in
acpi_pci_root_init(), it don't rely on any other thing.
I am wondering that should we moved all of them further? e.g.
following acpi_iort_init() in acpi_init().

Best Regards,
Shuai

2021-12-24 00:17:26

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

[+to Rafael, question about HEST/GHES/SDEI init]

On Thu, Dec 23, 2021 at 04:11:11PM +0800, Shuai Xue wrote:
> 在 2021/12/22 AM7:17, Bjorn Helgaas 写道:
> > On Thu, Dec 16, 2021 at 09:34:56PM +0800, Shuai Xue wrote:
> >> On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
> >> while SDEI is not ready until a subsys_initcall_sync().
> >>
> >> The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
> >> apei_sdei_unregister_ghes) to register or unregister event callback for
> >> dispatcher in firmware. When the GHES driver probing, it registers the
> >> corresponding callback according to the notification type specified by
> >> GHES. If the GHES notification type is SDEI, the GHES driver will call
> >> apei_sdei_register_ghes to register event call.
> >>
> >> When the firmware emits an event, it migrates the handling of the event
> >> into the kernel at the registered entry-point __sdei_asm_handler. And
> >> finally, the kernel will call the registered event callback and return
> >> status_code to indicate the status of event handling. SDEI_EV_FAILED
> >> indicates that the kernel failed to handle the event.
> >>
> >> Consequently, when an error occurs during kernel booting, the kernel is
> >> unable to handle and report errors until the GHES driver is initialized by
> >> device_initcall(), in which the event callback is registered. All errors
> >> that occurred before GHES initialization are missed and there is no chance
> >> to report and find them again.
> >>
> >> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> >> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> >> the estatus memory pool. On the other hand, ghes_init() relies on
> >> sdei_init() to detect the SDEI version and the framework for registering
> >> and unregistering events.
> >
> >> By the way, I don't figure out why acpi_hest_init is called in
> >> acpi_pci_root_init, it don't rely on any other thing. May it could
> >> be moved further, following acpi_iort_init in acpi_init.

> >> sdei_init() relies on ACPI table which is initialized
> >> subsys_initcall(): acpi_init(), acpi_bus_init(), acpi_load_tables(),
> >> acpi_tb_laod_namespace(). May it should be also moved further,
> >> after acpi_load_tables.

> >> In this patch, move sdei_init and ghes_init as far ahead as
> >> possible, right after acpi_hest_init().
> >
> > I'm having a hard time figuring out the reason for this patch.
> >
> > Apparently the relevant parts are sdei_init() and ghes_init().
> > Today they are executed in that order:
> >
> > subsys_initcall_sync(sdei_init);
> > device_initcall(ghes_init);
> >
> > After this patch, they would be executed in the same order, but called
> > explicitly instead of as initcalls:
> >
> > acpi_pci_root_init()
> > {
> > acpi_hest_init();
> > sdei_init();
> > ghes_init();
> > ...
> >
> > Explicit calls are certainly better than initcalls, but that doesn't
> > seem to be the reason for this patch.
> >
> > Does this patch fix a bug? If so, what is the bug?
>
> Yes. When the kernel booting, the console logs many times from firmware
> before GHES drivers init:
>
> Trip in MM PCIe RAS handle(Intr:910)
> Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
> Find RP(98:1.0)
> --Walk dev(98:1.0) CE:0 UCE:4000
> ...
> ERROR: sdei_dispatch_event(32a) ret:-1
> --handler(910) end
>
> This is because the callback function has not been registered yet.
> Previously reported errors will be overwritten by new ones. Therefore,
> all errors that occurred before GHES initialization are missed
> and there is no chance to report and find them again.
>
> > You say that currently "errors that occur before GHES initialization
> > are missed". Isn't that still true after this patch? Does this patch
> > merely reduce the time before GHES initialization? If so, I'm
> > dubious, because we have to tolerate an arbitrary amount of time
> > there.
>
> After this patch, there are still errors missing. As you mentioned,
> we have to tolerate it until the software reporting capability is built.
>
> Yes, this patch merely reduce the time before GHES initialization.

It's not a bug that errors that happen before the callback are lost.
At least, it's not a *Linux* bug. It might be a poor design of the
firmware error reporting interface.

If the only point of this patch is to reduce the time before GHES
initialization, the commit log should clearly say that.

> The boot dmesg before this patch:
>
> [ 3.688586] HEST: Table parsing has been initialized.
> ...
> [ 33.204340] calling sdei_init+0x0/0x120 @ 1
> [ 33.208645] sdei: SDEIv1.0 (0x0) detected in firmware.
> ...
> [ 36.005390] calling ghes_init+0x0/0x11c @ 1
> [ 36.190021] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.
>
>
> After this patch, the boot dmesg like bellow:
>
> [ 3.688664] HEST: Table parsing has been initialized.
> [ 3.688691] sdei: SDEIv1.0 (0x0) detected in firmware.
> [ 3.694557] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.

[Tangent: I think this GHES message is confusing. What "APEI bit"
does this refer to? The only bits I remember are the Flags bits in
HEST Error Source Descriptor Entries, e.g., ACPI v6.3, sec 18.3.2.

"WHEA _OSC" means nothing to me, and I didn't find anything useful
with grep, other than that "WHEA" might be an obsolete name for what
we now call "APEI".

I don't think there's anything in _OSC that mentions "firmware first."

I don't remember anything in the spec about a way to *enable* Firmware
First Error Handling needing (I'm looking at ACPI v6.3, sec 18.4).

I think the "firmware first" information is useless to the OS -- as
far as I can tell, the spec says nothing about anything the OS should
do based on the FIRMWARE_FIRST bits.]

> As we can see, the initialization of GHES is advanced by 33 seconds.
> So, in my opinion, this patch is necessary, right?
> (It should be noted that the effect of optimization varies with the platform.)

> >> -device_initcall(ghes_init);
> >
> >> void __init acpi_pci_root_init(void)
> >> {
> >> acpi_hest_init();
> >> + sdei_init();
> >> + ghes_init();
> >
> > What's the connection between PCI, SDEI, and GHES? As far as I can
> > tell, SDEI and GHES are not PCI-specific, so it doesn't seem like they
> > should be initialized here in acpi_pci_root_init().
>
> The only reason is that acpi_hest_init() is initialized here.
>
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and the framework for registering
> and unregistering events. The dependencies are as follows
>
> ghes_init() => acpi_hest_init()
> ghes_init() => sdei_init()
>
> I don't figure out why acpi_hest_init() is called in
> acpi_pci_root_init(), it don't rely on any other thing.
> I am wondering that should we moved all of them further? e.g.
> following acpi_iort_init() in acpi_init().

I don't know why acpi_hest_init() is called from acpi_pci_root_init().
It looks like HEST can support error sources other than PCI (IA-32
Machine Checks, NMIs, GHES, etc.) It was added by 415e12b23792
("PCI/ACPI: Request _OSC control once for each root bridge (v3)");
maybe Rafael remembers why.

Seem like acpi_hest_init(), sdei_init(), and ghes_init() should all go
somewhere else, but I don't know where. Maybe somewhere in
acpi_init()?

Bjorn

2021-12-24 08:55:53

by Shuai Xue

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

Hi, Bjorn,

Thank you for your comments.

在 2021/12/24 AM8:17, Bjorn Helgaas 写道:
> [+to Rafael, question about HEST/GHES/SDEI init]
>
> On Thu, Dec 23, 2021 at 04:11:11PM +0800, Shuai Xue wrote:
>> 在 2021/12/22 AM7:17, Bjorn Helgaas 写道:
>>> On Thu, Dec 16, 2021 at 09:34:56PM +0800, Shuai Xue wrote:
>>>> On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
>>>> while SDEI is not ready until a subsys_initcall_sync().
>>>>
>>>> The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
>>>> apei_sdei_unregister_ghes) to register or unregister event callback for
>>>> dispatcher in firmware. When the GHES driver probing, it registers the
>>>> corresponding callback according to the notification type specified by
>>>> GHES. If the GHES notification type is SDEI, the GHES driver will call
>>>> apei_sdei_register_ghes to register event call.
>>>>
>>>> When the firmware emits an event, it migrates the handling of the event
>>>> into the kernel at the registered entry-point __sdei_asm_handler. And
>>>> finally, the kernel will call the registered event callback and return
>>>> status_code to indicate the status of event handling. SDEI_EV_FAILED
>>>> indicates that the kernel failed to handle the event.
>>>>
>>>> Consequently, when an error occurs during kernel booting, the kernel is
>>>> unable to handle and report errors until the GHES driver is initialized by
>>>> device_initcall(), in which the event callback is registered. All errors
>>>> that occurred before GHES initialization are missed and there is no chance
>>>> to report and find them again.
>>>>
>>>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>>>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>>>> the estatus memory pool. On the other hand, ghes_init() relies on
>>>> sdei_init() to detect the SDEI version and the framework for registering
>>>> and unregistering events.
>>>
>>>> By the way, I don't figure out why acpi_hest_init is called in
>>>> acpi_pci_root_init, it don't rely on any other thing. May it could
>>>> be moved further, following acpi_iort_init in acpi_init.
>
>>>> sdei_init() relies on ACPI table which is initialized
>>>> subsys_initcall(): acpi_init(), acpi_bus_init(), acpi_load_tables(),
>>>> acpi_tb_laod_namespace(). May it should be also moved further,
>>>> after acpi_load_tables.
>
>>>> In this patch, move sdei_init and ghes_init as far ahead as
>>>> possible, right after acpi_hest_init().
>>>
>>> I'm having a hard time figuring out the reason for this patch.
>>>
>>> Apparently the relevant parts are sdei_init() and ghes_init().
>>> Today they are executed in that order:
>>>
>>> subsys_initcall_sync(sdei_init);
>>> device_initcall(ghes_init);
>>>
>>> After this patch, they would be executed in the same order, but called
>>> explicitly instead of as initcalls:
>>>
>>> acpi_pci_root_init()
>>> {
>>> acpi_hest_init();
>>> sdei_init();
>>> ghes_init();
>>> ...
>>>
>>> Explicit calls are certainly better than initcalls, but that doesn't
>>> seem to be the reason for this patch.
>>>
>>> Does this patch fix a bug? If so, what is the bug?
>>
>> Yes. When the kernel booting, the console logs many times from firmware
>> before GHES drivers init:
>>
>> Trip in MM PCIe RAS handle(Intr:910)
>> Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
>> Find RP(98:1.0)
>> --Walk dev(98:1.0) CE:0 UCE:4000
>> ...
>> ERROR: sdei_dispatch_event(32a) ret:-1
>> --handler(910) end
>>
>> This is because the callback function has not been registered yet.
>> Previously reported errors will be overwritten by new ones. Therefore,
>> all errors that occurred before GHES initialization are missed
>> and there is no chance to report and find them again.
>>
>>> You say that currently "errors that occur before GHES initialization
>>> are missed". Isn't that still true after this patch? Does this patch
>>> merely reduce the time before GHES initialization? If so, I'm
>>> dubious, because we have to tolerate an arbitrary amount of time
>>> there.
>>
>> After this patch, there are still errors missing. As you mentioned,
>> we have to tolerate it until the software reporting capability is built.
>>
>> Yes, this patch merely reduce the time before GHES initialization.
>
> It's not a bug that errors that happen before the callback are lost.
> At least, it's not a *Linux* bug. It might be a poor design of the
> firmware error reporting interface.
>
> If the only point of this patch is to reduce the time before GHES
> initialization, the commit log should clearly say that.

Yep, it is a design defect of firmware. I will explicitly document
the purpose of this patch in next version.

>> The boot dmesg before this patch:
>>
>> [ 3.688586] HEST: Table parsing has been initialized.
>> ...
>> [ 33.204340] calling sdei_init+0x0/0x120 @ 1
>> [ 33.208645] sdei: SDEIv1.0 (0x0) detected in firmware.
>> ...
>> [ 36.005390] calling ghes_init+0x0/0x11c @ 1
>> [ 36.190021] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.
>>
>>
>> After this patch, the boot dmesg like bellow:
>>
>> [ 3.688664] HEST: Table parsing has been initialized.
>> [ 3.688691] sdei: SDEIv1.0 (0x0) detected in firmware.
>> [ 3.694557] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.

After this patch, we use explicit call to init GHES rather than initcall.
The GHES message here is just to prove that GHES initialization is
advanced to 3.694557 seconds.


> [Tangent: I think this GHES message is confusing. What "APEI bit"
> does this refer to? The only bits I remember are the Flags bits in
> HEST Error Source Descriptor Entries, e.g., ACPI v6.3, sec 18.3.2.

From the log code behavior, the APEI bit refers to OSC_SB_APEI_SUPPORT.
(BIT 4, ACPI v6.4, sec 6.2.11.2, table Table 6.13 Platform-Wide _OSC
Capabilities DWORD 2. [1])

/* code in drivers/acpi/apei/ghes.c */
if (rc == 0 && osc_sb_apei_support_acked)
pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");

/* code in drivers/acpi/bus.c */
osc_sb_apei_support_acked =
capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;


> "WHEA _OSC" means nothing to me, and I didn't find anything useful
> with grep, other than that "WHEA" might be an obsolete name for what
> we now call "APEI".

Yep, WHEA is an obsolete name. I think apei_osc_setup() is added for compatibility
in commit eccddd32ced0 ("ACPI, APEI, Add APEI bit support in generic _OSC call")'

The ACPI spec Platform-Wide OSPM Capabilities defines
UUID "0811B06E-4A27-44F9-8D60-3CBBC22E7B48" is used in
acpi_bus_osc_support(). The Microsoft WHEA defines
UUID "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c" that is used
in apei_osc_setup(). [2][3]

From the discussion[4], the GHES message is to distinguish between
the two specs.

To avoid confusion, can we change the message as follow.

- generic _OSC succeeded, APEI _OSC failed: APEI firmware first mode is
enabled by APEI bit.
- generic _OSC failed, APEI _OSC succeeded: APEI firmware first mode is
enabled by WHEA _OSC.
- both succeeded: APEI firmware first mode is enabled by APEI bit and
WHEA _OSC.
- both failed: Failed to enable APEI firmware first mode!


> I don't think there's anything in _OSC that mentions "firmware first."
>
> I don't remember anything in the spec about a way to *enable* Firmware
> First Error Handling needing (I'm looking at ACPI v6.3, sec 18.4).
>
> I think the "firmware first" information is useless to the OS -- as
> far as I can tell, the spec says nothing about anything the OS should
> do based on the FIRMWARE_FIRST bits.]

GHES works in "Firmware First" mode to report platform hardware error
and it depends on APEI interface. (drivers/acpi/apei/ghes.c)

* Generic Hardware Error Source provides a way to report platform
* hardware errors (such as that from chipset). It works in so called
* "Firmware First" mode, that is, hardware errors are reported to
* firmware firstly, then reported to Linux by firmware.

Based on above, I think the GHES message just means that this booting
kernel supports APEI or WHEA, and GHES report Firmware first errors on
this machine. Is it fine to keep the message?


>> As we can see, the initialization of GHES is advanced by 33 seconds.
>> So, in my opinion, this patch is necessary, right?
>> (It should be noted that the effect of optimization varies with the platform.)
>
>>>> -device_initcall(ghes_init);
>>>
>>>> void __init acpi_pci_root_init(void)
>>>> {
>>>> acpi_hest_init();
>>>> + sdei_init();
>>>> + ghes_init();
>>>
>>> What's the connection between PCI, SDEI, and GHES? As far as I can
>>> tell, SDEI and GHES are not PCI-specific, so it doesn't seem like they
>>> should be initialized here in acpi_pci_root_init().
>>
>> The only reason is that acpi_hest_init() is initialized here.
>>
>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>> the estatus memory pool. On the other hand, ghes_init() relies on
>> sdei_init() to detect the SDEI version and the framework for registering
>> and unregistering events. The dependencies are as follows
>>
>> ghes_init() => acpi_hest_init()
>> ghes_init() => sdei_init()
>>
>> I don't figure out why acpi_hest_init() is called in
>> acpi_pci_root_init(), it don't rely on any other thing.
>> I am wondering that should we moved all of them further? e.g.
>> following acpi_iort_init() in acpi_init().
>
> I don't know why acpi_hest_init() is called from acpi_pci_root_init().
> It looks like HEST can support error sources other than PCI (IA-32
> Machine Checks, NMIs, GHES, etc.) It was added by 415e12b23792
> ("PCI/ACPI: Request _OSC control once for each root bridge (v3)");
> maybe Rafael remembers why.
>
> Seem like acpi_hest_init(), sdei_init(), and ghes_init() should all go
> somewhere else, but I don't know where. Maybe somewhere in
> acpi_init()?

I tried to move them into acpi_init(), and it works well.

@@ -1251,6 +1251,9 @@ static int __init acpi_init(void)

pci_mmcfg_late_init();
acpi_iort_init();
+ acpi_hest_init();
+ sdei_init();
+ ghes_init();
acpi_scan_init();
acpi_ec_init();

What's your opinion?

Best Regards,

Shuai


[1] https://uefi.org/specs/ACPI/6.4/06_Device_Configuration/Device_Configuration.html#platform-wide-osc-capabilities-dword-2
[2] https://www.mail-archive.com/[email protected]/msg4718503.html
[3] http://www.guyreams.com/PDF/whea.pdf
[4] https://patchwork.kernel.org/project/linux-acpi/patch/[email protected]/#1978922

2022-01-13 13:38:38

by Shuai Xue

[permalink] [raw]
Subject: Re: [RESEND PATCH v4] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

Hi, Bjorn, Rafael,

Happy New Year :)

I am wondering that do you have any comment to this implementation? Any feedback
are welcomed.

Thank you.

Best regrads,
Shuai

在 2021/12/24 PM4:55, Shuai Xue 写道:
> Hi, Bjorn,
>
> Thank you for your comments.
>
> 在 2021/12/24 AM8:17, Bjorn Helgaas 写道:
>> [+to Rafael, question about HEST/GHES/SDEI init]
>>
>> On Thu, Dec 23, 2021 at 04:11:11PM +0800, Shuai Xue wrote:
>>> 在 2021/12/22 AM7:17, Bjorn Helgaas 写道:
>>>> On Thu, Dec 16, 2021 at 09:34:56PM +0800, Shuai Xue wrote:
>>>>> On an ACPI system, ACPI is initialised very early from a subsys_initcall(),
>>>>> while SDEI is not ready until a subsys_initcall_sync().
>>>>>
>>>>> The SDEI driver provides functions (e.g. apei_sdei_register_ghes,
>>>>> apei_sdei_unregister_ghes) to register or unregister event callback for
>>>>> dispatcher in firmware. When the GHES driver probing, it registers the
>>>>> corresponding callback according to the notification type specified by
>>>>> GHES. If the GHES notification type is SDEI, the GHES driver will call
>>>>> apei_sdei_register_ghes to register event call.
>>>>>
>>>>> When the firmware emits an event, it migrates the handling of the event
>>>>> into the kernel at the registered entry-point __sdei_asm_handler. And
>>>>> finally, the kernel will call the registered event callback and return
>>>>> status_code to indicate the status of event handling. SDEI_EV_FAILED
>>>>> indicates that the kernel failed to handle the event.
>>>>>
>>>>> Consequently, when an error occurs during kernel booting, the kernel is
>>>>> unable to handle and report errors until the GHES driver is initialized by
>>>>> device_initcall(), in which the event callback is registered. All errors
>>>>> that occurred before GHES initialization are missed and there is no chance
>>>>> to report and find them again.
>>>>>
>>>>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>>>>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>>>>> the estatus memory pool. On the other hand, ghes_init() relies on
>>>>> sdei_init() to detect the SDEI version and the framework for registering
>>>>> and unregistering events.
>>>>
>>>>> By the way, I don't figure out why acpi_hest_init is called in
>>>>> acpi_pci_root_init, it don't rely on any other thing. May it could
>>>>> be moved further, following acpi_iort_init in acpi_init.
>>
>>>>> sdei_init() relies on ACPI table which is initialized
>>>>> subsys_initcall(): acpi_init(), acpi_bus_init(), acpi_load_tables(),
>>>>> acpi_tb_laod_namespace(). May it should be also moved further,
>>>>> after acpi_load_tables.
>>
>>>>> In this patch, move sdei_init and ghes_init as far ahead as
>>>>> possible, right after acpi_hest_init().
>>>>
>>>> I'm having a hard time figuring out the reason for this patch.
>>>>
>>>> Apparently the relevant parts are sdei_init() and ghes_init().
>>>> Today they are executed in that order:
>>>>
>>>> subsys_initcall_sync(sdei_init);
>>>> device_initcall(ghes_init);
>>>>
>>>> After this patch, they would be executed in the same order, but called
>>>> explicitly instead of as initcalls:
>>>>
>>>> acpi_pci_root_init()
>>>> {
>>>> acpi_hest_init();
>>>> sdei_init();
>>>> ghes_init();
>>>> ...
>>>>
>>>> Explicit calls are certainly better than initcalls, but that doesn't
>>>> seem to be the reason for this patch.
>>>>
>>>> Does this patch fix a bug? If so, what is the bug?
>>>
>>> Yes. When the kernel booting, the console logs many times from firmware
>>> before GHES drivers init:
>>>
>>> Trip in MM PCIe RAS handle(Intr:910)
>>> Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
>>> Find RP(98:1.0)
>>> --Walk dev(98:1.0) CE:0 UCE:4000
>>> ...
>>> ERROR: sdei_dispatch_event(32a) ret:-1
>>> --handler(910) end
>>>
>>> This is because the callback function has not been registered yet.
>>> Previously reported errors will be overwritten by new ones. Therefore,
>>> all errors that occurred before GHES initialization are missed
>>> and there is no chance to report and find them again.
>>>
>>>> You say that currently "errors that occur before GHES initialization
>>>> are missed". Isn't that still true after this patch? Does this patch
>>>> merely reduce the time before GHES initialization? If so, I'm
>>>> dubious, because we have to tolerate an arbitrary amount of time
>>>> there.
>>>
>>> After this patch, there are still errors missing. As you mentioned,
>>> we have to tolerate it until the software reporting capability is built.
>>>
>>> Yes, this patch merely reduce the time before GHES initialization.
>>
>> It's not a bug that errors that happen before the callback are lost.
>> At least, it's not a *Linux* bug. It might be a poor design of the
>> firmware error reporting interface.
>>
>> If the only point of this patch is to reduce the time before GHES
>> initialization, the commit log should clearly say that.
>
> Yep, it is a design defect of firmware. I will explicitly document
> the purpose of this patch in next version.
>
>>> The boot dmesg before this patch:
>>>
>>> [ 3.688586] HEST: Table parsing has been initialized.
>>> ...
>>> [ 33.204340] calling sdei_init+0x0/0x120 @ 1
>>> [ 33.208645] sdei: SDEIv1.0 (0x0) detected in firmware.
>>> ...
>>> [ 36.005390] calling ghes_init+0x0/0x11c @ 1
>>> [ 36.190021] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.
>>>
>>>
>>> After this patch, the boot dmesg like bellow:
>>>
>>> [ 3.688664] HEST: Table parsing has been initialized.
>>> [ 3.688691] sdei: SDEIv1.0 (0x0) detected in firmware.
>>> [ 3.694557] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.
>
> After this patch, we use explicit call to init GHES rather than initcall.
> The GHES message here is just to prove that GHES initialization is
> advanced to 3.694557 seconds.
>
>
>> [Tangent: I think this GHES message is confusing. What "APEI bit"
>> does this refer to? The only bits I remember are the Flags bits in
>> HEST Error Source Descriptor Entries, e.g., ACPI v6.3, sec 18.3.2.
>
> From the log code behavior, the APEI bit refers to OSC_SB_APEI_SUPPORT.
> (BIT 4, ACPI v6.4, sec 6.2.11.2, table Table 6.13 Platform-Wide _OSC
> Capabilities DWORD 2. [1])
>
> /* code in drivers/acpi/apei/ghes.c */
> if (rc == 0 && osc_sb_apei_support_acked)
> pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
>
> /* code in drivers/acpi/bus.c */
> osc_sb_apei_support_acked =
> capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
>
>
>> "WHEA _OSC" means nothing to me, and I didn't find anything useful
>> with grep, other than that "WHEA" might be an obsolete name for what
>> we now call "APEI".
>
> Yep, WHEA is an obsolete name. I think apei_osc_setup() is added for compatibility
> in commit eccddd32ced0 ("ACPI, APEI, Add APEI bit support in generic _OSC call")'
>
> The ACPI spec Platform-Wide OSPM Capabilities defines
> UUID "0811B06E-4A27-44F9-8D60-3CBBC22E7B48" is used in
> acpi_bus_osc_support(). The Microsoft WHEA defines
> UUID "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c" that is used
> in apei_osc_setup(). [2][3]
>
> From the discussion[4], the GHES message is to distinguish between
> the two specs.
>
> To avoid confusion, can we change the message as follow.
>
> - generic _OSC succeeded, APEI _OSC failed: APEI firmware first mode is
> enabled by APEI bit.
> - generic _OSC failed, APEI _OSC succeeded: APEI firmware first mode is
> enabled by WHEA _OSC.
> - both succeeded: APEI firmware first mode is enabled by APEI bit and
> WHEA _OSC.
> - both failed: Failed to enable APEI firmware first mode!
>
>
>> I don't think there's anything in _OSC that mentions "firmware first."
>>
>> I don't remember anything in the spec about a way to *enable* Firmware
>> First Error Handling needing (I'm looking at ACPI v6.3, sec 18.4).
>>
>> I think the "firmware first" information is useless to the OS -- as
>> far as I can tell, the spec says nothing about anything the OS should
>> do based on the FIRMWARE_FIRST bits.]
>
> GHES works in "Firmware First" mode to report platform hardware error
> and it depends on APEI interface. (drivers/acpi/apei/ghes.c)
>
> * Generic Hardware Error Source provides a way to report platform
> * hardware errors (such as that from chipset). It works in so called
> * "Firmware First" mode, that is, hardware errors are reported to
> * firmware firstly, then reported to Linux by firmware.
>
> Based on above, I think the GHES message just means that this booting
> kernel supports APEI or WHEA, and GHES report Firmware first errors on
> this machine. Is it fine to keep the message?
>
>
>>> As we can see, the initialization of GHES is advanced by 33 seconds.
>>> So, in my opinion, this patch is necessary, right?
>>> (It should be noted that the effect of optimization varies with the platform.)
>>
>>>>> -device_initcall(ghes_init);
>>>>
>>>>> void __init acpi_pci_root_init(void)
>>>>> {
>>>>> acpi_hest_init();
>>>>> + sdei_init();
>>>>> + ghes_init();
>>>>
>>>> What's the connection between PCI, SDEI, and GHES? As far as I can
>>>> tell, SDEI and GHES are not PCI-specific, so it doesn't seem like they
>>>> should be initialized here in acpi_pci_root_init().
>>>
>>> The only reason is that acpi_hest_init() is initialized here.
>>>
>>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>>> the estatus memory pool. On the other hand, ghes_init() relies on
>>> sdei_init() to detect the SDEI version and the framework for registering
>>> and unregistering events. The dependencies are as follows
>>>
>>> ghes_init() => acpi_hest_init()
>>> ghes_init() => sdei_init()
>>>
>>> I don't figure out why acpi_hest_init() is called in
>>> acpi_pci_root_init(), it don't rely on any other thing.
>>> I am wondering that should we moved all of them further? e.g.
>>> following acpi_iort_init() in acpi_init().
>>
>> I don't know why acpi_hest_init() is called from acpi_pci_root_init().
>> It looks like HEST can support error sources other than PCI (IA-32
>> Machine Checks, NMIs, GHES, etc.) It was added by 415e12b23792
>> ("PCI/ACPI: Request _OSC control once for each root bridge (v3)");
>> maybe Rafael remembers why.
>>
>> Seem like acpi_hest_init(), sdei_init(), and ghes_init() should all go
>> somewhere else, but I don't know where. Maybe somewhere in
>> acpi_init()?
>
> I tried to move them into acpi_init(), and it works well.
>
> @@ -1251,6 +1251,9 @@ static int __init acpi_init(void)
>
> pci_mmcfg_late_init();
> acpi_iort_init();
> + acpi_hest_init();
> + sdei_init();
> + ghes_init();
> acpi_scan_init();
> acpi_ec_init();
>
> What's your opinion?
>
> Best Regards,
>
> Shuai
>
>
> [1] https://uefi.org/specs/ACPI/6.4/06_Device_Configuration/Device_Configuration.html#platform-wide-osc-capabilities-dword-2
> [2] https://www.mail-archive.com/[email protected]/msg4718503.html
> [3] http://www.guyreams.com/PDF/whea.pdf
> [4] https://patchwork.kernel.org/project/linux-acpi/patch/[email protected]/#1978922

2022-01-17 05:35:40

by Shuai Xue

[permalink] [raw]
Subject: [PATCH v5] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

On an ACPI system, ACPI is initialised very early from a
subsys_initcall(), while SDEI is not ready until a
subsys_initcall_sync(). This patch is to reduce the time before GHES
initialization.

The SDEI driver provides functions (e.g. apei_sdei_register_ghes(),
apei_sdei_unregister_ghes()) to register or unregister event callback
for dispatcher in firmware. When the GHES driver probing, it registers
the corresponding callback according to the notification type specified
by GHES. If the GHES notification type is SDEI, the GHES driver will
call apei_sdei_register_ghes() to register event call.

When the firmware emits an event, it migrates the handling of the event
into the kernel at the registered entry-point __sdei_asm_handler. And
finally, the kernel will call the registered event callback and return
status_code to indicate the status of event handling. SDEI_EV_FAILED
indicates that the kernel failed to handle the event.

Consequently, when an error occurs during kernel booting, the kernel is
unable to handle and report errors until the GHES driver is initialized
by device_initcall(), in which the event callback is registered. For
example, when the kernel booting, the console logs many times from
firmware before GHES drivers init in our platform:

Trip in MM PCIe RAS handle(Intr:910)
Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
Find RP(98:1.0)
--Walk dev(98:1.0) CE:0 UCE:4000
...
ERROR: sdei_dispatch_event(32a) ret:-1
--handler(910) end

This is because the callback function has not been registered yet. Due
to the poor design of firmware, previously reported errors will be
overwritten by new ones. Therefore, all errors that occurred before GHES
initialization are missed and there is no chance to report and find them
again.

From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
the estatus memory pool. On the other hand, ghes_init() relies on
sdei_init() to detect the SDEI version and the framework for registering
and unregistering events. The dependencies are as follows

ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
ghes_init() => sdei_init()

Based on above, move sdei_init and ghes_init as far ahead as possible,
right after acpi_iort_init() in acpi_init(). After this patch, there are
still errors missing, but we have to tolerate it until the software
reporting capability is built. And this patch merely reduce the time
before GHES initialization. The boot dmesg before this patch:

[ 3.688586] HEST: Table parsing has been initialized.
...
[ 33.204340] calling sdei_init+0x0/0x120 @ 1
[ 33.208645] sdei: SDEIv1.0 (0x0) detected in firmware.
...
[ 36.005390] calling ghes_init+0x0/0x11c @ 1
[ 36.190021] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.

After this patch, the boot dmesg like bellow:

[ 3.688664] HEST: Table parsing has been initialized.
[ 3.688691] sdei: SDEIv1.0 (0x0) detected in firmware.
[ 3.694557] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.

As we can see, the initialization of GHES is advanced by 33 seconds.
(It should be noted that the effect of optimization varies with the
platform.)

Signed-off-by: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 18 ++++++++----------
drivers/acpi/bus.c | 4 ++++
drivers/acpi/pci_root.c | 4 ++--
drivers/firmware/arm_sdei.c | 13 ++-----------
include/acpi/apei.h | 2 ++
include/linux/arm_sdei.h | 2 ++
6 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c5c9acc6254..64f24a9f213f 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,27 +1457,26 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};

-static int __init ghes_init(void)
+void __init ghes_init(void)
{
int rc;

if (acpi_disabled)
- return -ENODEV;
+ return;

switch (hest_disable) {
case HEST_NOT_FOUND:
- return -ENODEV;
+ pr_info(GHES_PFX "HEST is not found!\n");
+ return;
case HEST_DISABLED:
pr_info(GHES_PFX "HEST is not enabled!\n");
- return -EINVAL;
+ return;
default:
break;
}

- if (ghes_disable) {
+ if (ghes_disable)
pr_info(GHES_PFX "GHES is not enabled!\n");
- return -EINVAL;
- }

ghes_nmi_init_cxt();

@@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

- return 0;
+ return;
err:
- return rc;
+ ghes_disable = 1;
}
-device_initcall(ghes_init);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 07f604832fd6..1dcd71df2cd5 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -30,6 +30,7 @@
#include <linux/acpi_viot.h>
#include <linux/pci.h>
#include <acpi/apei.h>
+#include <linux/arm_sdei.h>
#include <linux/suspend.h>
#include <linux/prmt.h>

@@ -1331,6 +1332,9 @@ static int __init acpi_init(void)

pci_mmcfg_late_init();
acpi_iort_init();
+ acpi_hest_init();
+ sdei_init();
+ ghes_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index b76db99cced3..14e20af44627 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -23,7 +23,7 @@
#include <linux/dmi.h>
#include <linux/platform_data/x86/apple.h>
#include <acpi/apei.h> /* for acpi_hest_init() */
-
+#include <linux/arm_sdei.h> /* for sdei_init() */
#include "internal.h"

#define ACPI_PCI_ROOT_CLASS "pci_bridge"
@@ -943,7 +943,7 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,

void __init acpi_pci_root_init(void)
{
- acpi_hest_init();
+
if (acpi_pci_disabled)
return;

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f9..1e1a51510e83 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
return true;
}

-static int __init sdei_init(void)
+void __init sdei_init(void)
{
struct platform_device *pdev;
int ret;

ret = platform_driver_register(&sdei_driver);
if (ret || !sdei_present_acpi())
- return ret;
+ return;

pdev = platform_device_register_simple(sdei_driver.driver.name,
0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
pr_info("Failed to register ACPI:SDEI platform device %d\n",
ret);
}
-
- return ret;
}

-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
int sdei_event_handler(struct pt_regs *regs,
struct sdei_registered_event *arg)
{
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae..7dbd6363fda7 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,8 +27,10 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
+void __init ghes_init(void);
#else
#define ghes_disable 1
+static inline void ghes_init(void) { return; }
#endif

#ifdef CONFIG_ACPI_APEI
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d..9c987188b692 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
/* For use by arch code when CPU hotplug notifiers are not appropriate. */
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { return ; }
#endif /* CONFIG_ARM_SDE_INTERFACE */


--
2.20.1.12.g72788fdb

2022-01-21 11:06:01

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v5] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

On Sun, Jan 16, 2022 at 04:43:10PM +0800, Shuai Xue wrote:
> On an ACPI system, ACPI is initialised very early from a
> subsys_initcall(), while SDEI is not ready until a
> subsys_initcall_sync(). This patch is to reduce the time before GHES
> initialization.
>
> The SDEI driver provides functions (e.g. apei_sdei_register_ghes(),
> apei_sdei_unregister_ghes()) to register or unregister event callback
> for dispatcher in firmware. When the GHES driver probing, it registers
> the corresponding callback according to the notification type specified
> by GHES. If the GHES notification type is SDEI, the GHES driver will
> call apei_sdei_register_ghes() to register event call.
>
> When the firmware emits an event, it migrates the handling of the event
> into the kernel at the registered entry-point __sdei_asm_handler. And
> finally, the kernel will call the registered event callback and return
> status_code to indicate the status of event handling. SDEI_EV_FAILED
> indicates that the kernel failed to handle the event.
>
> Consequently, when an error occurs during kernel booting, the kernel is
> unable to handle and report errors until the GHES driver is initialized
> by device_initcall(), in which the event callback is registered. For
> example, when the kernel booting, the console logs many times from
> firmware before GHES drivers init in our platform:
>
> Trip in MM PCIe RAS handle(Intr:910)
> Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
> Find RP(98:1.0)
> --Walk dev(98:1.0) CE:0 UCE:4000
> ...
> ERROR: sdei_dispatch_event(32a) ret:-1
> --handler(910) end

If I understand correctly, the firmware noticed an error, tried to
report it to the kernel, and is complaining because the kernel isn't
ready to handle it yet. And the reason for this patch is to reduce
these complaints from the firmware.

That doesn't seem like a very good reason for this patch. There is
*always* a window before the kernel is ready to handle events from the
firmware.

Why is the firmware noticing these errors in the first place? If
you're seeing these complaints regularly, my guess is that either you
have some terrible hardware or (more likely) the firmware isn't
clearing some expected error condition correctly. For example, maybe
the Unsupported Request errors that happen while enumerating PCIe
devices are being reported.

If you register the callback function, the kernel will now start
seeing these error reports. What happens then? Does the kernel log
the errors somewhere? Is that better than the current situation where
the firmware logs them?

However, I DO think that:

- Removing acpi_hest_init() from acpi_pci_root_init(), and

- Converting ghes_init() and sdei_init() from initcalls to explicit
calls

are very good reasons to do something like this patch because HEST is
not PCI-specific, and IMO, explicit calls are better than initcalls
because initcall ordering is implicit and not well-defined within a
level.

> This is because the callback function has not been registered yet. Due
> to the poor design of firmware, previously reported errors will be
> overwritten by new ones. Therefore, all errors that occurred before GHES
> initialization are missed and there is no chance to report and find them
> again.
>
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and the framework for registering
> and unregistering events. The dependencies are as follows
>
> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
> ghes_init() => sdei_init()
>
> Based on above, move sdei_init and ghes_init as far ahead as possible,
> right after acpi_iort_init() in acpi_init(). After this patch, there are
> still errors missing, but we have to tolerate it until the software
> reporting capability is built. And this patch merely reduce the time
> before GHES initialization. The boot dmesg before this patch:
>
> [ 3.688586] HEST: Table parsing has been initialized.
> ...
> [ 33.204340] calling sdei_init+0x0/0x120 @ 1
> [ 33.208645] sdei: SDEIv1.0 (0x0) detected in firmware.
> ...
> [ 36.005390] calling ghes_init+0x0/0x11c @ 1
> [ 36.190021] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.
>
> After this patch, the boot dmesg like bellow:
>
> [ 3.688664] HEST: Table parsing has been initialized.
> [ 3.688691] sdei: SDEIv1.0 (0x0) detected in firmware.
> [ 3.694557] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.
>
> As we can see, the initialization of GHES is advanced by 33 seconds.
> (It should be noted that the effect of optimization varies with the
> platform.)
>
> Signed-off-by: Shuai Xue <[email protected]>
> ---
> drivers/acpi/apei/ghes.c | 18 ++++++++----------
> drivers/acpi/bus.c | 4 ++++
> drivers/acpi/pci_root.c | 4 ++--
> drivers/firmware/arm_sdei.c | 13 ++-----------
> include/acpi/apei.h | 2 ++
> include/linux/arm_sdei.h | 2 ++
> 6 files changed, 20 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0c5c9acc6254..64f24a9f213f 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -1457,27 +1457,26 @@ static struct platform_driver ghes_platform_driver = {
> .remove = ghes_remove,
> };
>
> -static int __init ghes_init(void)
> +void __init ghes_init(void)
> {
> int rc;
>
> if (acpi_disabled)
> - return -ENODEV;
> + return;
>
> switch (hest_disable) {
> case HEST_NOT_FOUND:
> - return -ENODEV;
> + pr_info(GHES_PFX "HEST is not found!\n");

I don't know whether this "HEST is not found" message is worthwhile or
not. I don't think lack of an HEST is an error, and users may be
alarmed. But this is an ACPI thing, so up to you and Rafael.

> + return;
> case HEST_DISABLED:
> pr_info(GHES_PFX "HEST is not enabled!\n");
> - return -EINVAL;
> + return;
> default:
> break;
> }
>
> - if (ghes_disable) {
> + if (ghes_disable)
> pr_info(GHES_PFX "GHES is not enabled!\n");
> - return -EINVAL;
> - }
>
> ghes_nmi_init_cxt();

Previously, if "ghes_disable", we returned immediately. After this
patch, if "ghes_disable", we print "GHES is not enabled!\n", but then
we go on to call ghes_nmi_init_cxt() and register the
&ghes_platform_driver. That looks wrong. Maybe you meant to keep a
"return" here?

> @@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
> else
> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>
> - return 0;
> + return;
> err:
> - return rc;
> + ghes_disable = 1;

Why do you set ghes_disable here? As far as I can tell, we will never
look at it again. The places we do look at it are:

- ghes_init(): earlier in this function, so we've already done that,

- acpi_hest_init(): we've already called that, too, and

- acpi_bus_osc_negotiate_platform_control(): called from
acpi_bus_init(), which we've already called.

> }
> -device_initcall(ghes_init);
> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> index 07f604832fd6..1dcd71df2cd5 100644
> --- a/drivers/acpi/bus.c
> +++ b/drivers/acpi/bus.c
> @@ -30,6 +30,7 @@
> #include <linux/acpi_viot.h>
> #include <linux/pci.h>
> #include <acpi/apei.h>
> +#include <linux/arm_sdei.h>
> #include <linux/suspend.h>
> #include <linux/prmt.h>
>
> @@ -1331,6 +1332,9 @@ static int __init acpi_init(void)
>
> pci_mmcfg_late_init();
> acpi_iort_init();
> + acpi_hest_init();
> + sdei_init();
> + ghes_init();

sdei_init() and ghes_init() stick out here because they're the only
functions without an "acpi_" prefix. Maybe a separate patch to rename
them?

> acpi_scan_init();
> acpi_ec_init();
> acpi_debugfs_init();
> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> index b76db99cced3..14e20af44627 100644
> --- a/drivers/acpi/pci_root.c
> +++ b/drivers/acpi/pci_root.c
> @@ -23,7 +23,7 @@
> #include <linux/dmi.h>
> #include <linux/platform_data/x86/apple.h>
> #include <acpi/apei.h> /* for acpi_hest_init() */

This #include looks like it may no longer be necessary.

> -
> +#include <linux/arm_sdei.h> /* for sdei_init() */

I don't think this #include is necessary.

> #include "internal.h"
>
> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> @@ -943,7 +943,7 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>
> void __init acpi_pci_root_init(void)
> {
> - acpi_hest_init();
> +

Drop the blank line here.

> if (acpi_pci_disabled)
> return;
>
> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
> index a7e762c352f9..1e1a51510e83 100644
> --- a/drivers/firmware/arm_sdei.c
> +++ b/drivers/firmware/arm_sdei.c
> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
> return true;
> }
>
> -static int __init sdei_init(void)
> +void __init sdei_init(void)
> {
> struct platform_device *pdev;
> int ret;
>
> ret = platform_driver_register(&sdei_driver);
> if (ret || !sdei_present_acpi())
> - return ret;
> + return;
>
> pdev = platform_device_register_simple(sdei_driver.driver.name,
> 0, NULL, 0);
> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
> pr_info("Failed to register ACPI:SDEI platform device %d\n",
> ret);
> }
> -
> - return ret;
> }
>
> -/*
> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
> - * by device_initcall(). We want to be called in the middle.
> - */
> -subsys_initcall_sync(sdei_init);
> -
> int sdei_event_handler(struct pt_regs *regs,
> struct sdei_registered_event *arg)
> {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index ece0a8af2bae..7dbd6363fda7 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -27,8 +27,10 @@ extern int hest_disable;
> extern int erst_disable;
> #ifdef CONFIG_ACPI_APEI_GHES
> extern bool ghes_disable;
> +void __init ghes_init(void);
> #else
> #define ghes_disable 1
> +static inline void ghes_init(void) { return; }

"return" is unnecessary here.

> #endif
>
> #ifdef CONFIG_ACPI_APEI
> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
> index 0a241c5c911d..9c987188b692 100644
> --- a/include/linux/arm_sdei.h
> +++ b/include/linux/arm_sdei.h
> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
> int sdei_mask_local_cpu(void);
> int sdei_unmask_local_cpu(void);
> +void __init sdei_init(void);
> #else
> static inline int sdei_mask_local_cpu(void) { return 0; }
> static inline int sdei_unmask_local_cpu(void) { return 0; }
> +static inline void sdei_init(void) { return ; }

"return" is unnecessary here.

> #endif /* CONFIG_ARM_SDE_INTERFACE */
>
>
> --
> 2.20.1.12.g72788fdb
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

2022-01-21 16:58:26

by Shuai Xue

[permalink] [raw]
Subject: Re: [PATCH v5] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

[+to Rafael, question about HEST/GHES/SDEI init]

Hi, Bjorn,

Thank you for your comments and quick reply.

在 2022/1/19 AM6:49, Bjorn Helgaas 写道:
> On Sun, Jan 16, 2022 at 04:43:10PM +0800, Shuai Xue wrote:
>> On an ACPI system, ACPI is initialised very early from a
>> subsys_initcall(), while SDEI is not ready until a
>> subsys_initcall_sync(). This patch is to reduce the time before GHES
>> initialization.
>>
>> The SDEI driver provides functions (e.g. apei_sdei_register_ghes(),
>> apei_sdei_unregister_ghes()) to register or unregister event callback
>> for dispatcher in firmware. When the GHES driver probing, it registers
>> the corresponding callback according to the notification type specified
>> by GHES. If the GHES notification type is SDEI, the GHES driver will
>> call apei_sdei_register_ghes() to register event call.
>>
>> When the firmware emits an event, it migrates the handling of the event
>> into the kernel at the registered entry-point __sdei_asm_handler. And
>> finally, the kernel will call the registered event callback and return
>> status_code to indicate the status of event handling. SDEI_EV_FAILED
>> indicates that the kernel failed to handle the event.
>>
>> Consequently, when an error occurs during kernel booting, the kernel is
>> unable to handle and report errors until the GHES driver is initialized
>> by device_initcall(), in which the event callback is registered. For
>> example, when the kernel booting, the console logs many times from
>> firmware before GHES drivers init in our platform:
>>
>> Trip in MM PCIe RAS handle(Intr:910)
>> Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
>> Find RP(98:1.0)
>> --Walk dev(98:1.0) CE:0 UCE:4000
>> ...
>> ERROR: sdei_dispatch_event(32a) ret:-1
>> --handler(910) end
>
> If I understand correctly, the firmware noticed an error, tried to
> report it to the kernel, and is complaining because the kernel isn't
> ready to handle it yet. And the reason for this patch is to reduce
> these complaints from the firmware.

My thoughts exactly :)

> That doesn't seem like a very good reason for this patch. There is
> *always* a window before the kernel is ready to handle events from the
> firmware.

Yes, there is always a window. But if we could do better in kernel that
reduces the window by 90% (from 33 seconds to 3 second), why not?

> Why is the firmware noticing these errors in the first place? If
> you're seeing these complaints regularly, my guess is that either you
> have some terrible hardware or (more likely) the firmware isn't
> clearing some expected error condition correctly. For example, maybe
> the Unsupported Request errors that happen while enumerating PCIe
> devices are being reported.
>
> If you register the callback function, the kernel will now start
> seeing these error reports. What happens then? Does the kernel log
> the errors somewhere? Is that better than the current situation where
> the firmware logs them?

Yep, it is a hardware issue. The firmware only logs in console (ttyAMA0) and
we can not see it in kernel side. After the kernel starts seeing these error
reports, we could see EDAC/ghes and efi/cper detailed logs in dmesg. We did not
notice the problem until we check the console log, which inspired us to reduce
the window when kernel startup, so that we can see the message clearly and
properly. I think the intuition is to check the log of dmesg, not the console.

> However, I DO think that:
>
> - Removing acpi_hest_init() from acpi_pci_root_init(), and
>
> - Converting ghes_init() and sdei_init() from initcalls to explicit
> calls
>
> are very good reasons to do something like this patch because HEST is
> not PCI-specific, and IMO, explicit calls are better than initcalls
> because initcall ordering is implicit and not well-defined within a
> level.

Haha, if the above reasons still don't convince you, I would like to accept yours :)
Should we do it in one patch or separate it into two patches?

>> This is because the callback function has not been registered yet. Due
>> to the poor design of firmware, previously reported errors will be
>> overwritten by new ones. Therefore, all errors that occurred before GHES
>> initialization are missed and there is no chance to report and find them
>> again.
>>
>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>> the estatus memory pool. On the other hand, ghes_init() relies on
>> sdei_init() to detect the SDEI version and the framework for registering
>> and unregistering events. The dependencies are as follows
>>
>> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
>> ghes_init() => sdei_init()
>>
>> Based on above, move sdei_init and ghes_init as far ahead as possible,
>> right after acpi_iort_init() in acpi_init(). After this patch, there are
>> still errors missing, but we have to tolerate it until the software
>> reporting capability is built. And this patch merely reduce the time
>> before GHES initialization. The boot dmesg before this patch:
>>
>> [ 3.688586] HEST: Table parsing has been initialized.
>> ...
>> [ 33.204340] calling sdei_init+0x0/0x120 @ 1
>> [ 33.208645] sdei: SDEIv1.0 (0x0) detected in firmware.
>> ...
>> [ 36.005390] calling ghes_init+0x0/0x11c @ 1
>> [ 36.190021] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.
>>
>> After this patch, the boot dmesg like bellow:
>>
>> [ 3.688664] HEST: Table parsing has been initialized.
>> [ 3.688691] sdei: SDEIv1.0 (0x0) detected in firmware.
>> [ 3.694557] GHES: APEI firmware first mode is enabled by APEI bit and WHEA _OSC.
>>
>> As we can see, the initialization of GHES is advanced by 33 seconds.
>> (It should be noted that the effect of optimization varies with the
>> platform.)
>>
>> Signed-off-by: Shuai Xue <[email protected]>
>> ---
>> drivers/acpi/apei/ghes.c | 18 ++++++++----------
>> drivers/acpi/bus.c | 4 ++++
>> drivers/acpi/pci_root.c | 4 ++--
>> drivers/firmware/arm_sdei.c | 13 ++-----------
>> include/acpi/apei.h | 2 ++
>> include/linux/arm_sdei.h | 2 ++
>> 6 files changed, 20 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 0c5c9acc6254..64f24a9f213f 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -1457,27 +1457,26 @@ static struct platform_driver ghes_platform_driver = {
>> .remove = ghes_remove,
>> };
>>
>> -static int __init ghes_init(void)
>> +void __init ghes_init(void)
>> {
>> int rc;
>>
>> if (acpi_disabled)
>> - return -ENODEV;
>> + return;
>>
>> switch (hest_disable) {
>> case HEST_NOT_FOUND:
>> - return -ENODEV;
>> + pr_info(GHES_PFX "HEST is not found!\n");
>
> I don't know whether this "HEST is not found" message is worthwhile or
> not. I don't think lack of an HEST is an error, and users may be
> alarmed. But this is an ACPI thing, so up to you and Rafael.

If we explicit call ghes_init(), we can't tell if ghes is initialized
successfully based on the return value of initcall. So I add a info message.
What's your opinion, Rafeal?

>> + return;
>> case HEST_DISABLED:
>> pr_info(GHES_PFX "HEST is not enabled!\n");
>> - return -EINVAL;
>> + return;
>> default:
>> break;
>> }
>>
>> - if (ghes_disable) {
>> + if (ghes_disable)
>> pr_info(GHES_PFX "GHES is not enabled!\n");
>> - return -EINVAL;
>> - }
>>
>> ghes_nmi_init_cxt();
>
> Previously, if "ghes_disable", we returned immediately. After this
> patch, if "ghes_disable", we print "GHES is not enabled!\n", but then
> we go on to call ghes_nmi_init_cxt() and register the
> &ghes_platform_driver. That looks wrong. Maybe you meant to keep a
> "return" here?

Sorry, it's my wrong. I will fix it in next version.

>> @@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
>> else
>> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>>
>> - return 0;
>> + return;
>> err:
>> - return rc;
>> + ghes_disable = 1;
>
> Why do you set ghes_disable here? As far as I can tell, we will never
> look at it again. The places we do look at it are:
>
> - ghes_init(): earlier in this function, so we've already done that,
>
> - acpi_hest_init(): we've already called that, too, and
>
> - acpi_bus_osc_negotiate_platform_control(): called from
> acpi_bus_init(), which we've already called.

I add it for future potential usage. Thank you for pointing it out.
If you think it is not necessary, I will delete it in next version.


>> }
>> -device_initcall(ghes_init);
>> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
>> index 07f604832fd6..1dcd71df2cd5 100644
>> --- a/drivers/acpi/bus.c
>> +++ b/drivers/acpi/bus.c
>> @@ -30,6 +30,7 @@
>> #include <linux/acpi_viot.h>
>> #include <linux/pci.h>
>> #include <acpi/apei.h>
>> +#include <linux/arm_sdei.h>
>> #include <linux/suspend.h>
>> #include <linux/prmt.h>
>>
>> @@ -1331,6 +1332,9 @@ static int __init acpi_init(void)
>>
>> pci_mmcfg_late_init();
>> acpi_iort_init();
>> + acpi_hest_init();
>> + sdei_init();
>> + ghes_init();
>
> sdei_init() and ghes_init() stick out here because they're the only
> functions without an "acpi_" prefix. Maybe a separate patch to rename
> them?

Great idea. I will add a patch to rename them with "acpi_" prefix

>> acpi_scan_init();
>> acpi_ec_init();
>> acpi_debugfs_init();
>> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
>> index b76db99cced3..14e20af44627 100644
>> --- a/drivers/acpi/pci_root.c
>> +++ b/drivers/acpi/pci_root.c
>> @@ -23,7 +23,7 @@
>> #include <linux/dmi.h>
>> #include <linux/platform_data/x86/apple.h>
>> #include <acpi/apei.h> /* for acpi_hest_init() */
>
> This #include looks like it may no longer be necessary.
>
>> -
>> +#include <linux/arm_sdei.h> /* for sdei_init() */
>
> I don't think this #include is necessary.

Thank you for pointing it out. I will delete it in next version.

>
>> #include "internal.h"
>>
>> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
>> @@ -943,7 +943,7 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>>
>> void __init acpi_pci_root_init(void)
>> {
>> - acpi_hest_init();
>> +
>
> Drop the blank line here.

Got it. Will fix it.

>
>> if (acpi_pci_disabled)
>> return;
>>
>> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
>> index a7e762c352f9..1e1a51510e83 100644
>> --- a/drivers/firmware/arm_sdei.c
>> +++ b/drivers/firmware/arm_sdei.c
>> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
>> return true;
>> }
>>
>> -static int __init sdei_init(void)
>> +void __init sdei_init(void)
>> {
>> struct platform_device *pdev;
>> int ret;
>>
>> ret = platform_driver_register(&sdei_driver);
>> if (ret || !sdei_present_acpi())
>> - return ret;
>> + return;
>>
>> pdev = platform_device_register_simple(sdei_driver.driver.name,
>> 0, NULL, 0);
>> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
>> pr_info("Failed to register ACPI:SDEI platform device %d\n",
>> ret);
>> }
>> -
>> - return ret;
>> }
>>
>> -/*
>> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
>> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
>> - * by device_initcall(). We want to be called in the middle.
>> - */
>> -subsys_initcall_sync(sdei_init);
>> -
>> int sdei_event_handler(struct pt_regs *regs,
>> struct sdei_registered_event *arg)
>> {
>> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
>> index ece0a8af2bae..7dbd6363fda7 100644
>> --- a/include/acpi/apei.h
>> +++ b/include/acpi/apei.h
>> @@ -27,8 +27,10 @@ extern int hest_disable;
>> extern int erst_disable;
>> #ifdef CONFIG_ACPI_APEI_GHES
>> extern bool ghes_disable;
>> +void __init ghes_init(void);
>> #else
>> #define ghes_disable 1
>> +static inline void ghes_init(void) { return; }
>
> "return" is unnecessary here.
>
>> #endif
>>
>> #ifdef CONFIG_ACPI_APEI
>> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
>> index 0a241c5c911d..9c987188b692 100644
>> --- a/include/linux/arm_sdei.h
>> +++ b/include/linux/arm_sdei.h
>> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
>> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
>> int sdei_mask_local_cpu(void);
>> int sdei_unmask_local_cpu(void);
>> +void __init sdei_init(void);
>> #else
>> static inline int sdei_mask_local_cpu(void) { return 0; }
>> static inline int sdei_unmask_local_cpu(void) { return 0; }
>> +static inline void sdei_init(void) { return ; }
>
> "return" is unnecessary here.

Got it. Will delete "return" in ghes_init(), sdei_init() and acpi_hest_init().

>> #endif /* CONFIG_ARM_SDE_INTERFACE */
>>
>>
>> --
>> 2.20.1.12.g72788fdb
>>
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> [email protected]
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

2022-01-21 20:04:05

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v5] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

On Wed, Jan 19, 2022 at 02:40:11PM +0800, Shuai Xue wrote:
> [+to Rafael, question about HEST/GHES/SDEI init]
>
> Hi, Bjorn,
>
> Thank you for your comments and quick reply.
>
> 在 2022/1/19 AM6:49, Bjorn Helgaas 写道:
> > On Sun, Jan 16, 2022 at 04:43:10PM +0800, Shuai Xue wrote:
> >> On an ACPI system, ACPI is initialised very early from a
> >> subsys_initcall(), while SDEI is not ready until a
> >> subsys_initcall_sync(). This patch is to reduce the time before GHES
> >> initialization.
> >>
> >> The SDEI driver provides functions (e.g. apei_sdei_register_ghes(),
> >> apei_sdei_unregister_ghes()) to register or unregister event callback
> >> for dispatcher in firmware. When the GHES driver probing, it registers
> >> the corresponding callback according to the notification type specified
> >> by GHES. If the GHES notification type is SDEI, the GHES driver will
> >> call apei_sdei_register_ghes() to register event call.
> >>
> >> When the firmware emits an event, it migrates the handling of the event
> >> into the kernel at the registered entry-point __sdei_asm_handler. And
> >> finally, the kernel will call the registered event callback and return
> >> status_code to indicate the status of event handling. SDEI_EV_FAILED
> >> indicates that the kernel failed to handle the event.
> >>
> >> Consequently, when an error occurs during kernel booting, the kernel is
> >> unable to handle and report errors until the GHES driver is initialized
> >> by device_initcall(), in which the event callback is registered. For
> >> example, when the kernel booting, the console logs many times from
> >> firmware before GHES drivers init in our platform:
> >>
> >> Trip in MM PCIe RAS handle(Intr:910)
> >> Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
> >> Find RP(98:1.0)
> >> --Walk dev(98:1.0) CE:0 UCE:4000
> >> ...
> >> ERROR: sdei_dispatch_event(32a) ret:-1
> >> --handler(910) end
> >
> > If I understand correctly, the firmware noticed an error, tried to
> > report it to the kernel, and is complaining because the kernel isn't
> > ready to handle it yet. And the reason for this patch is to reduce
> > these complaints from the firmware.
>
> My thoughts exactly :)
>
> > That doesn't seem like a very good reason for this patch. There is
> > *always* a window before the kernel is ready to handle events from the
> > firmware.
>
> Yes, there is always a window. But if we could do better in kernel that
> reduces the window by 90% (from 33 seconds to 3 second), why not?
>
> > Why is the firmware noticing these errors in the first place? If
> > you're seeing these complaints regularly, my guess is that either you
> > have some terrible hardware or (more likely) the firmware isn't
> > clearing some expected error condition correctly. For example, maybe
> > the Unsupported Request errors that happen while enumerating PCIe
> > devices are being reported.
> >
> > If you register the callback function, the kernel will now start
> > seeing these error reports. What happens then? Does the kernel log
> > the errors somewhere? Is that better than the current situation where
> > the firmware logs them?
>
> Yep, it is a hardware issue. The firmware only logs in console
> (ttyAMA0) and we can not see it in kernel side. After the kernel
> starts seeing these error reports, we could see EDAC/ghes and
> efi/cper detailed logs in dmesg. We did not notice the problem until
> we check the console log, which inspired us to reduce the window
> when kernel startup, so that we can see the message clearly and
> properly. I think the intuition is to check the log of dmesg, not
> the console.

> > However, I DO think that:
> >
> > - Removing acpi_hest_init() from acpi_pci_root_init(), and
> >
> > - Converting ghes_init() and sdei_init() from initcalls to explicit
> > calls
> >
> > are very good reasons to do something like this patch because HEST is
> > not PCI-specific, and IMO, explicit calls are better than initcalls
> > because initcall ordering is implicit and not well-defined within a
> > level.
>
> Haha, if the above reasons still don't convince you, I would like to
> accept yours :) Should we do it in one patch or separate it into two
> patches?

IMO, this can be done in one patch, but this would probably go via
Rafael.

> >> -static int __init ghes_init(void)
> >> +void __init ghes_init(void)
> >> {
> >> int rc;
> >>
> >> if (acpi_disabled)
> >> - return -ENODEV;
> >> + return;
> >>
> >> switch (hest_disable) {
> >> case HEST_NOT_FOUND:
> >> - return -ENODEV;
> >> + pr_info(GHES_PFX "HEST is not found!\n");
> >
> > I don't know whether this "HEST is not found" message is
> > worthwhile or not. I don't think lack of an HEST is an error, and
> > users may be alarmed. But this is an ACPI thing, so up to you and
> > Rafael.
>
> If we explicit call ghes_init(), we can't tell if ghes is
> initialized successfully based on the return value of initcall. So I
> add a info message.

When ghes_init() is an initcall and you return -ENODEV for the
HEST_NOT_FOUND case, I don't think we log any message about that, do
we? do_one_initcall() will capture and return the -ENODEV, but the
caller (do_initcall_level()) just ignores it.

> >> @@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
> >> else
> >> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
> >>
> >> - return 0;
> >> + return;
> >> err:
> >> - return rc;
> >> + ghes_disable = 1;
> >
> > Why do you set ghes_disable here? As far as I can tell, we will never
> > look at it again. The places we do look at it are:
> >
> > - ghes_init(): earlier in this function, so we've already done that,
> >
> > - acpi_hest_init(): we've already called that, too, and
> >
> > - acpi_bus_osc_negotiate_platform_control(): called from
> > acpi_bus_init(), which we've already called.
>
> I add it for future potential usage. Thank you for pointing it out.
> If you think it is not necessary, I will delete it in next version.

I think it is not necessary to save information that will never be
used. If you need it in the future, you can add it and the reason
will be obvious.

Bjorn

2022-01-21 21:03:41

by Shuai Xue

[permalink] [raw]
Subject: Re: [PATCH v5] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

Hi, Bjorn,

Thank you for your valuable comments.

在 2022/1/20 AM4:42, Bjorn Helgaas 写道:
> On Wed, Jan 19, 2022 at 02:40:11PM +0800, Shuai Xue wrote:
>> [+to Rafael, question about HEST/GHES/SDEI init]
>>
>> Hi, Bjorn,
>>
>> Thank you for your comments and quick reply.
>>
>> 在 2022/1/19 AM6:49, Bjorn Helgaas 写道:
>>> On Sun, Jan 16, 2022 at 04:43:10PM +0800, Shuai Xue wrote:
>>>> On an ACPI system, ACPI is initialised very early from a
>>>> subsys_initcall(), while SDEI is not ready until a
>>>> subsys_initcall_sync(). This patch is to reduce the time before GHES
>>>> initialization.
>>>>
>>>> The SDEI driver provides functions (e.g. apei_sdei_register_ghes(),
>>>> apei_sdei_unregister_ghes()) to register or unregister event callback
>>>> for dispatcher in firmware. When the GHES driver probing, it registers
>>>> the corresponding callback according to the notification type specified
>>>> by GHES. If the GHES notification type is SDEI, the GHES driver will
>>>> call apei_sdei_register_ghes() to register event call.
>>>>
>>>> When the firmware emits an event, it migrates the handling of the event
>>>> into the kernel at the registered entry-point __sdei_asm_handler. And
>>>> finally, the kernel will call the registered event callback and return
>>>> status_code to indicate the status of event handling. SDEI_EV_FAILED
>>>> indicates that the kernel failed to handle the event.
>>>>
>>>> Consequently, when an error occurs during kernel booting, the kernel is
>>>> unable to handle and report errors until the GHES driver is initialized
>>>> by device_initcall(), in which the event callback is registered. For
>>>> example, when the kernel booting, the console logs many times from
>>>> firmware before GHES drivers init in our platform:
>>>>
>>>> Trip in MM PCIe RAS handle(Intr:910)
>>>> Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
>>>> Find RP(98:1.0)
>>>> --Walk dev(98:1.0) CE:0 UCE:4000
>>>> ...
>>>> ERROR: sdei_dispatch_event(32a) ret:-1
>>>> --handler(910) end
>>>
>>> If I understand correctly, the firmware noticed an error, tried to
>>> report it to the kernel, and is complaining because the kernel isn't
>>> ready to handle it yet. And the reason for this patch is to reduce
>>> these complaints from the firmware.
>>
>> My thoughts exactly :)
>>
>>> That doesn't seem like a very good reason for this patch. There is
>>> *always* a window before the kernel is ready to handle events from the
>>> firmware.
>>
>> Yes, there is always a window. But if we could do better in kernel that
>> reduces the window by 90% (from 33 seconds to 3 second), why not?
>>
>>> Why is the firmware noticing these errors in the first place? If
>>> you're seeing these complaints regularly, my guess is that either you
>>> have some terrible hardware or (more likely) the firmware isn't
>>> clearing some expected error condition correctly. For example, maybe
>>> the Unsupported Request errors that happen while enumerating PCIe
>>> devices are being reported.
>>>
>>> If you register the callback function, the kernel will now start
>>> seeing these error reports. What happens then? Does the kernel log
>>> the errors somewhere? Is that better than the current situation where
>>> the firmware logs them?
>>
>> Yep, it is a hardware issue. The firmware only logs in console
>> (ttyAMA0) and we can not see it in kernel side. After the kernel
>> starts seeing these error reports, we could see EDAC/ghes and
>> efi/cper detailed logs in dmesg. We did not notice the problem until
>> we check the console log, which inspired us to reduce the window
>> when kernel startup, so that we can see the message clearly and
>> properly. I think the intuition is to check the log of dmesg, not
>> the console.
>
>>> However, I DO think that:
>>>
>>> - Removing acpi_hest_init() from acpi_pci_root_init(), and
>>>
>>> - Converting ghes_init() and sdei_init() from initcalls to explicit
>>> calls
>>>
>>> are very good reasons to do something like this patch because HEST is
>>> not PCI-specific, and IMO, explicit calls are better than initcalls
>>> because initcall ordering is implicit and not well-defined within a
>>> level.
>>
>> Haha, if the above reasons still don't convince you, I would like to
>> accept yours :) Should we do it in one patch or separate it into two
>> patches?
>
> IMO, this can be done in one patch, but this would probably go via
> Rafael.

Got it, I will send a new patch and cc to Rafael.

>>>> -static int __init ghes_init(void)
>>>> +void __init ghes_init(void)
>>>> {
>>>> int rc;
>>>>
>>>> if (acpi_disabled)
>>>> - return -ENODEV;
>>>> + return;
>>>>
>>>> switch (hest_disable) {
>>>> case HEST_NOT_FOUND:
>>>> - return -ENODEV;
>>>> + pr_info(GHES_PFX "HEST is not found!\n");
>>>
>>> I don't know whether this "HEST is not found" message is
>>> worthwhile or not. I don't think lack of an HEST is an error, and
>>> users may be alarmed. But this is an ACPI thing, so up to you and
>>> Rafael.
>>
>> If we explicit call ghes_init(), we can't tell if ghes is
>> initialized successfully based on the return value of initcall. So I
>> add a info message.
>
> When ghes_init() is an initcall and you return -ENODEV for the
> HEST_NOT_FOUND case, I don't think we log any message about that, do
> we? do_one_initcall() will capture and return the -ENODEV, but the
> caller (do_initcall_level()) just ignores it.

I see, thank you. I will delete it in next version.

>>>> @@ -1495,8 +1494,7 @@ static int __init ghes_init(void)
>>>> else
>>>> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>>>>
>>>> - return 0;
>>>> + return;
>>>> err:
>>>> - return rc;
>>>> + ghes_disable = 1;
>>>
>>> Why do you set ghes_disable here? As far as I can tell, we will never
>>> look at it again. The places we do look at it are:
>>>
>>> - ghes_init(): earlier in this function, so we've already done that,
>>>
>>> - acpi_hest_init(): we've already called that, too, and
>>>
>>> - acpi_bus_osc_negotiate_platform_control(): called from
>>> acpi_bus_init(), which we've already called.
>>
>> I add it for future potential usage. Thank you for pointing it out.
>> If you think it is not necessary, I will delete it in next version.
>
> I think it is not necessary to save information that will never be
> used. If you need it in the future, you can add it and the reason
> will be obvious.
>
> Bjorn

You are right. I will delete it.

Best Regards,
Shuai


2022-01-21 21:06:45

by Shuai Xue

[permalink] [raw]
Subject: [PATCH v6] ACPI: explicit init HEST, SDEI and GHES in apci_init

From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
the estatus memory pool. On the other hand, ghes_init() relies on
sdei_init() to detect the SDEI version and (un)register events. The
dependencies are as follows:

ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
ghes_init() => sdei_init()

HEST is not PCI-specific and initcall ordering is implicit and not
well-defined within a level.

Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
convert ghes_init() and sdei_init() from initcalls to explicit calls in the
following order:

acpi_hest_init()
sdei_init()
ghes_init()

Suggested-by: Bjorn Helgaas <[email protected]>
Signed-off-by: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 17 +++++++----------
drivers/acpi/bus.c | 4 ++++
drivers/acpi/pci_root.c | 3 ---
drivers/firmware/arm_sdei.c | 13 ++-----------
include/acpi/apei.h | 4 +++-
include/linux/arm_sdei.h | 2 ++
6 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c5c9acc6254..bed4f10cfcb8 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,33 +1457,33 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};

-static int __init ghes_init(void)
+void __init ghes_init(void)
{
int rc;

if (acpi_disabled)
- return -ENODEV;
+ return;

switch (hest_disable) {
case HEST_NOT_FOUND:
- return -ENODEV;
+ return;
case HEST_DISABLED:
pr_info(GHES_PFX "HEST is not enabled!\n");
- return -EINVAL;
+ return;
default:
break;
}

if (ghes_disable) {
pr_info(GHES_PFX "GHES is not enabled!\n");
- return -EINVAL;
+ return;
}

ghes_nmi_init_cxt();

rc = platform_driver_register(&ghes_platform_driver);
if (rc)
- goto err;
+ return;

rc = apei_osc_setup();
if (rc == 0 && osc_sb_apei_support_acked)
@@ -1495,8 +1495,5 @@ static int __init ghes_init(void)
else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");

- return 0;
-err:
- return rc;
+ return;
}
-device_initcall(ghes_init);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 07f604832fd6..1dcd71df2cd5 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -30,6 +30,7 @@
#include <linux/acpi_viot.h>
#include <linux/pci.h>
#include <acpi/apei.h>
+#include <linux/arm_sdei.h>
#include <linux/suspend.h>
#include <linux/prmt.h>

@@ -1331,6 +1332,9 @@ static int __init acpi_init(void)

pci_mmcfg_late_init();
acpi_iort_init();
+ acpi_hest_init();
+ sdei_init();
+ ghes_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index b76db99cced3..6f9e75d14808 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -22,8 +22,6 @@
#include <linux/slab.h>
#include <linux/dmi.h>
#include <linux/platform_data/x86/apple.h>
-#include <acpi/apei.h> /* for acpi_hest_init() */
-
#include "internal.h"

#define ACPI_PCI_ROOT_CLASS "pci_bridge"
@@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,

void __init acpi_pci_root_init(void)
{
- acpi_hest_init();
if (acpi_pci_disabled)
return;

diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f9..1e1a51510e83 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
return true;
}

-static int __init sdei_init(void)
+void __init sdei_init(void)
{
struct platform_device *pdev;
int ret;

ret = platform_driver_register(&sdei_driver);
if (ret || !sdei_present_acpi())
- return ret;
+ return;

pdev = platform_device_register_simple(sdei_driver.driver.name,
0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
pr_info("Failed to register ACPI:SDEI platform device %d\n",
ret);
}
-
- return ret;
}

-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
int sdei_event_handler(struct pt_regs *regs,
struct sdei_registered_event *arg)
{
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae..4e60dd73c3bb 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,14 +27,16 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
+void __init ghes_init(void);
#else
#define ghes_disable 1
+static inline void ghes_init(void) { }
#endif

#ifdef CONFIG_ACPI_APEI
void __init acpi_hest_init(void);
#else
-static inline void acpi_hest_init(void) { return; }
+static inline void acpi_hest_init(void) { }
#endif

int erst_write(const struct cper_record_header *record);
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d..14dc461b0e82 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
/* For use by arch code when CPU hotplug notifiers are not appropriate. */
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { }
#endif /* CONFIG_ARM_SDE_INTERFACE */


--
2.20.1.12.g72788fdb

2022-01-21 22:24:17

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH v5] ACPI: Move sdei_init and ghes_init ahead to handle platform errors earlier

On Wed, Jan 19, 2022 at 9:43 PM Bjorn Helgaas <[email protected]> wrote:
>
> On Wed, Jan 19, 2022 at 02:40:11PM +0800, Shuai Xue wrote:
> > [+to Rafael, question about HEST/GHES/SDEI init]
> >
> > Hi, Bjorn,
> >
> > Thank you for your comments and quick reply.
> >
> > 在 2022/1/19 AM6:49, Bjorn Helgaas 写道:
> > > On Sun, Jan 16, 2022 at 04:43:10PM +0800, Shuai Xue wrote:
> > >> On an ACPI system, ACPI is initialised very early from a
> > >> subsys_initcall(), while SDEI is not ready until a
> > >> subsys_initcall_sync(). This patch is to reduce the time before GHES
> > >> initialization.
> > >>
> > >> The SDEI driver provides functions (e.g. apei_sdei_register_ghes(),
> > >> apei_sdei_unregister_ghes()) to register or unregister event callback
> > >> for dispatcher in firmware. When the GHES driver probing, it registers
> > >> the corresponding callback according to the notification type specified
> > >> by GHES. If the GHES notification type is SDEI, the GHES driver will
> > >> call apei_sdei_register_ghes() to register event call.
> > >>
> > >> When the firmware emits an event, it migrates the handling of the event
> > >> into the kernel at the registered entry-point __sdei_asm_handler. And
> > >> finally, the kernel will call the registered event callback and return
> > >> status_code to indicate the status of event handling. SDEI_EV_FAILED
> > >> indicates that the kernel failed to handle the event.
> > >>
> > >> Consequently, when an error occurs during kernel booting, the kernel is
> > >> unable to handle and report errors until the GHES driver is initialized
> > >> by device_initcall(), in which the event callback is registered. For
> > >> example, when the kernel booting, the console logs many times from
> > >> firmware before GHES drivers init in our platform:
> > >>
> > >> Trip in MM PCIe RAS handle(Intr:910)
> > >> Clean PE[1.1.1] ERR_STS:0x4000100 -> 0 INT_STS:F0000000
> > >> Find RP(98:1.0)
> > >> --Walk dev(98:1.0) CE:0 UCE:4000
> > >> ...
> > >> ERROR: sdei_dispatch_event(32a) ret:-1
> > >> --handler(910) end
> > >
> > > If I understand correctly, the firmware noticed an error, tried to
> > > report it to the kernel, and is complaining because the kernel isn't
> > > ready to handle it yet. And the reason for this patch is to reduce
> > > these complaints from the firmware.
> >
> > My thoughts exactly :)
> >
> > > That doesn't seem like a very good reason for this patch. There is
> > > *always* a window before the kernel is ready to handle events from the
> > > firmware.
> >
> > Yes, there is always a window. But if we could do better in kernel that
> > reduces the window by 90% (from 33 seconds to 3 second), why not?
> >
> > > Why is the firmware noticing these errors in the first place? If
> > > you're seeing these complaints regularly, my guess is that either you
> > > have some terrible hardware or (more likely) the firmware isn't
> > > clearing some expected error condition correctly. For example, maybe
> > > the Unsupported Request errors that happen while enumerating PCIe
> > > devices are being reported.
> > >
> > > If you register the callback function, the kernel will now start
> > > seeing these error reports. What happens then? Does the kernel log
> > > the errors somewhere? Is that better than the current situation where
> > > the firmware logs them?
> >
> > Yep, it is a hardware issue. The firmware only logs in console
> > (ttyAMA0) and we can not see it in kernel side. After the kernel
> > starts seeing these error reports, we could see EDAC/ghes and
> > efi/cper detailed logs in dmesg. We did not notice the problem until
> > we check the console log, which inspired us to reduce the window
> > when kernel startup, so that we can see the message clearly and
> > properly. I think the intuition is to check the log of dmesg, not
> > the console.
>
> > > However, I DO think that:
> > >
> > > - Removing acpi_hest_init() from acpi_pci_root_init(), and
> > >
> > > - Converting ghes_init() and sdei_init() from initcalls to explicit
> > > calls
> > >
> > > are very good reasons to do something like this patch because HEST is
> > > not PCI-specific, and IMO, explicit calls are better than initcalls
> > > because initcall ordering is implicit and not well-defined within a
> > > level.
> >
> > Haha, if the above reasons still don't convince you, I would like to
> > accept yours :) Should we do it in one patch or separate it into two
> > patches?
>
> IMO, this can be done in one patch, but this would probably go via
> Rafael.

Yes, that would make sense IMO.

2022-01-21 22:25:19

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v6] ACPI: explicit init HEST, SDEI and GHES in apci_init

On Thu, Jan 20, 2022 at 01:05:22PM +0800, Shuai Xue wrote:
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and (un)register events. The
> dependencies are as follows:
>
> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
> ghes_init() => sdei_init()
>
> HEST is not PCI-specific and initcall ordering is implicit and not
> well-defined within a level.
>
> Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
> convert ghes_init() and sdei_init() from initcalls to explicit calls in the
> following order:
>
> acpi_hest_init()
> sdei_init()
> ghes_init()
>
> Suggested-by: Bjorn Helgaas <[email protected]>

I didn't suggest the approach; I just reviewed the patch and the
commit log and proposed moving it out of acpi_pci_root_init(). That
doesn't need to be acknowledged.

> Signed-off-by: Shuai Xue <[email protected]>

Reviewed-by: Bjorn Helgaas <[email protected]>

Minor extra "return" below.

> ---
> drivers/acpi/apei/ghes.c | 17 +++++++----------
> drivers/acpi/bus.c | 4 ++++
> drivers/acpi/pci_root.c | 3 ---
> drivers/firmware/arm_sdei.c | 13 ++-----------
> include/acpi/apei.h | 4 +++-
> include/linux/arm_sdei.h | 2 ++
> 6 files changed, 18 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0c5c9acc6254..bed4f10cfcb8 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -1457,33 +1457,33 @@ static struct platform_driver ghes_platform_driver = {
> .remove = ghes_remove,
> };
>
> -static int __init ghes_init(void)
> +void __init ghes_init(void)
> {
> int rc;
>
> if (acpi_disabled)
> - return -ENODEV;
> + return;
>
> switch (hest_disable) {
> case HEST_NOT_FOUND:
> - return -ENODEV;
> + return;
> case HEST_DISABLED:
> pr_info(GHES_PFX "HEST is not enabled!\n");
> - return -EINVAL;
> + return;
> default:
> break;
> }
>
> if (ghes_disable) {
> pr_info(GHES_PFX "GHES is not enabled!\n");
> - return -EINVAL;
> + return;
> }
>
> ghes_nmi_init_cxt();
>
> rc = platform_driver_register(&ghes_platform_driver);
> if (rc)
> - goto err;
> + return;
>
> rc = apei_osc_setup();
> if (rc == 0 && osc_sb_apei_support_acked)
> @@ -1495,8 +1495,5 @@ static int __init ghes_init(void)
> else
> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>
> - return 0;
> -err:
> - return rc;
> + return;

Unnecessary "return".

> }
> -device_initcall(ghes_init);
> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> index 07f604832fd6..1dcd71df2cd5 100644
> --- a/drivers/acpi/bus.c
> +++ b/drivers/acpi/bus.c
> @@ -30,6 +30,7 @@
> #include <linux/acpi_viot.h>
> #include <linux/pci.h>
> #include <acpi/apei.h>
> +#include <linux/arm_sdei.h>

This "arm" looks a little out of place in this supposedly arch-generic
code. Not really a new thing with this patch, since this #include
already appears in drivers/acpi/apei/ghes.c. Maybe it's unavoidable.

> #include <linux/suspend.h>
> #include <linux/prmt.h>
>
> @@ -1331,6 +1332,9 @@ static int __init acpi_init(void)
>
> pci_mmcfg_late_init();
> acpi_iort_init();
> + acpi_hest_init();
> + sdei_init();
> + ghes_init();
> acpi_scan_init();
> acpi_ec_init();
> acpi_debugfs_init();
> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> index b76db99cced3..6f9e75d14808 100644
> --- a/drivers/acpi/pci_root.c
> +++ b/drivers/acpi/pci_root.c
> @@ -22,8 +22,6 @@
> #include <linux/slab.h>
> #include <linux/dmi.h>
> #include <linux/platform_data/x86/apple.h>
> -#include <acpi/apei.h> /* for acpi_hest_init() */
> -
> #include "internal.h"
>
> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> @@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>
> void __init acpi_pci_root_init(void)
> {
> - acpi_hest_init();
> if (acpi_pci_disabled)
> return;
>
> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
> index a7e762c352f9..1e1a51510e83 100644
> --- a/drivers/firmware/arm_sdei.c
> +++ b/drivers/firmware/arm_sdei.c
> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
> return true;
> }
>
> -static int __init sdei_init(void)
> +void __init sdei_init(void)
> {
> struct platform_device *pdev;
> int ret;
>
> ret = platform_driver_register(&sdei_driver);
> if (ret || !sdei_present_acpi())
> - return ret;
> + return;
>
> pdev = platform_device_register_simple(sdei_driver.driver.name,
> 0, NULL, 0);
> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
> pr_info("Failed to register ACPI:SDEI platform device %d\n",
> ret);
> }
> -
> - return ret;
> }
>
> -/*
> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
> - * by device_initcall(). We want to be called in the middle.
> - */
> -subsys_initcall_sync(sdei_init);
> -
> int sdei_event_handler(struct pt_regs *regs,
> struct sdei_registered_event *arg)
> {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index ece0a8af2bae..4e60dd73c3bb 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -27,14 +27,16 @@ extern int hest_disable;
> extern int erst_disable;
> #ifdef CONFIG_ACPI_APEI_GHES
> extern bool ghes_disable;
> +void __init ghes_init(void);
> #else
> #define ghes_disable 1
> +static inline void ghes_init(void) { }
> #endif
>
> #ifdef CONFIG_ACPI_APEI
> void __init acpi_hest_init(void);
> #else
> -static inline void acpi_hest_init(void) { return; }
> +static inline void acpi_hest_init(void) { }
> #endif
>
> int erst_write(const struct cper_record_header *record);
> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
> index 0a241c5c911d..14dc461b0e82 100644
> --- a/include/linux/arm_sdei.h
> +++ b/include/linux/arm_sdei.h
> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
> int sdei_mask_local_cpu(void);
> int sdei_unmask_local_cpu(void);
> +void __init sdei_init(void);
> #else
> static inline int sdei_mask_local_cpu(void) { return 0; }
> static inline int sdei_unmask_local_cpu(void) { return 0; }
> +static inline void sdei_init(void) { }
> #endif /* CONFIG_ARM_SDE_INTERFACE */
>
>
> --
> 2.20.1.12.g72788fdb
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

2022-01-22 00:32:20

by Shuai Xue

[permalink] [raw]
Subject: Re: [PATCH v6] ACPI: explicit init HEST, SDEI and GHES in apci_init

Hi, Bjorn,

Thank you for your comments.

在 2022/1/21 AM12:22, Bjorn Helgaas 写道:
> On Thu, Jan 20, 2022 at 01:05:22PM +0800, Shuai Xue wrote:
>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>> the estatus memory pool. On the other hand, ghes_init() relies on
>> sdei_init() to detect the SDEI version and (un)register events. The
>> dependencies are as follows:
>>
>> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
>> ghes_init() => sdei_init()
>>
>> HEST is not PCI-specific and initcall ordering is implicit and not
>> well-defined within a level.
>>
>> Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
>> convert ghes_init() and sdei_init() from initcalls to explicit calls in the
>> following order:
>>
>> acpi_hest_init()
>> sdei_init()
>> ghes_init()
>>
>> Suggested-by: Bjorn Helgaas <[email protected]>
>
> I didn't suggest the approach; I just reviewed the patch and the
> commit log and proposed moving it out of acpi_pci_root_init(). That
> doesn't need to be acknowledged.

I will delete it in next version :)

>> Signed-off-by: Shuai Xue <[email protected]>
>
> Reviewed-by: Bjorn Helgaas <[email protected]>

Will add this in next version.


> Minor extra "return" below.
>> ---
>> drivers/acpi/apei/ghes.c | 17 +++++++----------
>> drivers/acpi/bus.c | 4 ++++
>> drivers/acpi/pci_root.c | 3 ---
>> drivers/firmware/arm_sdei.c | 13 ++-----------
>> include/acpi/apei.h | 4 +++-
>> include/linux/arm_sdei.h | 2 ++
>> 6 files changed, 18 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 0c5c9acc6254..bed4f10cfcb8 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -1457,33 +1457,33 @@ static struct platform_driver ghes_platform_driver = {
>> .remove = ghes_remove,
>> };
>>
>> -static int __init ghes_init(void)
>> +void __init ghes_init(void)
>> {
>> int rc;
>>
>> if (acpi_disabled)
>> - return -ENODEV;
>> + return;
>>
>> switch (hest_disable) {
>> case HEST_NOT_FOUND:
>> - return -ENODEV;
>> + return;
>> case HEST_DISABLED:
>> pr_info(GHES_PFX "HEST is not enabled!\n");
>> - return -EINVAL;
>> + return;
>> default:
>> break;
>> }
>>
>> if (ghes_disable) {
>> pr_info(GHES_PFX "GHES is not enabled!\n");
>> - return -EINVAL;
>> + return;
>> }
>>
>> ghes_nmi_init_cxt();
>>
>> rc = platform_driver_register(&ghes_platform_driver);
>> if (rc)
>> - goto err;
>> + return;
>>
>> rc = apei_osc_setup();
>> if (rc == 0 && osc_sb_apei_support_acked)
>> @@ -1495,8 +1495,5 @@ static int __init ghes_init(void)
>> else
>> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>>
>> - return 0;
>> -err:
>> - return rc;
>> + return;
>
> Unnecessary "return".

Will fix it in next version.

>
>> }
>> -device_initcall(ghes_init);
>> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
>> index 07f604832fd6..1dcd71df2cd5 100644
>> --- a/drivers/acpi/bus.c
>> +++ b/drivers/acpi/bus.c
>> @@ -30,6 +30,7 @@
>> #include <linux/acpi_viot.h>
>> #include <linux/pci.h>
>> #include <acpi/apei.h>
>> +#include <linux/arm_sdei.h>
>
> This "arm" looks a little out of place in this supposedly arch-generic
> code. Not really a new thing with this patch, since this #include
> already appears in drivers/acpi/apei/ghes.c. Maybe it's unavoidable.

Yep, should we move sdei_init() into the beginning of ghes_init()?

> sdei_init() and ghes_init() stick out here because they're the only
> functions without an "acpi_" prefix. Maybe a separate patch to rename
> them?

Sorry, I forgot to send a patch to rename them. I have a question about this.

> Software Delegated Exception Interface (|SDEI|) is an Arm specification for
> Non-secure world to register handlers with firmware to receive notifications
> about system events.
> LINK: https://github.com/ARM-software/arm-trusted-firmware/blob/master/docs/components/sdei.rst

I think SDEI is not a ACPI Specification but Arm specification so we should
not rename sdei_init() with an "acpi_" prefix. If we move sdei_init() into
ghes_init(), and rename ghes_init() to acpi_ghes_init(), then all looks
fine? What's your opinion, Bjorn?

Best Regards.

Shuai

>
>> #include <linux/suspend.h>
>> #include <linux/prmt.h>
>>
>> @@ -1331,6 +1332,9 @@ static int __init acpi_init(void)
>>
>> pci_mmcfg_late_init();
>> acpi_iort_init();
>> + acpi_hest_init();
>> + sdei_init();
>> + ghes_init();
>> acpi_scan_init();
>> acpi_ec_init();
>> acpi_debugfs_init();
>> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
>> index b76db99cced3..6f9e75d14808 100644
>> --- a/drivers/acpi/pci_root.c
>> +++ b/drivers/acpi/pci_root.c
>> @@ -22,8 +22,6 @@
>> #include <linux/slab.h>
>> #include <linux/dmi.h>
>> #include <linux/platform_data/x86/apple.h>
>> -#include <acpi/apei.h> /* for acpi_hest_init() */
>> -
>> #include "internal.h"
>>
>> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
>> @@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>>
>> void __init acpi_pci_root_init(void)
>> {
>> - acpi_hest_init();
>> if (acpi_pci_disabled)
>> return;
>>
>> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
>> index a7e762c352f9..1e1a51510e83 100644
>> --- a/drivers/firmware/arm_sdei.c
>> +++ b/drivers/firmware/arm_sdei.c
>> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
>> return true;
>> }
>>
>> -static int __init sdei_init(void)
>> +void __init sdei_init(void)
>> {
>> struct platform_device *pdev;
>> int ret;
>>
>> ret = platform_driver_register(&sdei_driver);
>> if (ret || !sdei_present_acpi())
>> - return ret;
>> + return;
>>
>> pdev = platform_device_register_simple(sdei_driver.driver.name,
>> 0, NULL, 0);
>> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
>> pr_info("Failed to register ACPI:SDEI platform device %d\n",
>> ret);
>> }
>> -
>> - return ret;
>> }
>>
>> -/*
>> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
>> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
>> - * by device_initcall(). We want to be called in the middle.
>> - */
>> -subsys_initcall_sync(sdei_init);
>> -
>> int sdei_event_handler(struct pt_regs *regs,
>> struct sdei_registered_event *arg)
>> {
>> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
>> index ece0a8af2bae..4e60dd73c3bb 100644
>> --- a/include/acpi/apei.h
>> +++ b/include/acpi/apei.h
>> @@ -27,14 +27,16 @@ extern int hest_disable;
>> extern int erst_disable;
>> #ifdef CONFIG_ACPI_APEI_GHES
>> extern bool ghes_disable;
>> +void __init ghes_init(void);
>> #else
>> #define ghes_disable 1
>> +static inline void ghes_init(void) { }
>> #endif
>>
>> #ifdef CONFIG_ACPI_APEI
>> void __init acpi_hest_init(void);
>> #else
>> -static inline void acpi_hest_init(void) { return; }
>> +static inline void acpi_hest_init(void) { }
>> #endif
>>
>> int erst_write(const struct cper_record_header *record);
>> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
>> index 0a241c5c911d..14dc461b0e82 100644
>> --- a/include/linux/arm_sdei.h
>> +++ b/include/linux/arm_sdei.h
>> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
>> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
>> int sdei_mask_local_cpu(void);
>> int sdei_unmask_local_cpu(void);
>> +void __init sdei_init(void);
>> #else
>> static inline int sdei_mask_local_cpu(void) { return 0; }
>> static inline int sdei_unmask_local_cpu(void) { return 0; }
>> +static inline void sdei_init(void) { }
>> #endif /* CONFIG_ARM_SDE_INTERFACE */
>>
>>
>> --
>> 2.20.1.12.g72788fdb
>>
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> [email protected]
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

2022-01-22 03:55:10

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH v6] ACPI: explicit init HEST, SDEI and GHES in apci_init

On Fri, Jan 21, 2022 at 11:43:25AM +0800, Shuai Xue wrote:
> 在 2022/1/21 AM12:22, Bjorn Helgaas 写道:
> > On Thu, Jan 20, 2022 at 01:05:22PM +0800, Shuai Xue wrote:
> >> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> >> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> >> the estatus memory pool. On the other hand, ghes_init() relies on
> >> sdei_init() to detect the SDEI version and (un)register events. The
> >> dependencies are as follows:
> >>
> >> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
> >> ghes_init() => sdei_init()
> >>
> >> HEST is not PCI-specific and initcall ordering is implicit and not
> >> well-defined within a level.
> >>
> >> Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
> >> convert ghes_init() and sdei_init() from initcalls to explicit calls in the
> >> following order:
> >>
> >> acpi_hest_init()
> >> sdei_init()
> >> ghes_init()

> >> --- a/drivers/acpi/bus.c
> >> +++ b/drivers/acpi/bus.c
> >> @@ -30,6 +30,7 @@
> >> #include <linux/acpi_viot.h>
> >> #include <linux/pci.h>
> >> #include <acpi/apei.h>
> >> +#include <linux/arm_sdei.h>
> >
> > This "arm" looks a little out of place in this supposedly arch-generic
> > code. Not really a new thing with this patch, since this #include
> > already appears in drivers/acpi/apei/ghes.c. Maybe it's unavoidable.
>
> Yep, should we move sdei_init() into the beginning of ghes_init()?
> ...

> > Software Delegated Exception Interface (|SDEI|) is an Arm specification for
> > Non-secure world to register handlers with firmware to receive notifications
> > about system events.
> > LINK: https://github.com/ARM-software/arm-trusted-firmware/blob/master/docs/components/sdei.rst
>
> I think SDEI is not a ACPI Specification but Arm specification so we should
> not rename sdei_init() with an "acpi_" prefix. If we move sdei_init() into
> ghes_init(), and rename ghes_init() to acpi_ghes_init(), then all looks
> fine? What's your opinion, Bjorn?

Makes sense to me, especially since drivers/acpi/apei/ghes.c already
includes linux/arm_sdei.h. This is Rafael's area.

Bjorn

2022-01-23 00:17:26

by Shuai Xue

[permalink] [raw]
Subject: [PATCH v7 1/2] ACPI: APEI: explicit init HEST and GHES in apci_init

From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
the estatus memory pool. On the other hand, ghes_init() relies on
sdei_init() to detect the SDEI version and (un)register events. The
dependencies are as follows:

ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
ghes_init() => sdei_init()

HEST is not PCI-specific and initcall ordering is implicit and not
well-defined within a level.

Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
convert ghes_init() and sdei_init() from initcalls to explicit calls in the
following order:

acpi_hest_init()
ghes_init()
sdei_init()

Signed-off-by: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 19 ++++++++-----------
drivers/acpi/bus.c | 2 ++
drivers/acpi/pci_root.c | 3 ---
drivers/firmware/Kconfig | 1 +
drivers/firmware/arm_sdei.c | 13 ++-----------
include/acpi/apei.h | 4 +++-
include/linux/arm_sdei.h | 2 ++
7 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c5c9acc6254..aadc0a972f18 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};

-static int __init ghes_init(void)
+void __init ghes_init(void)
{
int rc;

+ sdei_init();
+
if (acpi_disabled)
- return -ENODEV;
+ return;

switch (hest_disable) {
case HEST_NOT_FOUND:
- return -ENODEV;
+ return;
case HEST_DISABLED:
pr_info(GHES_PFX "HEST is not enabled!\n");
- return -EINVAL;
+ return;
default:
break;
}

if (ghes_disable) {
pr_info(GHES_PFX "GHES is not enabled!\n");
- return -EINVAL;
+ return;
}

ghes_nmi_init_cxt();

rc = platform_driver_register(&ghes_platform_driver);
if (rc)
- goto err;
+ return;

rc = apei_osc_setup();
if (rc == 0 && osc_sb_apei_support_acked)
@@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
-
- return 0;
-err:
- return rc;
}
-device_initcall(ghes_init);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 07f604832fd6..3f403db20f69 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1331,6 +1331,8 @@ static int __init acpi_init(void)

pci_mmcfg_late_init();
acpi_iort_init();
+ acpi_hest_init();
+ ghes_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index b76db99cced3..6f9e75d14808 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -22,8 +22,6 @@
#include <linux/slab.h>
#include <linux/dmi.h>
#include <linux/platform_data/x86/apple.h>
-#include <acpi/apei.h> /* for acpi_hest_init() */
-
#include "internal.h"

#define ACPI_PCI_ROOT_CLASS "pci_bridge"
@@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,

void __init acpi_pci_root_init(void)
{
- acpi_hest_init();
if (acpi_pci_disabled)
return;

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 75cb91055c17..ad114d9cdf8e 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
config ARM_SDE_INTERFACE
bool "ARM Software Delegated Exception Interface (SDEI)"
depends on ARM64
+ select ACPI_APEI_GHES
help
The Software Delegated Exception Interface (SDEI) is an ARM
standard for registering callbacks from the platform firmware
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f9..1e1a51510e83 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
return true;
}

-static int __init sdei_init(void)
+void __init sdei_init(void)
{
struct platform_device *pdev;
int ret;

ret = platform_driver_register(&sdei_driver);
if (ret || !sdei_present_acpi())
- return ret;
+ return;

pdev = platform_device_register_simple(sdei_driver.driver.name,
0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
pr_info("Failed to register ACPI:SDEI platform device %d\n",
ret);
}
-
- return ret;
}

-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
int sdei_event_handler(struct pt_regs *regs,
struct sdei_registered_event *arg)
{
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae..4e60dd73c3bb 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,14 +27,16 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
+void __init ghes_init(void);
#else
#define ghes_disable 1
+static inline void ghes_init(void) { }
#endif

#ifdef CONFIG_ACPI_APEI
void __init acpi_hest_init(void);
#else
-static inline void acpi_hest_init(void) { return; }
+static inline void acpi_hest_init(void) { }
#endif

int erst_write(const struct cper_record_header *record);
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d..14dc461b0e82 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
/* For use by arch code when CPU hotplug notifiers are not appropriate. */
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { }
#endif /* CONFIG_ARM_SDE_INTERFACE */


--
2.20.1.12.g72788fdb

2022-01-23 00:17:28

by Shuai Xue

[permalink] [raw]
Subject: [PATCH v7 2/2] ACPI: APEI: rename ghes_init with an "acpi_" prefix

Signed-off-by: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 2 +-
drivers/acpi/bus.c | 2 +-
include/acpi/apei.h | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index aadc0a972f18..d91ad378c00d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,7 +1457,7 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};

-void __init ghes_init(void)
+void __init acpi_ghes_init(void)
{
int rc;

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 3f403db20f69..cd374210fb9f 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1332,7 +1332,7 @@ static int __init acpi_init(void)
pci_mmcfg_late_init();
acpi_iort_init();
acpi_hest_init();
- ghes_init();
+ acpi_ghes_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index 4e60dd73c3bb..afaca3a075e8 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,10 +27,10 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
-void __init ghes_init(void);
+void __init acpi_ghes_init(void);
#else
#define ghes_disable 1
-static inline void ghes_init(void) { }
+static inline void acpi_ghes_init(void) { }
#endif

#ifdef CONFIG_ACPI_APEI
--
2.20.1.12.g72788fdb

2022-02-10 12:17:26

by Shuai Xue

[permalink] [raw]
Subject: Re: [PATCH v7 1/2] ACPI: APEI: explicit init HEST and GHES in apci_init

在 2022/1/22 PM1:26, Shuai Xue 写道:
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and (un)register events. The
> dependencies are as follows:
>
> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
> ghes_init() => sdei_init()
>
> HEST is not PCI-specific and initcall ordering is implicit and not
> well-defined within a level.
>
> Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
> convert ghes_init() and sdei_init() from initcalls to explicit calls in the
> following order:
>
> acpi_hest_init()
> ghes_init()
> sdei_init()
>
> Signed-off-by: Shuai Xue <[email protected]>
> ---
> drivers/acpi/apei/ghes.c | 19 ++++++++-----------
> drivers/acpi/bus.c | 2 ++
> drivers/acpi/pci_root.c | 3 ---
> drivers/firmware/Kconfig | 1 +
> drivers/firmware/arm_sdei.c | 13 ++-----------
> include/acpi/apei.h | 4 +++-
> include/linux/arm_sdei.h | 2 ++
> 7 files changed, 18 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0c5c9acc6254..aadc0a972f18 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
> .remove = ghes_remove,
> };
>
> -static int __init ghes_init(void)
> +void __init ghes_init(void)
> {
> int rc;
>
> + sdei_init();
> +
> if (acpi_disabled)
> - return -ENODEV;
> + return;
>
> switch (hest_disable) {
> case HEST_NOT_FOUND:
> - return -ENODEV;
> + return;
> case HEST_DISABLED:
> pr_info(GHES_PFX "HEST is not enabled!\n");
> - return -EINVAL;
> + return;
> default:
> break;
> }
>
> if (ghes_disable) {
> pr_info(GHES_PFX "GHES is not enabled!\n");
> - return -EINVAL;
> + return;
> }
>
> ghes_nmi_init_cxt();
>
> rc = platform_driver_register(&ghes_platform_driver);
> if (rc)
> - goto err;
> + return;
>
> rc = apei_osc_setup();
> if (rc == 0 && osc_sb_apei_support_acked)
> @@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
> pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
> else
> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
> -
> - return 0;
> -err:
> - return rc;
> }
> -device_initcall(ghes_init);
> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> index 07f604832fd6..3f403db20f69 100644
> --- a/drivers/acpi/bus.c
> +++ b/drivers/acpi/bus.c
> @@ -1331,6 +1331,8 @@ static int __init acpi_init(void)
>
> pci_mmcfg_late_init();
> acpi_iort_init();
> + acpi_hest_init();
> + ghes_init();
> acpi_scan_init();
> acpi_ec_init();
> acpi_debugfs_init();
> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> index b76db99cced3..6f9e75d14808 100644
> --- a/drivers/acpi/pci_root.c
> +++ b/drivers/acpi/pci_root.c
> @@ -22,8 +22,6 @@
> #include <linux/slab.h>
> #include <linux/dmi.h>
> #include <linux/platform_data/x86/apple.h>
> -#include <acpi/apei.h> /* for acpi_hest_init() */
> -
> #include "internal.h"
>
> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> @@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>
> void __init acpi_pci_root_init(void)
> {
> - acpi_hest_init();
> if (acpi_pci_disabled)
> return;
>
> diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
> index 75cb91055c17..ad114d9cdf8e 100644
> --- a/drivers/firmware/Kconfig
> +++ b/drivers/firmware/Kconfig
> @@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
> config ARM_SDE_INTERFACE
> bool "ARM Software Delegated Exception Interface (SDEI)"
> depends on ARM64
> + select ACPI_APEI_GHES
> help
> The Software Delegated Exception Interface (SDEI) is an ARM
> standard for registering callbacks from the platform firmware
> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
> index a7e762c352f9..1e1a51510e83 100644
> --- a/drivers/firmware/arm_sdei.c
> +++ b/drivers/firmware/arm_sdei.c
> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
> return true;
> }
>
> -static int __init sdei_init(void)
> +void __init sdei_init(void)
> {
> struct platform_device *pdev;
> int ret;
>
> ret = platform_driver_register(&sdei_driver);
> if (ret || !sdei_present_acpi())
> - return ret;
> + return;
>
> pdev = platform_device_register_simple(sdei_driver.driver.name,
> 0, NULL, 0);
> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
> pr_info("Failed to register ACPI:SDEI platform device %d\n",
> ret);
> }
> -
> - return ret;
> }
>
> -/*
> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
> - * by device_initcall(). We want to be called in the middle.
> - */
> -subsys_initcall_sync(sdei_init);
> -
> int sdei_event_handler(struct pt_regs *regs,
> struct sdei_registered_event *arg)
> {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index ece0a8af2bae..4e60dd73c3bb 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -27,14 +27,16 @@ extern int hest_disable;
> extern int erst_disable;
> #ifdef CONFIG_ACPI_APEI_GHES
> extern bool ghes_disable;
> +void __init ghes_init(void);
> #else
> #define ghes_disable 1
> +static inline void ghes_init(void) { }
> #endif
>
> #ifdef CONFIG_ACPI_APEI
> void __init acpi_hest_init(void);
> #else
> -static inline void acpi_hest_init(void) { return; }
> +static inline void acpi_hest_init(void) { }
> #endif
>
> int erst_write(const struct cper_record_header *record);
> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
> index 0a241c5c911d..14dc461b0e82 100644
> --- a/include/linux/arm_sdei.h
> +++ b/include/linux/arm_sdei.h
> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
> int sdei_mask_local_cpu(void);
> int sdei_unmask_local_cpu(void);
> +void __init sdei_init(void);
> #else
> static inline int sdei_mask_local_cpu(void) { return 0; }
> static inline int sdei_unmask_local_cpu(void) { return 0; }
> +static inline void sdei_init(void) { }
> #endif /* CONFIG_ARM_SDE_INTERFACE */
>
>

Hi folks,

I am wondering if you have any comments on this series of patches?

Thank you.

Best Regards,
Shuai

2022-02-14 21:33:42

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH v7 1/2] ACPI: APEI: explicit init HEST and GHES in apci_init

On Thu, Feb 10, 2022 at 10:40 AM Shuai Xue <[email protected]> wrote:
>
> 在 2022/1/22 PM1:26, Shuai Xue 写道:
> > From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> > memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> > the estatus memory pool. On the other hand, ghes_init() relies on
> > sdei_init() to detect the SDEI version and (un)register events. The
> > dependencies are as follows:
> >
> > ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
> > ghes_init() => sdei_init()
> >
> > HEST is not PCI-specific and initcall ordering is implicit and not
> > well-defined within a level.
> >
> > Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
> > convert ghes_init() and sdei_init() from initcalls to explicit calls in the
> > following order:
> >
> > acpi_hest_init()
> > ghes_init()
> > sdei_init()
> >
> > Signed-off-by: Shuai Xue <[email protected]>
> > ---
> > drivers/acpi/apei/ghes.c | 19 ++++++++-----------
> > drivers/acpi/bus.c | 2 ++
> > drivers/acpi/pci_root.c | 3 ---
> > drivers/firmware/Kconfig | 1 +
> > drivers/firmware/arm_sdei.c | 13 ++-----------
> > include/acpi/apei.h | 4 +++-
> > include/linux/arm_sdei.h | 2 ++
> > 7 files changed, 18 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> > index 0c5c9acc6254..aadc0a972f18 100644
> > --- a/drivers/acpi/apei/ghes.c
> > +++ b/drivers/acpi/apei/ghes.c
> > @@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
> > .remove = ghes_remove,
> > };
> >
> > -static int __init ghes_init(void)
> > +void __init ghes_init(void)
> > {
> > int rc;
> >
> > + sdei_init();
> > +
> > if (acpi_disabled)
> > - return -ENODEV;
> > + return;
> >
> > switch (hest_disable) {
> > case HEST_NOT_FOUND:
> > - return -ENODEV;
> > + return;
> > case HEST_DISABLED:
> > pr_info(GHES_PFX "HEST is not enabled!\n");
> > - return -EINVAL;
> > + return;
> > default:
> > break;
> > }
> >
> > if (ghes_disable) {
> > pr_info(GHES_PFX "GHES is not enabled!\n");
> > - return -EINVAL;
> > + return;
> > }
> >
> > ghes_nmi_init_cxt();
> >
> > rc = platform_driver_register(&ghes_platform_driver);
> > if (rc)
> > - goto err;
> > + return;
> >
> > rc = apei_osc_setup();
> > if (rc == 0 && osc_sb_apei_support_acked)
> > @@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
> > pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
> > else
> > pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
> > -
> > - return 0;
> > -err:
> > - return rc;
> > }
> > -device_initcall(ghes_init);
> > diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> > index 07f604832fd6..3f403db20f69 100644
> > --- a/drivers/acpi/bus.c
> > +++ b/drivers/acpi/bus.c
> > @@ -1331,6 +1331,8 @@ static int __init acpi_init(void)
> >
> > pci_mmcfg_late_init();
> > acpi_iort_init();
> > + acpi_hest_init();
> > + ghes_init();
> > acpi_scan_init();
> > acpi_ec_init();
> > acpi_debugfs_init();
> > diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> > index b76db99cced3..6f9e75d14808 100644
> > --- a/drivers/acpi/pci_root.c
> > +++ b/drivers/acpi/pci_root.c
> > @@ -22,8 +22,6 @@
> > #include <linux/slab.h>
> > #include <linux/dmi.h>
> > #include <linux/platform_data/x86/apple.h>
> > -#include <acpi/apei.h> /* for acpi_hest_init() */
> > -
> > #include "internal.h"
> >
> > #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> > @@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
> >
> > void __init acpi_pci_root_init(void)
> > {
> > - acpi_hest_init();
> > if (acpi_pci_disabled)
> > return;
> >
> > diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
> > index 75cb91055c17..ad114d9cdf8e 100644
> > --- a/drivers/firmware/Kconfig
> > +++ b/drivers/firmware/Kconfig
> > @@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
> > config ARM_SDE_INTERFACE
> > bool "ARM Software Delegated Exception Interface (SDEI)"
> > depends on ARM64
> > + select ACPI_APEI_GHES
> > help
> > The Software Delegated Exception Interface (SDEI) is an ARM
> > standard for registering callbacks from the platform firmware
> > diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
> > index a7e762c352f9..1e1a51510e83 100644
> > --- a/drivers/firmware/arm_sdei.c
> > +++ b/drivers/firmware/arm_sdei.c
> > @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
> > return true;
> > }
> >
> > -static int __init sdei_init(void)
> > +void __init sdei_init(void)
> > {
> > struct platform_device *pdev;
> > int ret;
> >
> > ret = platform_driver_register(&sdei_driver);
> > if (ret || !sdei_present_acpi())
> > - return ret;
> > + return;
> >
> > pdev = platform_device_register_simple(sdei_driver.driver.name,
> > 0, NULL, 0);
> > @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
> > pr_info("Failed to register ACPI:SDEI platform device %d\n",
> > ret);
> > }
> > -
> > - return ret;
> > }
> >
> > -/*
> > - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
> > - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
> > - * by device_initcall(). We want to be called in the middle.
> > - */
> > -subsys_initcall_sync(sdei_init);
> > -
> > int sdei_event_handler(struct pt_regs *regs,
> > struct sdei_registered_event *arg)
> > {
> > diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> > index ece0a8af2bae..4e60dd73c3bb 100644
> > --- a/include/acpi/apei.h
> > +++ b/include/acpi/apei.h
> > @@ -27,14 +27,16 @@ extern int hest_disable;
> > extern int erst_disable;
> > #ifdef CONFIG_ACPI_APEI_GHES
> > extern bool ghes_disable;
> > +void __init ghes_init(void);
> > #else
> > #define ghes_disable 1
> > +static inline void ghes_init(void) { }
> > #endif
> >
> > #ifdef CONFIG_ACPI_APEI
> > void __init acpi_hest_init(void);
> > #else
> > -static inline void acpi_hest_init(void) { return; }
> > +static inline void acpi_hest_init(void) { }
> > #endif
> >
> > int erst_write(const struct cper_record_header *record);
> > diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
> > index 0a241c5c911d..14dc461b0e82 100644
> > --- a/include/linux/arm_sdei.h
> > +++ b/include/linux/arm_sdei.h
> > @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
> > /* For use by arch code when CPU hotplug notifiers are not appropriate. */
> > int sdei_mask_local_cpu(void);
> > int sdei_unmask_local_cpu(void);
> > +void __init sdei_init(void);
> > #else
> > static inline int sdei_mask_local_cpu(void) { return 0; }
> > static inline int sdei_unmask_local_cpu(void) { return 0; }
> > +static inline void sdei_init(void) { }
> > #endif /* CONFIG_ARM_SDE_INTERFACE */
> >
> >
>
> Hi folks,
>
> I am wondering if you have any comments on this series of patches?

I've applied both patches as 5.18 material, thanks!

2022-02-22 04:10:03

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH v7 1/2] ACPI: APEI: explicit init HEST and GHES in apci_init

On Mon, Feb 21, 2022 at 7:18 PM Nathan Chancellor <[email protected]> wrote:
>
> Hi Shuai,
>
> On Sat, Jan 22, 2022 at 01:26:17PM +0800, Shuai Xue wrote:
> > From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> > memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> > the estatus memory pool. On the other hand, ghes_init() relies on
> > sdei_init() to detect the SDEI version and (un)register events. The
> > dependencies are as follows:
> >
> > ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
> > ghes_init() => sdei_init()
> >
> > HEST is not PCI-specific and initcall ordering is implicit and not
> > well-defined within a level.
> >
> > Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
> > convert ghes_init() and sdei_init() from initcalls to explicit calls in the
> > following order:
> >
> > acpi_hest_init()
> > ghes_init()
> > sdei_init()
> >
> > Signed-off-by: Shuai Xue <[email protected]>
> > ---
> > drivers/acpi/apei/ghes.c | 19 ++++++++-----------
> > drivers/acpi/bus.c | 2 ++
> > drivers/acpi/pci_root.c | 3 ---
> > drivers/firmware/Kconfig | 1 +
> > drivers/firmware/arm_sdei.c | 13 ++-----------
> > include/acpi/apei.h | 4 +++-
> > include/linux/arm_sdei.h | 2 ++
> > 7 files changed, 18 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> > index 0c5c9acc6254..aadc0a972f18 100644
> > --- a/drivers/acpi/apei/ghes.c
> > +++ b/drivers/acpi/apei/ghes.c
> > @@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
> > .remove = ghes_remove,
> > };
> >
> > -static int __init ghes_init(void)
> > +void __init ghes_init(void)
> > {
> > int rc;
> >
> > + sdei_init();
> > +
> > if (acpi_disabled)
> > - return -ENODEV;
> > + return;
> >
> > switch (hest_disable) {
> > case HEST_NOT_FOUND:
> > - return -ENODEV;
> > + return;
> > case HEST_DISABLED:
> > pr_info(GHES_PFX "HEST is not enabled!\n");
> > - return -EINVAL;
> > + return;
> > default:
> > break;
> > }
> >
> > if (ghes_disable) {
> > pr_info(GHES_PFX "GHES is not enabled!\n");
> > - return -EINVAL;
> > + return;
> > }
> >
> > ghes_nmi_init_cxt();
> >
> > rc = platform_driver_register(&ghes_platform_driver);
> > if (rc)
> > - goto err;
> > + return;
> >
> > rc = apei_osc_setup();
> > if (rc == 0 && osc_sb_apei_support_acked)
> > @@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
> > pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
> > else
> > pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
> > -
> > - return 0;
> > -err:
> > - return rc;
> > }
> > -device_initcall(ghes_init);
> > diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> > index 07f604832fd6..3f403db20f69 100644
> > --- a/drivers/acpi/bus.c
> > +++ b/drivers/acpi/bus.c
> > @@ -1331,6 +1331,8 @@ static int __init acpi_init(void)
> >
> > pci_mmcfg_late_init();
> > acpi_iort_init();
> > + acpi_hest_init();
> > + ghes_init();
> > acpi_scan_init();
> > acpi_ec_init();
> > acpi_debugfs_init();
> > diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> > index b76db99cced3..6f9e75d14808 100644
> > --- a/drivers/acpi/pci_root.c
> > +++ b/drivers/acpi/pci_root.c
> > @@ -22,8 +22,6 @@
> > #include <linux/slab.h>
> > #include <linux/dmi.h>
> > #include <linux/platform_data/x86/apple.h>
> > -#include <acpi/apei.h> /* for acpi_hest_init() */
> > -
> > #include "internal.h"
> >
> > #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> > @@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
> >
> > void __init acpi_pci_root_init(void)
> > {
> > - acpi_hest_init();
> > if (acpi_pci_disabled)
> > return;
> >
> > diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
> > index 75cb91055c17..ad114d9cdf8e 100644
> > --- a/drivers/firmware/Kconfig
> > +++ b/drivers/firmware/Kconfig
> > @@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
> > config ARM_SDE_INTERFACE
> > bool "ARM Software Delegated Exception Interface (SDEI)"
> > depends on ARM64
> > + select ACPI_APEI_GHES
>
> As the kernel test robot pointed out [1], you cannot do this.
> CONFIG_ACPI_APEI_GHES is a user selectable symbol that has dependencies,
> which 'select' completely overrides, resulting in build failures when
> CONFIG_ACPI_APEI is not enabled.
>
> If CONFIG_ARM_SDE_INTERFACE truly requires CONFIG_ACPI_APEI_GHES, you
> should have "depends on ACPI_APEI_GHES".
>
> If CONFIG_ARM_SDE_INTERFACE soft depends on CONFIG_ACPI_APEI_GHES for
> functionality but can work without it, you could use
> "imply ACPI_APEI_GHES", which will enable CONFIG_ACPI_APEI_GHES if its
> dependencies are met.
>
> I noticed the same error with Alpine Linux's aarch64 configuration [2]
> if you wanted a quick configuration to test with.
>
> [1]: https://lore.kernel.org/r/[email protected]/
> [2]: https://git.alpinelinux.org/aports/plain/community/linux-edge/config-edge.aarch64

Note that I've dropped these patches from my linux-next branch now.

2022-02-22 04:26:20

by Nathan Chancellor

[permalink] [raw]
Subject: Re: [PATCH v7 1/2] ACPI: APEI: explicit init HEST and GHES in apci_init

Hi Shuai,

On Sat, Jan 22, 2022 at 01:26:17PM +0800, Shuai Xue wrote:
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and (un)register events. The
> dependencies are as follows:
>
> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
> ghes_init() => sdei_init()
>
> HEST is not PCI-specific and initcall ordering is implicit and not
> well-defined within a level.
>
> Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
> convert ghes_init() and sdei_init() from initcalls to explicit calls in the
> following order:
>
> acpi_hest_init()
> ghes_init()
> sdei_init()
>
> Signed-off-by: Shuai Xue <[email protected]>
> ---
> drivers/acpi/apei/ghes.c | 19 ++++++++-----------
> drivers/acpi/bus.c | 2 ++
> drivers/acpi/pci_root.c | 3 ---
> drivers/firmware/Kconfig | 1 +
> drivers/firmware/arm_sdei.c | 13 ++-----------
> include/acpi/apei.h | 4 +++-
> include/linux/arm_sdei.h | 2 ++
> 7 files changed, 18 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0c5c9acc6254..aadc0a972f18 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
> .remove = ghes_remove,
> };
>
> -static int __init ghes_init(void)
> +void __init ghes_init(void)
> {
> int rc;
>
> + sdei_init();
> +
> if (acpi_disabled)
> - return -ENODEV;
> + return;
>
> switch (hest_disable) {
> case HEST_NOT_FOUND:
> - return -ENODEV;
> + return;
> case HEST_DISABLED:
> pr_info(GHES_PFX "HEST is not enabled!\n");
> - return -EINVAL;
> + return;
> default:
> break;
> }
>
> if (ghes_disable) {
> pr_info(GHES_PFX "GHES is not enabled!\n");
> - return -EINVAL;
> + return;
> }
>
> ghes_nmi_init_cxt();
>
> rc = platform_driver_register(&ghes_platform_driver);
> if (rc)
> - goto err;
> + return;
>
> rc = apei_osc_setup();
> if (rc == 0 && osc_sb_apei_support_acked)
> @@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
> pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
> else
> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
> -
> - return 0;
> -err:
> - return rc;
> }
> -device_initcall(ghes_init);
> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> index 07f604832fd6..3f403db20f69 100644
> --- a/drivers/acpi/bus.c
> +++ b/drivers/acpi/bus.c
> @@ -1331,6 +1331,8 @@ static int __init acpi_init(void)
>
> pci_mmcfg_late_init();
> acpi_iort_init();
> + acpi_hest_init();
> + ghes_init();
> acpi_scan_init();
> acpi_ec_init();
> acpi_debugfs_init();
> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> index b76db99cced3..6f9e75d14808 100644
> --- a/drivers/acpi/pci_root.c
> +++ b/drivers/acpi/pci_root.c
> @@ -22,8 +22,6 @@
> #include <linux/slab.h>
> #include <linux/dmi.h>
> #include <linux/platform_data/x86/apple.h>
> -#include <acpi/apei.h> /* for acpi_hest_init() */
> -
> #include "internal.h"
>
> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> @@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>
> void __init acpi_pci_root_init(void)
> {
> - acpi_hest_init();
> if (acpi_pci_disabled)
> return;
>
> diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
> index 75cb91055c17..ad114d9cdf8e 100644
> --- a/drivers/firmware/Kconfig
> +++ b/drivers/firmware/Kconfig
> @@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
> config ARM_SDE_INTERFACE
> bool "ARM Software Delegated Exception Interface (SDEI)"
> depends on ARM64
> + select ACPI_APEI_GHES

As the kernel test robot pointed out [1], you cannot do this.
CONFIG_ACPI_APEI_GHES is a user selectable symbol that has dependencies,
which 'select' completely overrides, resulting in build failures when
CONFIG_ACPI_APEI is not enabled.

If CONFIG_ARM_SDE_INTERFACE truly requires CONFIG_ACPI_APEI_GHES, you
should have "depends on ACPI_APEI_GHES".

If CONFIG_ARM_SDE_INTERFACE soft depends on CONFIG_ACPI_APEI_GHES for
functionality but can work without it, you could use
"imply ACPI_APEI_GHES", which will enable CONFIG_ACPI_APEI_GHES if its
dependencies are met.

I noticed the same error with Alpine Linux's aarch64 configuration [2]
if you wanted a quick configuration to test with.

[1]: https://lore.kernel.org/r/[email protected]/
[2]: https://git.alpinelinux.org/aports/plain/community/linux-edge/config-edge.aarch64

Cheers,
Nathan

> help
> The Software Delegated Exception Interface (SDEI) is an ARM
> standard for registering callbacks from the platform firmware
> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
> index a7e762c352f9..1e1a51510e83 100644
> --- a/drivers/firmware/arm_sdei.c
> +++ b/drivers/firmware/arm_sdei.c
> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
> return true;
> }
>
> -static int __init sdei_init(void)
> +void __init sdei_init(void)
> {
> struct platform_device *pdev;
> int ret;
>
> ret = platform_driver_register(&sdei_driver);
> if (ret || !sdei_present_acpi())
> - return ret;
> + return;
>
> pdev = platform_device_register_simple(sdei_driver.driver.name,
> 0, NULL, 0);
> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
> pr_info("Failed to register ACPI:SDEI platform device %d\n",
> ret);
> }
> -
> - return ret;
> }
>
> -/*
> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
> - * by device_initcall(). We want to be called in the middle.
> - */
> -subsys_initcall_sync(sdei_init);
> -
> int sdei_event_handler(struct pt_regs *regs,
> struct sdei_registered_event *arg)
> {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index ece0a8af2bae..4e60dd73c3bb 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -27,14 +27,16 @@ extern int hest_disable;
> extern int erst_disable;
> #ifdef CONFIG_ACPI_APEI_GHES
> extern bool ghes_disable;
> +void __init ghes_init(void);
> #else
> #define ghes_disable 1
> +static inline void ghes_init(void) { }
> #endif
>
> #ifdef CONFIG_ACPI_APEI
> void __init acpi_hest_init(void);
> #else
> -static inline void acpi_hest_init(void) { return; }
> +static inline void acpi_hest_init(void) { }
> #endif
>
> int erst_write(const struct cper_record_header *record);
> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
> index 0a241c5c911d..14dc461b0e82 100644
> --- a/include/linux/arm_sdei.h
> +++ b/include/linux/arm_sdei.h
> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
> int sdei_mask_local_cpu(void);
> int sdei_unmask_local_cpu(void);
> +void __init sdei_init(void);
> #else
> static inline int sdei_mask_local_cpu(void) { return 0; }
> static inline int sdei_unmask_local_cpu(void) { return 0; }
> +static inline void sdei_init(void) { }
> #endif /* CONFIG_ARM_SDE_INTERFACE */
>
>
> --
> 2.20.1.12.g72788fdb
>
>

2022-02-22 06:18:20

by Shuai Xue

[permalink] [raw]
Subject: Re: [PATCH v7 1/2] ACPI: APEI: explicit init HEST and GHES in apci_init

Hi, Nathan and Rafael,

在 2022/2/22 AM2:18, Nathan Chancellor 写道:
> Hi Shuai,
>
> On Sat, Jan 22, 2022 at 01:26:17PM +0800, Shuai Xue wrote:
>> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
>> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
>> the estatus memory pool. On the other hand, ghes_init() relies on
>> sdei_init() to detect the SDEI version and (un)register events. The
>> dependencies are as follows:
>>
>> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
>> ghes_init() => sdei_init()
>>
>> HEST is not PCI-specific and initcall ordering is implicit and not
>> well-defined within a level.
>>
>> Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
>> convert ghes_init() and sdei_init() from initcalls to explicit calls in the
>> following order:
>>
>> acpi_hest_init()
>> ghes_init()
>> sdei_init()
>>
>> Signed-off-by: Shuai Xue <[email protected]>
>> ---
>> drivers/acpi/apei/ghes.c | 19 ++++++++-----------
>> drivers/acpi/bus.c | 2 ++
>> drivers/acpi/pci_root.c | 3 ---
>> drivers/firmware/Kconfig | 1 +
>> drivers/firmware/arm_sdei.c | 13 ++-----------
>> include/acpi/apei.h | 4 +++-
>> include/linux/arm_sdei.h | 2 ++
>> 7 files changed, 18 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 0c5c9acc6254..aadc0a972f18 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
>> .remove = ghes_remove,
>> };
>>
>> -static int __init ghes_init(void)
>> +void __init ghes_init(void)
>> {
>> int rc;
>>
>> + sdei_init();
>> +
>> if (acpi_disabled)
>> - return -ENODEV;
>> + return;
>>
>> switch (hest_disable) {
>> case HEST_NOT_FOUND:
>> - return -ENODEV;
>> + return;
>> case HEST_DISABLED:
>> pr_info(GHES_PFX "HEST is not enabled!\n");
>> - return -EINVAL;
>> + return;
>> default:
>> break;
>> }
>>
>> if (ghes_disable) {
>> pr_info(GHES_PFX "GHES is not enabled!\n");
>> - return -EINVAL;
>> + return;
>> }
>>
>> ghes_nmi_init_cxt();
>>
>> rc = platform_driver_register(&ghes_platform_driver);
>> if (rc)
>> - goto err;
>> + return;
>>
>> rc = apei_osc_setup();
>> if (rc == 0 && osc_sb_apei_support_acked)
>> @@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
>> pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
>> else
>> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
>> -
>> - return 0;
>> -err:
>> - return rc;
>> }
>> -device_initcall(ghes_init);
>> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
>> index 07f604832fd6..3f403db20f69 100644
>> --- a/drivers/acpi/bus.c
>> +++ b/drivers/acpi/bus.c
>> @@ -1331,6 +1331,8 @@ static int __init acpi_init(void)
>>
>> pci_mmcfg_late_init();
>> acpi_iort_init();
>> + acpi_hest_init();
>> + ghes_init();
>> acpi_scan_init();
>> acpi_ec_init();
>> acpi_debugfs_init();
>> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
>> index b76db99cced3..6f9e75d14808 100644
>> --- a/drivers/acpi/pci_root.c
>> +++ b/drivers/acpi/pci_root.c
>> @@ -22,8 +22,6 @@
>> #include <linux/slab.h>
>> #include <linux/dmi.h>
>> #include <linux/platform_data/x86/apple.h>
>> -#include <acpi/apei.h> /* for acpi_hest_init() */
>> -
>> #include "internal.h"
>>
>> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
>> @@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>>
>> void __init acpi_pci_root_init(void)
>> {
>> - acpi_hest_init();
>> if (acpi_pci_disabled)
>> return;
>>
>> diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
>> index 75cb91055c17..ad114d9cdf8e 100644
>> --- a/drivers/firmware/Kconfig
>> +++ b/drivers/firmware/Kconfig
>> @@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
>> config ARM_SDE_INTERFACE
>> bool "ARM Software Delegated Exception Interface (SDEI)"
>> depends on ARM64
>> + select ACPI_APEI_GHES
>
> As the kernel test robot pointed out [1], you cannot do this.
> CONFIG_ACPI_APEI_GHES is a user selectable symbol that has dependencies,
> which 'select' completely overrides, resulting in build failures when
> CONFIG_ACPI_APEI is not enabled.
>
> If CONFIG_ARM_SDE_INTERFACE truly requires CONFIG_ACPI_APEI_GHES, you
> should have "depends on ACPI_APEI_GHES".
>
> If CONFIG_ARM_SDE_INTERFACE soft depends on CONFIG_ACPI_APEI_GHES for
> functionality but can work without it, you could use
> "imply ACPI_APEI_GHES", which will enable CONFIG_ACPI_APEI_GHES if its
> dependencies are met.
>
> I noticed the same error with Alpine Linux's aarch64 configuration [2]
> if you wanted a quick configuration to test with.
>
> [1]: https://lore.kernel.org/r/[email protected]/
> [2]: https://git.alpinelinux.org/aports/plain/community/linux-edge/config-edge.aarch64

Thank you for explaining the difference among 'select', 'depends on' and 'imply'.
I was wrong to use 'select'. sdei_init() is called in ghes_init() now, in other words,
CONFIG_ARM_SDE_INTERFACE truly requires CONFIG_ACPI_APEI_GHES, so we should use
'depends on'. I will send a new patch set to fix this problem.

Best Regards,
Shuai

2022-02-27 12:48:02

by Shuai Xue

[permalink] [raw]
Subject: [PATCH v8 1/2] ACPI: APEI: explicit init HEST and GHES in apci_init

From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
the estatus memory pool. On the other hand, ghes_init() relies on
sdei_init() to detect the SDEI version and (un)register events. The
dependencies are as follows:

ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
ghes_init() => sdei_init()

HEST is not PCI-specific and initcall ordering is implicit and not
well-defined within a level.

Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
convert ghes_init() and sdei_init() from initcalls to explicit calls in the
following order:

acpi_hest_init()
ghes_init()
sdei_init()

Signed-off-by: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 19 ++++++++-----------
drivers/acpi/bus.c | 2 ++
drivers/acpi/pci_root.c | 3 ---
drivers/firmware/Kconfig | 1 +
drivers/firmware/arm_sdei.c | 13 ++-----------
include/acpi/apei.h | 4 +++-
include/linux/arm_sdei.h | 2 ++
7 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 0c5c9acc6254..aadc0a972f18 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};

-static int __init ghes_init(void)
+void __init ghes_init(void)
{
int rc;

+ sdei_init();
+
if (acpi_disabled)
- return -ENODEV;
+ return;

switch (hest_disable) {
case HEST_NOT_FOUND:
- return -ENODEV;
+ return;
case HEST_DISABLED:
pr_info(GHES_PFX "HEST is not enabled!\n");
- return -EINVAL;
+ return;
default:
break;
}

if (ghes_disable) {
pr_info(GHES_PFX "GHES is not enabled!\n");
- return -EINVAL;
+ return;
}

ghes_nmi_init_cxt();

rc = platform_driver_register(&ghes_platform_driver);
if (rc)
- goto err;
+ return;

rc = apei_osc_setup();
if (rc == 0 && osc_sb_apei_support_acked)
@@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
else
pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
-
- return 0;
-err:
- return rc;
}
-device_initcall(ghes_init);
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 07f604832fd6..3f403db20f69 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1331,6 +1331,8 @@ static int __init acpi_init(void)

pci_mmcfg_late_init();
acpi_iort_init();
+ acpi_hest_init();
+ ghes_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index b76db99cced3..6f9e75d14808 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -22,8 +22,6 @@
#include <linux/slab.h>
#include <linux/dmi.h>
#include <linux/platform_data/x86/apple.h>
-#include <acpi/apei.h> /* for acpi_hest_init() */
-
#include "internal.h"

#define ACPI_PCI_ROOT_CLASS "pci_bridge"
@@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,

void __init acpi_pci_root_init(void)
{
- acpi_hest_init();
if (acpi_pci_disabled)
return;

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 75cb91055c17..e5cfb01353d8 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
config ARM_SDE_INTERFACE
bool "ARM Software Delegated Exception Interface (SDEI)"
depends on ARM64
+ depends on ACPI_APEI_GHES
help
The Software Delegated Exception Interface (SDEI) is an ARM
standard for registering callbacks from the platform firmware
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index a7e762c352f9..1e1a51510e83 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
return true;
}

-static int __init sdei_init(void)
+void __init sdei_init(void)
{
struct platform_device *pdev;
int ret;

ret = platform_driver_register(&sdei_driver);
if (ret || !sdei_present_acpi())
- return ret;
+ return;

pdev = platform_device_register_simple(sdei_driver.driver.name,
0, NULL, 0);
@@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
pr_info("Failed to register ACPI:SDEI platform device %d\n",
ret);
}
-
- return ret;
}

-/*
- * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
- * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
- * by device_initcall(). We want to be called in the middle.
- */
-subsys_initcall_sync(sdei_init);
-
int sdei_event_handler(struct pt_regs *regs,
struct sdei_registered_event *arg)
{
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index ece0a8af2bae..4e60dd73c3bb 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,14 +27,16 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
+void __init ghes_init(void);
#else
#define ghes_disable 1
+static inline void ghes_init(void) { }
#endif

#ifdef CONFIG_ACPI_APEI
void __init acpi_hest_init(void);
#else
-static inline void acpi_hest_init(void) { return; }
+static inline void acpi_hest_init(void) { }
#endif

int erst_write(const struct cper_record_header *record);
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 0a241c5c911d..14dc461b0e82 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
/* For use by arch code when CPU hotplug notifiers are not appropriate. */
int sdei_mask_local_cpu(void);
int sdei_unmask_local_cpu(void);
+void __init sdei_init(void);
#else
static inline int sdei_mask_local_cpu(void) { return 0; }
static inline int sdei_unmask_local_cpu(void) { return 0; }
+static inline void sdei_init(void) { }
#endif /* CONFIG_ARM_SDE_INTERFACE */


--
2.20.1.12.g72788fdb

2022-02-27 13:26:54

by Shuai Xue

[permalink] [raw]
Subject: [PATCH v8 2/2] ACPI: APEI: rename ghes_init with an "acpi_" prefix

ghes_init() sticks out in acpi_init() because it is the only functions
without an "acpi_" prefix.

Rename ghes_init with an "acpi_" prefix, then all looks fine.

Signed-off-by: Shuai Xue <[email protected]>
---
drivers/acpi/apei/ghes.c | 2 +-
drivers/acpi/bus.c | 2 +-
include/acpi/apei.h | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index aadc0a972f18..d91ad378c00d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -1457,7 +1457,7 @@ static struct platform_driver ghes_platform_driver = {
.remove = ghes_remove,
};

-void __init ghes_init(void)
+void __init acpi_ghes_init(void)
{
int rc;

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 3f403db20f69..cd374210fb9f 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1332,7 +1332,7 @@ static int __init acpi_init(void)
pci_mmcfg_late_init();
acpi_iort_init();
acpi_hest_init();
- ghes_init();
+ acpi_ghes_init();
acpi_scan_init();
acpi_ec_init();
acpi_debugfs_init();
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index 4e60dd73c3bb..afaca3a075e8 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -27,10 +27,10 @@ extern int hest_disable;
extern int erst_disable;
#ifdef CONFIG_ACPI_APEI_GHES
extern bool ghes_disable;
-void __init ghes_init(void);
+void __init acpi_ghes_init(void);
#else
#define ghes_disable 1
-static inline void ghes_init(void) { }
+static inline void acpi_ghes_init(void) { }
#endif

#ifdef CONFIG_ACPI_APEI
--
2.20.1.12.g72788fdb

2022-03-04 08:39:58

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH v8 1/2] ACPI: APEI: explicit init HEST and GHES in apci_init

On Sun, Feb 27, 2022 at 1:26 PM Shuai Xue <[email protected]> wrote:
>
> From commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus
> memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage
> the estatus memory pool. On the other hand, ghes_init() relies on
> sdei_init() to detect the SDEI version and (un)register events. The
> dependencies are as follows:
>
> ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init()
> ghes_init() => sdei_init()
>
> HEST is not PCI-specific and initcall ordering is implicit and not
> well-defined within a level.
>
> Based on above, remove acpi_hest_init() from acpi_pci_root_init() and
> convert ghes_init() and sdei_init() from initcalls to explicit calls in the
> following order:
>
> acpi_hest_init()
> ghes_init()
> sdei_init()
>
> Signed-off-by: Shuai Xue <[email protected]>

Applied as 5.18 material along with the [2/2], thanks!

> ---
> drivers/acpi/apei/ghes.c | 19 ++++++++-----------
> drivers/acpi/bus.c | 2 ++
> drivers/acpi/pci_root.c | 3 ---
> drivers/firmware/Kconfig | 1 +
> drivers/firmware/arm_sdei.c | 13 ++-----------
> include/acpi/apei.h | 4 +++-
> include/linux/arm_sdei.h | 2 ++
> 7 files changed, 18 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 0c5c9acc6254..aadc0a972f18 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -1457,33 +1457,35 @@ static struct platform_driver ghes_platform_driver = {
> .remove = ghes_remove,
> };
>
> -static int __init ghes_init(void)
> +void __init ghes_init(void)
> {
> int rc;
>
> + sdei_init();
> +
> if (acpi_disabled)
> - return -ENODEV;
> + return;
>
> switch (hest_disable) {
> case HEST_NOT_FOUND:
> - return -ENODEV;
> + return;
> case HEST_DISABLED:
> pr_info(GHES_PFX "HEST is not enabled!\n");
> - return -EINVAL;
> + return;
> default:
> break;
> }
>
> if (ghes_disable) {
> pr_info(GHES_PFX "GHES is not enabled!\n");
> - return -EINVAL;
> + return;
> }
>
> ghes_nmi_init_cxt();
>
> rc = platform_driver_register(&ghes_platform_driver);
> if (rc)
> - goto err;
> + return;
>
> rc = apei_osc_setup();
> if (rc == 0 && osc_sb_apei_support_acked)
> @@ -1494,9 +1496,4 @@ static int __init ghes_init(void)
> pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
> else
> pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
> -
> - return 0;
> -err:
> - return rc;
> }
> -device_initcall(ghes_init);
> diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
> index 07f604832fd6..3f403db20f69 100644
> --- a/drivers/acpi/bus.c
> +++ b/drivers/acpi/bus.c
> @@ -1331,6 +1331,8 @@ static int __init acpi_init(void)
>
> pci_mmcfg_late_init();
> acpi_iort_init();
> + acpi_hest_init();
> + ghes_init();
> acpi_scan_init();
> acpi_ec_init();
> acpi_debugfs_init();
> diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
> index b76db99cced3..6f9e75d14808 100644
> --- a/drivers/acpi/pci_root.c
> +++ b/drivers/acpi/pci_root.c
> @@ -22,8 +22,6 @@
> #include <linux/slab.h>
> #include <linux/dmi.h>
> #include <linux/platform_data/x86/apple.h>
> -#include <acpi/apei.h> /* for acpi_hest_init() */
> -
> #include "internal.h"
>
> #define ACPI_PCI_ROOT_CLASS "pci_bridge"
> @@ -943,7 +941,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
>
> void __init acpi_pci_root_init(void)
> {
> - acpi_hest_init();
> if (acpi_pci_disabled)
> return;
>
> diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
> index 75cb91055c17..e5cfb01353d8 100644
> --- a/drivers/firmware/Kconfig
> +++ b/drivers/firmware/Kconfig
> @@ -40,6 +40,7 @@ config ARM_SCPI_POWER_DOMAIN
> config ARM_SDE_INTERFACE
> bool "ARM Software Delegated Exception Interface (SDEI)"
> depends on ARM64
> + depends on ACPI_APEI_GHES
> help
> The Software Delegated Exception Interface (SDEI) is an ARM
> standard for registering callbacks from the platform firmware
> diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
> index a7e762c352f9..1e1a51510e83 100644
> --- a/drivers/firmware/arm_sdei.c
> +++ b/drivers/firmware/arm_sdei.c
> @@ -1059,14 +1059,14 @@ static bool __init sdei_present_acpi(void)
> return true;
> }
>
> -static int __init sdei_init(void)
> +void __init sdei_init(void)
> {
> struct platform_device *pdev;
> int ret;
>
> ret = platform_driver_register(&sdei_driver);
> if (ret || !sdei_present_acpi())
> - return ret;
> + return;
>
> pdev = platform_device_register_simple(sdei_driver.driver.name,
> 0, NULL, 0);
> @@ -1076,17 +1076,8 @@ static int __init sdei_init(void)
> pr_info("Failed to register ACPI:SDEI platform device %d\n",
> ret);
> }
> -
> - return ret;
> }
>
> -/*
> - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
> - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
> - * by device_initcall(). We want to be called in the middle.
> - */
> -subsys_initcall_sync(sdei_init);
> -
> int sdei_event_handler(struct pt_regs *regs,
> struct sdei_registered_event *arg)
> {
> diff --git a/include/acpi/apei.h b/include/acpi/apei.h
> index ece0a8af2bae..4e60dd73c3bb 100644
> --- a/include/acpi/apei.h
> +++ b/include/acpi/apei.h
> @@ -27,14 +27,16 @@ extern int hest_disable;
> extern int erst_disable;
> #ifdef CONFIG_ACPI_APEI_GHES
> extern bool ghes_disable;
> +void __init ghes_init(void);
> #else
> #define ghes_disable 1
> +static inline void ghes_init(void) { }
> #endif
>
> #ifdef CONFIG_ACPI_APEI
> void __init acpi_hest_init(void);
> #else
> -static inline void acpi_hest_init(void) { return; }
> +static inline void acpi_hest_init(void) { }
> #endif
>
> int erst_write(const struct cper_record_header *record);
> diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
> index 0a241c5c911d..14dc461b0e82 100644
> --- a/include/linux/arm_sdei.h
> +++ b/include/linux/arm_sdei.h
> @@ -46,9 +46,11 @@ int sdei_unregister_ghes(struct ghes *ghes);
> /* For use by arch code when CPU hotplug notifiers are not appropriate. */
> int sdei_mask_local_cpu(void);
> int sdei_unmask_local_cpu(void);
> +void __init sdei_init(void);
> #else
> static inline int sdei_mask_local_cpu(void) { return 0; }
> static inline int sdei_unmask_local_cpu(void) { return 0; }
> +static inline void sdei_init(void) { }
> #endif /* CONFIG_ARM_SDE_INTERFACE */
>
>
> --
> 2.20.1.12.g72788fdb
>