2023-01-04 01:57:46

by Rob Barnes

[permalink] [raw]
Subject: [PATCH v3 0/2] Handle CrOS EC Panics

Currently the OS ignores EC panics when they occur.
After reporting a panic, the EC will force a hard reset,
possibly after a short delay. This may cause loss of data.

These patches add a handler for CrOS EC panics. When
a panic is detected the OS will attempt to flush critical
data for debugging purposes and attempt an orderly shutdown.

Changelog since v2:
- Minor updates to commit messages

Changelog since v1:
- Updated commit messages
- Split into two patches
- Moved panic handle before mkbp loop
- Switched to dev_emerg


Rob Barnes (2):
platform/chrome: cros_ec: Poll EC log on EC panic
platform/chrome: cros_ec: Shutdown on EC Panic

drivers/platform/chrome/cros_ec_debugfs.c | 23 +++++++++++++++++++++
drivers/platform/chrome/cros_ec_lpc.c | 10 +++++++++
include/linux/platform_data/cros_ec_proto.h | 9 ++++++++
3 files changed, 42 insertions(+)

--
2.39.0.314.g84b9a713c41-goog


2023-01-04 01:57:46

by Rob Barnes

[permalink] [raw]
Subject: [PATCH v3 1/2] platform/chrome: cros_ec: Poll EC log on EC panic

Add handler for CrOS EC panic events. When a panic is reported,
immediately poll for EC log.

This should result in the log leading to the EC panic being
preserved.

ACPI_NOTIFY_CROS_EC_PANIC is defined in coreboot at
https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/heads/master/src/ec/google/chromeec/acpi/ec.asl

Signed-off-by: Rob Barnes <[email protected]>
---

Changelog since v1:
- Split into two patches
- Moved panic handle before mkbp loop
- Switched to dev_emerg message

drivers/platform/chrome/cros_ec_debugfs.c | 23 +++++++++++++++++++++
drivers/platform/chrome/cros_ec_lpc.c | 7 +++++++
include/linux/platform_data/cros_ec_proto.h | 9 ++++++++
3 files changed, 39 insertions(+)

diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 21d973fc6be2..34f7b46f8761 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -49,6 +49,7 @@ struct cros_ec_debugfs {
struct delayed_work log_poll_work;
/* EC panicinfo */
struct debugfs_blob_wrapper panicinfo_blob;
+ struct notifier_block notifier_panic;
};

/*
@@ -437,6 +438,22 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
return ret;
}

+static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
+ unsigned long queued_during_suspend, void *_notify)
+{
+ struct cros_ec_debugfs *debug_info =
+ container_of(nb, struct cros_ec_debugfs, notifier_panic);
+
+ if (debug_info->log_buffer.buf) {
+ /* Force log poll work to run immediately */
+ mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
+ /* Block until log poll work finishes */
+ flush_delayed_work(&debug_info->log_poll_work);
+ }
+
+ return NOTIFY_DONE;
+}
+
static int cros_ec_debugfs_probe(struct platform_device *pd)
{
struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
@@ -473,6 +490,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
&ec->ec_dev->suspend_timeout_ms);

+ debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
+ ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
+ &debug_info->notifier_panic);
+ if (ret)
+ goto remove_debugfs;
+
ec->debug_info = debug_info;

dev_set_drvdata(&pd->dev, ec);
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 7fc8f82280ac..5738f1d25091 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -320,6 +320,13 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)

ec_dev->last_event_time = cros_ec_get_time_ns();

+ if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
+ dev_emerg(ec_dev->dev, "CrOS EC Panic Reported. Shutdown is imminent!");
+ blocking_notifier_call_chain(&ec_dev->panic_notifier, 0, ec_dev);
+ /* Do not query for other events after a panic is reported */
+ return;
+ }
+
if (ec_dev->mkbp_event_supported)
do {
ret = cros_ec_get_next_event(ec_dev, NULL,
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index e43107e0bee1..7fb2196f99b0 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -41,6 +41,13 @@
#define EC_MAX_REQUEST_OVERHEAD 1
#define EC_MAX_RESPONSE_OVERHEAD 32

+/*
+ * EC panic is not covered by the standard (0-F) ACPI notify values.
+ * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
+ * device specific ACPI notify range.
+ */
+#define ACPI_NOTIFY_CROS_EC_PANIC 0xB0
+
/*
* Command interface between EC and AP, for LPC, I2C and SPI interfaces.
*/
@@ -176,6 +183,8 @@ struct cros_ec_device {
/* The platform devices used by the mfd driver */
struct platform_device *ec;
struct platform_device *pd;
+
+ struct blocking_notifier_head panic_notifier;
};

/**
--
2.39.0.314.g84b9a713c41-goog

2023-01-05 04:54:56

by Prashant Malani

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] platform/chrome: cros_ec: Poll EC log on EC panic

On Tue, Jan 3, 2023 at 5:15 PM Rob Barnes <[email protected]> wrote:
>
> Add handler for CrOS EC panic events. When a panic is reported,
> immediately poll for EC log.
>
> This should result in the log leading to the EC panic being
> preserved.
>
> ACPI_NOTIFY_CROS_EC_PANIC is defined in coreboot at
> https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/heads/master/src/ec/google/chromeec/acpi/ec.asl
>
> Signed-off-by: Rob Barnes <[email protected]>
FWIW:
Reviewed-by: Prashant Malani <[email protected]>

Subject: Re: [PATCH v3 0/2] Handle CrOS EC Panics

Hello:

This series was applied to chrome-platform/linux.git (for-kernelci)
by Tzung-Bi Shih <[email protected]>:

On Wed, 4 Jan 2023 01:15:22 +0000 you wrote:
> Currently the OS ignores EC panics when they occur.
> After reporting a panic, the EC will force a hard reset,
> possibly after a short delay. This may cause loss of data.
>
> These patches add a handler for CrOS EC panics. When
> a panic is detected the OS will attempt to flush critical
> data for debugging purposes and attempt an orderly shutdown.
>
> [...]

Here is the summary with links:
- [v3,1/2] platform/chrome: cros_ec: Poll EC log on EC panic
https://git.kernel.org/chrome-platform/c/d90fa2c64d59
- [v3,2/2] platform/chrome: cros_ec: Shutdown on EC Panic
https://git.kernel.org/chrome-platform/c/957445d730ba

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html


Subject: Re: [PATCH v3 0/2] Handle CrOS EC Panics

Hello:

This series was applied to chrome-platform/linux.git (for-next)
by Tzung-Bi Shih <[email protected]>:

On Wed, 4 Jan 2023 01:15:22 +0000 you wrote:
> Currently the OS ignores EC panics when they occur.
> After reporting a panic, the EC will force a hard reset,
> possibly after a short delay. This may cause loss of data.
>
> These patches add a handler for CrOS EC panics. When
> a panic is detected the OS will attempt to flush critical
> data for debugging purposes and attempt an orderly shutdown.
>
> [...]

Here is the summary with links:
- [v3,1/2] platform/chrome: cros_ec: Poll EC log on EC panic
https://git.kernel.org/chrome-platform/c/d90fa2c64d59
- [v3,2/2] platform/chrome: cros_ec: Shutdown on EC Panic
https://git.kernel.org/chrome-platform/c/957445d730ba

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html


2023-01-10 22:18:27

by Marek Szyprowski

[permalink] [raw]
Subject: Re: [PATCH v3 1/2] platform/chrome: cros_ec: Poll EC log on EC panic

Hi Rob,

On 04.01.2023 02:15, Rob Barnes wrote:
> Add handler for CrOS EC panic events. When a panic is reported,
> immediately poll for EC log.
>
> This should result in the log leading to the EC panic being
> preserved.
>
> ACPI_NOTIFY_CROS_EC_PANIC is defined in coreboot at
> https://review.coreboot.org/plugins/gitiles/coreboot/+/refs/heads/master/src/ec/google/chromeec/acpi/ec.asl
>
> Signed-off-by: Rob Barnes <[email protected]>

This patch landed in today's linux-next as commit d90fa2c64d59
("platform/chrome: cros_ec: Poll EC log on EC panic"). Unfortunately it
introduces the following runtime warning on my test systems:

INFO: trying to register non-static key.
The code is fine but needs lockdep annotation, or maybe
you didn't initialize this object before use?
turning off the locking correctness validator.
CPU: 0 PID: 62 Comm: kworker/u16:1 Not tainted
6.2.0-rc3-next-20230110-00037-g7c2b0426386a #6112
Hardware name: Samsung Exynos (Flattened Device Tree)
Workqueue: events_unbound async_run_entry_fn
 unwind_backtrace from show_stack+0x10/0x14
 show_stack from dump_stack_lvl+0x58/0x70
 dump_stack_lvl from register_lock_class+0x998/0x9a8
 register_lock_class from __lock_acquire+0x6c/0x2a80
 __lock_acquire from lock_acquire+0x124/0x3f0
 lock_acquire from down_write+0x40/0xd8
 down_write from blocking_notifier_chain_register+0x28/0x58
 blocking_notifier_chain_register from cros_ec_debugfs_probe+0x324/0x3b4
 cros_ec_debugfs_probe from platform_probe+0x5c/0xb8
 platform_probe from really_probe+0xe0/0x414
 really_probe from __driver_probe_device+0x9c/0x200
 __driver_probe_device from driver_probe_device+0x30/0xc0
 driver_probe_device from __device_attach_driver+0xa8/0x120
 __device_attach_driver from bus_for_each_drv+0x7c/0xc0
 bus_for_each_drv from __device_attach_async_helper+0xa0/0xf4
 __device_attach_async_helper from async_run_entry_fn+0x40/0x154
 async_run_entry_fn from process_one_work+0x288/0x790
 process_one_work from worker_thread+0x44/0x504
 worker_thread from kthread+0xf0/0x124
 kthread from ret_from_fork+0x14/0x2c
Exception stack(0xf0a6dfb0 to 0xf0a6dff8)
...
------------[ cut here ]------------

That's because the panic_notifier entry is not initialized to the sane
value. Adding the following line:

BLOCKING_INIT_NOTIFIER_HEAD(&ec_dev->panic_notifier);

to drivers/platform/chrome/cros_ec.c to cros_ec_register() function
fixes the issue. I will send the incremental fix in a few minutes.

> ---
>
> Changelog since v1:
> - Split into two patches
> - Moved panic handle before mkbp loop
> - Switched to dev_emerg message
>
> drivers/platform/chrome/cros_ec_debugfs.c | 23 +++++++++++++++++++++
> drivers/platform/chrome/cros_ec_lpc.c | 7 +++++++
> include/linux/platform_data/cros_ec_proto.h | 9 ++++++++
> 3 files changed, 39 insertions(+)
>
> diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
> index 21d973fc6be2..34f7b46f8761 100644
> --- a/drivers/platform/chrome/cros_ec_debugfs.c
> +++ b/drivers/platform/chrome/cros_ec_debugfs.c
> @@ -49,6 +49,7 @@ struct cros_ec_debugfs {
> struct delayed_work log_poll_work;
> /* EC panicinfo */
> struct debugfs_blob_wrapper panicinfo_blob;
> + struct notifier_block notifier_panic;
> };
>
> /*
> @@ -437,6 +438,22 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
> return ret;
> }
>
> +static int cros_ec_debugfs_panic_event(struct notifier_block *nb,
> + unsigned long queued_during_suspend, void *_notify)
> +{
> + struct cros_ec_debugfs *debug_info =
> + container_of(nb, struct cros_ec_debugfs, notifier_panic);
> +
> + if (debug_info->log_buffer.buf) {
> + /* Force log poll work to run immediately */
> + mod_delayed_work(debug_info->log_poll_work.wq, &debug_info->log_poll_work, 0);
> + /* Block until log poll work finishes */
> + flush_delayed_work(&debug_info->log_poll_work);
> + }
> +
> + return NOTIFY_DONE;
> +}
> +
> static int cros_ec_debugfs_probe(struct platform_device *pd)
> {
> struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
> @@ -473,6 +490,12 @@ static int cros_ec_debugfs_probe(struct platform_device *pd)
> debugfs_create_u16("suspend_timeout_ms", 0664, debug_info->dir,
> &ec->ec_dev->suspend_timeout_ms);
>
> + debug_info->notifier_panic.notifier_call = cros_ec_debugfs_panic_event;
> + ret = blocking_notifier_chain_register(&ec->ec_dev->panic_notifier,
> + &debug_info->notifier_panic);
> + if (ret)
> + goto remove_debugfs;
> +
> ec->debug_info = debug_info;
>
> dev_set_drvdata(&pd->dev, ec);
> diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
> index 7fc8f82280ac..5738f1d25091 100644
> --- a/drivers/platform/chrome/cros_ec_lpc.c
> +++ b/drivers/platform/chrome/cros_ec_lpc.c
> @@ -320,6 +320,13 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
>
> ec_dev->last_event_time = cros_ec_get_time_ns();
>
> + if (value == ACPI_NOTIFY_CROS_EC_PANIC) {
> + dev_emerg(ec_dev->dev, "CrOS EC Panic Reported. Shutdown is imminent!");
> + blocking_notifier_call_chain(&ec_dev->panic_notifier, 0, ec_dev);
> + /* Do not query for other events after a panic is reported */
> + return;
> + }
> +
> if (ec_dev->mkbp_event_supported)
> do {
> ret = cros_ec_get_next_event(ec_dev, NULL,
> diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
> index e43107e0bee1..7fb2196f99b0 100644
> --- a/include/linux/platform_data/cros_ec_proto.h
> +++ b/include/linux/platform_data/cros_ec_proto.h
> @@ -41,6 +41,13 @@
> #define EC_MAX_REQUEST_OVERHEAD 1
> #define EC_MAX_RESPONSE_OVERHEAD 32
>
> +/*
> + * EC panic is not covered by the standard (0-F) ACPI notify values.
> + * Arbitrarily choosing B0 to notify ec panic, which is in the 84-BF
> + * device specific ACPI notify range.
> + */
> +#define ACPI_NOTIFY_CROS_EC_PANIC 0xB0
> +
> /*
> * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
> */
> @@ -176,6 +183,8 @@ struct cros_ec_device {
> /* The platform devices used by the mfd driver */
> struct platform_device *ec;
> struct platform_device *pd;
> +
> + struct blocking_notifier_head panic_notifier;
> };
>
> /**

Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland