2022-10-31 21:05:02

by Mario Limonciello

[permalink] [raw]
Subject: [RFC 0/3] Introduce infrastructure to report time in deepest state

Sven van Ashbrook brought a patch to the kernel mailing list that
attempted to change the reporting level of a s0ix entry issue to a
different debugging level so that infastructure used by Google could
better scan logs to catch problems.

This approach was rejected, but during the conversation another
suggestion was made by David E. Box to introduce some infrastructure
into the kernel to report this information.

As it's information that is reported by both AMD and Intel platforms
over s2idle, this seems to make sense.

This series introduces a new sysfs file, a symbol for kernel modules to
use to populate it and changes to both AMD and Intel drivers to utilize
it.

The expectation is that userspace could read this file after s2idle
occurred to infer how much of the s2idle cycle was spent in the deepest
hardware state.

Mario Limonciello (3):
PM: Add a sysfs file to represent whether hardware reached the deepest
state
platform/x86/amd: pmc: Report duration of time in deepest hw state
platform/x86/intel/pmc: core: Report duration of time in deepest HW
state

Documentation/ABI/testing/sysfs-power | 8 ++++++++
drivers/platform/x86/amd/pmc.c | 4 +---
drivers/platform/x86/intel/pmc/core.c | 2 ++
include/linux/suspend.h | 2 ++
kernel/power/main.c | 14 ++++++++++++++
5 files changed, 27 insertions(+), 3 deletions(-)

--
2.34.1



2022-10-31 21:05:59

by Mario Limonciello

[permalink] [raw]
Subject: [RFC 3/3] platform/x86/intel/pmc: core: Report duration of time in deepest HW state

intel_pmc_core displays a warning when a suspend didn't get to the deepest
state.

This information is generally useful to userspace as well which may use
it to collect further debugging data. Report this to the standard kernel
reporting infrastructure that can be accessed from sysfs.

Signed-off-by: Mario Limonciello <[email protected]>
---
drivers/platform/x86/intel/pmc/core.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index 17ec5825d13d7..9e58228b01f91 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -2116,6 +2116,8 @@ static __maybe_unused int pmc_core_resume(struct device *dev)
if (!pmcdev->check_counters)
return 0;

+ pm_set_hw_deepest_state(pmcdev->s0ix_counter);
+
if (!pmc_core_is_s0ix_failed(pmcdev))
return 0;

--
2.34.1


2022-10-31 21:34:08

by Mario Limonciello

[permalink] [raw]
Subject: [RFC 1/3] PM: Add a sysfs file to represent whether hardware reached the deepest state

Both AMD and Intel SoCs have a concept of reporting whether the hardware
reached the deepest possible hardware state over s2idle as well as how much
time was spent in that state.

This information is valuable to both chip designers and system designers as
it helps to identify when there are problems with power consumption over
an s2idle cycle.

To make the information discoverable, create a new sysfs file and a symbol
that drivers from supported manufacturers can use to advertise this
information.

Suggested-by: David E Box <[email protected]>
Signed-off-by: Mario Limonciello <[email protected]>
---
Documentation/ABI/testing/sysfs-power | 8 ++++++++
include/linux/suspend.h | 2 ++
kernel/power/main.c | 14 ++++++++++++++
3 files changed, 24 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index f99d433ff3117..25c52d7469b9c 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -413,6 +413,14 @@ Description:
The /sys/power/suspend_stats/last_failed_step file contains
the last failed step in the suspend/resume path.

+What: /sys/power/suspend_stats/last_hw_deepest_state
+Date: December 2022
+Contact: Mario Limonciello <[email protected]>
+Description:
+ The /sys/power/suspend_stats/last_hw_deepest_state file contains
+ the amount of time spent in the deepest hardware sleep state.
+ This is measured in microseconds.
+
What: /sys/power/sync_on_suspend
Date: October 2019
Contact: Jonas Meurer <[email protected]>
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index cfe19a0289185..30b8a8018299b 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -68,6 +68,7 @@ struct suspend_stats {
int last_failed_errno;
int errno[REC_FAILED_NUM];
int last_failed_step;
+ u64 last_hw_deepest_state;
enum suspend_stat_step failed_steps[REC_FAILED_NUM];
};

@@ -489,6 +490,7 @@ void restore_processor_state(void);
extern int register_pm_notifier(struct notifier_block *nb);
extern int unregister_pm_notifier(struct notifier_block *nb);
extern void ksys_sync_helper(void);
+extern void pm_set_hw_deepest_state(u64 duration);

#define pm_notifier(fn, pri) { \
static struct notifier_block fn##_nb = \
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 31ec4a9b9d704..1bda5d2d26c53 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -54,6 +54,12 @@ void unlock_system_sleep(unsigned int flags)
}
EXPORT_SYMBOL_GPL(unlock_system_sleep);

+void pm_set_hw_deepest_state(u64 duration)
+{
+ suspend_stats.last_hw_deepest_state = duration;
+}
+EXPORT_SYMBOL_GPL(pm_set_hw_deepest_state);
+
void ksys_sync_helper(void)
{
ktime_t start;
@@ -377,6 +383,13 @@ static ssize_t last_failed_step_show(struct kobject *kobj,
}
static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);

+static ssize_t last_hw_deepest_state_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%llu\n", suspend_stats.last_hw_deepest_state);
+}
+static struct kobj_attribute last_hw_deepest_state = __ATTR_RO(last_hw_deepest_state);
+
static struct attribute *suspend_attrs[] = {
&success.attr,
&fail.attr,
@@ -391,6 +404,7 @@ static struct attribute *suspend_attrs[] = {
&last_failed_dev.attr,
&last_failed_errno.attr,
&last_failed_step.attr,
+ &last_hw_deepest_state.attr,
NULL,
};

--
2.34.1


2022-10-31 21:47:55

by Mario Limonciello

[permalink] [raw]
Subject: [RFC 2/3] platform/x86/amd: pmc: Report duration of time in deepest hw state

amd_pmc displays a warning when a suspend didn't get to the deepest
state and a dynamic debugging message with the duration if it did.

Rather than logging to dynamic debugging the duration spent in the
deepest state, report this to the standard kernel reporting infrastructure
that can be accessed from sysfs.

Signed-off-by: Mario Limonciello <[email protected]>
---
drivers/platform/x86/amd/pmc.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c
index 3703b61fb5c00..0f94c919edd43 100644
--- a/drivers/platform/x86/amd/pmc.c
+++ b/drivers/platform/x86/amd/pmc.c
@@ -373,9 +373,7 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev)

if (!table.s0i3_last_entry_status)
dev_warn(pdev->dev, "Last suspend didn't reach deepest state\n");
- else
- dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n",
- table.timein_s0i3_lastcapture);
+ pm_set_hw_deepest_state(table.s0i3_last_entry_status ? table.timein_s0i3_lastcapture : 0);
}
#endif

--
2.34.1


2022-11-01 17:55:54

by Sven van Ashbrook

[permalink] [raw]
Subject: Re: [RFC 1/3] PM: Add a sysfs file to represent whether hardware reached the deepest state

On Mon, Oct 31, 2022 at 4:43 PM Mario Limonciello
<[email protected]> wrote:
>
> +void pm_set_hw_deepest_state(u64 duration)
> +{
> + suspend_stats.last_hw_deepest_state = duration;

I'm wondering if we could add a userspace notification here. Then
userspace wouldn't have to synchronize with the suspend/resume state
of the system when reading this entry.

What about sysfs_notify() ? Or via udev?

> +}
> +EXPORT_SYMBOL_GPL(pm_set_hw_deepest_state);

2022-11-01 20:30:22

by Mario Limonciello

[permalink] [raw]
Subject: Re: [RFC 1/3] PM: Add a sysfs file to represent whether hardware reached the deepest state

On 11/1/2022 12:37, Sven van Ashbrook wrote:
> On Mon, Oct 31, 2022 at 4:43 PM Mario Limonciello
> <[email protected]> wrote:
>>
>> +void pm_set_hw_deepest_state(u64 duration)
>> +{
>> + suspend_stats.last_hw_deepest_state = duration;
>
> I'm wondering if we could add a userspace notification here. Then
> userspace wouldn't have to synchronize with the suspend/resume state
> of the system when reading this entry.
>
> What about sysfs_notify() ? Or via udev?

The question I would have is what kobj would you notify? power_kobj?
Lots of other things write to suspend stats, but nothing else sends
notifications. So why is this one going to be special?

To me I don't think it's much different for userspace to "always" read
the file after resume vs wait for the notification and then read it.

To make userspace flexible to multiple kernel versions it seems to me
that userspace could check if that file exists before suspend and if it
does then check the value after suspend.

>
>> +}
>> +EXPORT_SYMBOL_GPL(pm_set_hw_deepest_state);


2022-11-08 20:44:47

by David E. Box

[permalink] [raw]
Subject: Re: [RFC 1/3] PM: Add a sysfs file to represent whether hardware reached the deepest state

Hi Mario,

On Mon, 2022-10-31 at 15:43 -0500, Mario Limonciello wrote:
> Both AMD and Intel SoCs have a concept of reporting whether the hardware
> reached the deepest possible hardware state over s2idle as well as how much
> time was spent in that state.
>
> This information is valuable to both chip designers and system designers as
> it helps to identify when there are problems with power consumption over
> an s2idle cycle.
>
> To make the information discoverable, create a new sysfs file and a symbol
> that drivers from supported manufacturers can use to advertise this
> information.
>
> Suggested-by: David E Box <[email protected]>
> Signed-off-by: Mario Limonciello <[email protected]>
> ---
> Documentation/ABI/testing/sysfs-power | 8 ++++++++
> include/linux/suspend.h | 2 ++
> kernel/power/main.c | 14 ++++++++++++++
> 3 files changed, 24 insertions(+)
>
> diff --git a/Documentation/ABI/testing/sysfs-power
> b/Documentation/ABI/testing/sysfs-power
> index f99d433ff3117..25c52d7469b9c 100644
> --- a/Documentation/ABI/testing/sysfs-power
> +++ b/Documentation/ABI/testing/sysfs-power
> @@ -413,6 +413,14 @@ Description:
> The /sys/power/suspend_stats/last_failed_step file contains
> the last failed step in the suspend/resume path.
>
> +What: /sys/power/suspend_stats/last_hw_deepest_state
> +Date: December 2022
> +Contact: Mario Limonciello <[email protected]>
> +Description:
> + The /sys/power/suspend_stats/last_hw_deepest_state file contains
> + the amount of time spent in the deepest hardware sleep state.
> + This is measured in microseconds.

The name doesn't really fit with the measurement. Maybe last_hw_state_residency
or last_low_power_idle_res (using the spec description of this feature).

I left out deepest in the name because it would be my preference, in case of
multiple hardware states, that this defaults to the total time spent in any low
power idle state rather than just the deepest. At least we would use it that
way.

> +
> What: /sys/power/sync_on_suspend
> Date: October 2019
> Contact: Jonas Meurer <[email protected]>
> diff --git a/include/linux/suspend.h b/include/linux/suspend.h
> index cfe19a0289185..30b8a8018299b 100644
> --- a/include/linux/suspend.h
> +++ b/include/linux/suspend.h
> @@ -68,6 +68,7 @@ struct suspend_stats {
> int last_failed_errno;
> int errno[REC_FAILED_NUM];
> int last_failed_step;
> + u64 last_hw_deepest_state;
> enum suspend_stat_step failed_steps[REC_FAILED_NUM];
> };
>
> @@ -489,6 +490,7 @@ void restore_processor_state(void);
> extern int register_pm_notifier(struct notifier_block *nb);
> extern int unregister_pm_notifier(struct notifier_block *nb);
> extern void ksys_sync_helper(void);
> +extern void pm_set_hw_deepest_state(u64 duration);
>
> #define pm_notifier(fn, pri) { \
> static struct notifier_block fn##_nb = \
> diff --git a/kernel/power/main.c b/kernel/power/main.c
> index 31ec4a9b9d704..1bda5d2d26c53 100644
> --- a/kernel/power/main.c
> +++ b/kernel/power/main.c
> @@ -54,6 +54,12 @@ void unlock_system_sleep(unsigned int flags)
> }
> EXPORT_SYMBOL_GPL(unlock_system_sleep);
>
> +void pm_set_hw_deepest_state(u64 duration)
> +{
> + suspend_stats.last_hw_deepest_state = duration;
> +}
> +EXPORT_SYMBOL_GPL(pm_set_hw_deepest_state);
> +
> void ksys_sync_helper(void)
> {
> ktime_t start;
> @@ -377,6 +383,13 @@ static ssize_t last_failed_step_show(struct kobject
> *kobj,
> }
> static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
>
> +static ssize_t last_hw_deepest_state_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sprintf(buf, "%llu\n", suspend_stats.last_hw_deepest_state);
> +}
> +static struct kobj_attribute last_hw_deepest_state =
> __ATTR_RO(last_hw_deepest_state);
> +
> static struct attribute *suspend_attrs[] = {
> &success.attr,
> &fail.attr,
> @@ -391,6 +404,7 @@ static struct attribute *suspend_attrs[] = {
> &last_failed_dev.attr,
> &last_failed_errno.attr,
> &last_failed_step.attr,
> + &last_hw_deepest_state.attr,
> NULL,
> };
>

This would appear on all systems, whether or not they support low power idle. It
would make sense to either prevent its display on unsupported systems or provide
another file to show if low power idle is supported.

David