An important part of validating that s0ix worked properly is to check how
much of a cycle was spent in a hardware sleep state.
The reporting of hardware sleep is a mix of kernel messages and sysfs
files that vary from vendor to vendor. Collecting this information
requires extra information on the kernel command line or fetching from
debugfs.
To make this information more readily accessible introduce a new file in
suspend_stats that drivers can report into during their resume routine.
Userspace can fetch this information and compare it against the duration
of the cycle to allow determining residency percentages and flagging
problems.
v5->v6
* Add sysfs file for total
* In cases we know overflow return -EOVERFLOW
* Update documentation
* Rename symbol
* Fix kernel robot reported missing stub
Mario Limonciello (4):
PM: Add sysfs files to represent time spent in hardware sleep state
platform/x86/amd: pmc: Report duration of time in hw sleep state
platform/x86/intel/pmc: core: Always capture counters on suspend
platform/x86/intel/pmc: core: Report duration of time in HW sleep
state
Documentation/ABI/testing/sysfs-power | 27 ++++++++++++++++
drivers/platform/x86/amd/pmc.c | 5 ++-
drivers/platform/x86/intel/pmc/core.c | 18 ++++++-----
drivers/platform/x86/intel/pmc/core.h | 2 --
include/linux/suspend.h | 5 +++
kernel/power/main.c | 45 +++++++++++++++++++++++++++
6 files changed, 89 insertions(+), 13 deletions(-)
--
2.34.1
Userspace can't easily discover how much of a sleep cycle was spent in a
hardware sleep state without using kernel tracing and vendor specific sysfs
or debugfs files.
To make this information more discoverable, introduce two new sysfs files:
1) The time spent in a hw sleep state for last cycle.
2) The time spent in a hw sleep state since the kernel booted
Both of these files will be present only if the system supports s2idle.
Signed-off-by: Mario Limonciello <[email protected]>
---
v5->v6:
* Add total attribute as well
* Change text for documentation
* Adjust flow of is_visible callback.
* If overflow was detected in total attribute return -EOVERFLOW
* Rename symbol
* Add stub for symbol for builds without CONFIG_PM_SLEEP
v4->v5:
* Provide time in microseconds instead of percent. Userspace can convert
this if desirable.
---
Documentation/ABI/testing/sysfs-power | 27 ++++++++++++++++
include/linux/suspend.h | 5 +++
kernel/power/main.c | 45 +++++++++++++++++++++++++++
3 files changed, 77 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index f99d433ff311..37240575d282 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -413,6 +413,33 @@ Description:
The /sys/power/suspend_stats/last_failed_step file contains
the last failed step in the suspend/resume path.
+What: /sys/power/suspend_stats/last_hw_sleep
+Date: June 2023
+Contact: Mario Limonciello <[email protected]>
+Description:
+ The /sys/power/suspend_stats/last_hw_sleep file
+ contains the duration of time spent in a hardware sleep
+ state in the most recent system suspend-resume cycle.
+ This number is measured in microseconds.
+
+ NOTE: Limitations in the size of the hardware counters may
+ cause this value to be inaccurate in longer sleep cycles.
+
+What: /sys/power/suspend_stats/total_hw_sleep
+Date: June 2023
+Contact: Mario Limonciello <[email protected]>
+Description:
+ The /sys/power/suspend_stats/total_hw_sleep file
+ contains the aggregate of time spent in a hardware sleep
+ state since the kernel was booted. This number
+ is measured in microseconds.
+
+ NOTE: Limitations in the size of the hardware counters may
+ cause this value to be inaccurate in longer sleep cycles.
+
+ If an overflow has been detected this file will return
+ -EOVERFLOW.
+
What: /sys/power/sync_on_suspend
Date: October 2019
Contact: Jonas Meurer <[email protected]>
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index cfe19a028918..069ef0c0ae57 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -68,6 +68,8 @@ struct suspend_stats {
int last_failed_errno;
int errno[REC_FAILED_NUM];
int last_failed_step;
+ u64 last_hw_sleep;
+ u64 total_hw_sleep;
enum suspend_stat_step failed_steps[REC_FAILED_NUM];
};
@@ -489,6 +491,7 @@ void restore_processor_state(void);
extern int register_pm_notifier(struct notifier_block *nb);
extern int unregister_pm_notifier(struct notifier_block *nb);
extern void ksys_sync_helper(void);
+extern void pm_report_hw_sleep_time(u64 t);
#define pm_notifier(fn, pri) { \
static struct notifier_block fn##_nb = \
@@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
return 0;
}
+static inline void pm_report_hw_sleep_time(u64 t) {};
+
static inline void ksys_sync_helper(void) {}
#define pm_notifier(fn, pri) do { (void)(fn); } while (0)
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 31ec4a9b9d70..ffd4dd43cbdd 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -6,6 +6,7 @@
* Copyright (c) 2003 Open Source Development Lab
*/
+#include <linux/acpi.h>
#include <linux/export.h>
#include <linux/kobject.h>
#include <linux/string.h>
@@ -83,6 +84,18 @@ int unregister_pm_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(unregister_pm_notifier);
+void pm_report_hw_sleep_time(u64 t)
+{
+ suspend_stats.last_hw_sleep = t;
+ if (suspend_stats.total_hw_sleep == -EOVERFLOW)
+ return;
+ if (suspend_stats.total_hw_sleep + t < suspend_stats.total_hw_sleep)
+ suspend_stats.total_hw_sleep = -EOVERFLOW;
+ else
+ suspend_stats.total_hw_sleep += t;
+}
+EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
+
int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down)
{
int ret;
@@ -377,6 +390,22 @@ static ssize_t last_failed_step_show(struct kobject *kobj,
}
static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
+static ssize_t last_hw_sleep_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
+}
+static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
+
+static ssize_t total_hw_sleep_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ if (suspend_stats.total_hw_sleep == -EOVERFLOW)
+ return suspend_stats.total_hw_sleep;
+ return sysfs_emit(buf, "%llu\n", suspend_stats.total_hw_sleep);
+}
+static struct kobj_attribute total_hw_sleep = __ATTR_RO(total_hw_sleep);
+
static struct attribute *suspend_attrs[] = {
&success.attr,
&fail.attr,
@@ -391,12 +420,28 @@ static struct attribute *suspend_attrs[] = {
&last_failed_dev.attr,
&last_failed_errno.attr,
&last_failed_step.attr,
+ &last_hw_sleep.attr,
+ &total_hw_sleep.attr,
NULL,
};
+static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx)
+{
+ if (attr != &last_hw_sleep.attr &&
+ attr != &total_hw_sleep.attr)
+ return 0444;
+
+#ifdef CONFIG_ACPI
+ if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
+ return 0444;
+#endif
+ return 0;
+}
+
static const struct attribute_group suspend_attr_group = {
.name = "suspend_stats",
.attrs = suspend_attrs,
+ .is_visible = suspend_attr_is_visible,
};
#ifdef CONFIG_DEBUG_FS
--
2.34.1
Currently counters are only captured during suspend when the
warn_on_s0ix_failures module parameter is set.
In order to relay this counter information to the kernel reporting
infrastructure adjust it so that the counters are always captured.
warn_on_s0ix_failures will be utilized solely for messaging by
the driver instead.
Reviewed-by: David E. Box <[email protected]>
Signed-off-by: Mario Limonciello <[email protected]>
---
v5->v6:
* Pick up tag
v4->v5:
* Squash patches together
* Add extra pm_suspend_via_firmware() check for resume routine too
---
drivers/platform/x86/intel/pmc/core.c | 13 +++++--------
drivers/platform/x86/intel/pmc/core.h | 2 --
2 files changed, 5 insertions(+), 10 deletions(-)
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index b9591969e0fa..925c5d676a43 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -1179,12 +1179,6 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
{
struct pmc_dev *pmcdev = dev_get_drvdata(dev);
- pmcdev->check_counters = false;
-
- /* No warnings on S0ix failures */
- if (!warn_on_s0ix_failures)
- return 0;
-
/* Check if the syspend will actually use S0ix */
if (pm_suspend_via_firmware())
return 0;
@@ -1197,7 +1191,6 @@ static __maybe_unused int pmc_core_suspend(struct device *dev)
if (pmc_core_dev_state_get(pmcdev, &pmcdev->s0ix_counter))
return -EIO;
- pmcdev->check_counters = true;
return 0;
}
@@ -1233,12 +1226,16 @@ static __maybe_unused int pmc_core_resume(struct device *dev)
const struct pmc_bit_map **maps = pmcdev->map->lpm_sts;
int offset = pmcdev->map->lpm_status_offset;
- if (!pmcdev->check_counters)
+ /* Check if the syspend used S0ix */
+ if (pm_suspend_via_firmware())
return 0;
if (!pmc_core_is_s0ix_failed(pmcdev))
return 0;
+ if (!warn_on_s0ix_failures)
+ return 0;
+
if (pmc_core_is_pc10_failed(pmcdev)) {
/* S0ix failed because of PC10 entry failure */
dev_info(dev, "CPU did not enter PC10!!! (PC10 cnt=0x%llx)\n",
diff --git a/drivers/platform/x86/intel/pmc/core.h b/drivers/platform/x86/intel/pmc/core.h
index 810204d758ab..51d73efceaf3 100644
--- a/drivers/platform/x86/intel/pmc/core.h
+++ b/drivers/platform/x86/intel/pmc/core.h
@@ -319,7 +319,6 @@ struct pmc_reg_map {
* @pmc_xram_read_bit: flag to indicate whether PMC XRAM shadow registers
* used to read MPHY PG and PLL status are available
* @mutex_lock: mutex to complete one transcation
- * @check_counters: On resume, check if counters are getting incremented
* @pc10_counter: PC10 residency counter
* @s0ix_counter: S0ix residency (step adjusted)
* @num_lpm_modes: Count of enabled modes
@@ -338,7 +337,6 @@ struct pmc_dev {
int pmc_xram_read_bit;
struct mutex lock; /* generic mutex lock for PMC Core */
- bool check_counters; /* Check for counter increments on resume */
u64 pc10_counter;
u64 s0ix_counter;
int num_lpm_modes;
--
2.34.1
intel_pmc_core displays a warning when the module parameter
`warn_on_s0ix_failures` is set and a suspend didn't get to a HW sleep
state.
Report this to the standard kernel reporting infrastructure so that
userspace software can query after the suspend cycle is done.
Signed-off-by: Mario Limonciello <[email protected]>
---
v5->v6:
* Handle overflow case
* Use renamed symbol
v4->v5:
* Reword commit message
---
drivers/platform/x86/intel/pmc/core.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c
index 925c5d676a43..0621756792c8 100644
--- a/drivers/platform/x86/intel/pmc/core.c
+++ b/drivers/platform/x86/intel/pmc/core.c
@@ -1214,6 +1214,11 @@ static inline bool pmc_core_is_s0ix_failed(struct pmc_dev *pmcdev)
if (pmc_core_dev_state_get(pmcdev, &s0ix_counter))
return false;
+ if (s0ix_counter >= pmcdev->s0ix_counter)
+ pm_report_hw_sleep_time(s0ix_counter - pmcdev->s0ix_counter);
+ else
+ pm_report_hw_sleep_time(U64_MAX);
+
if (s0ix_counter == pmcdev->s0ix_counter)
return true;
--
2.34.1
amd_pmc displays a warning when a suspend didn't get to the deepest
state and a dynamic debugging message with the duration if it did.
Rather than logging to dynamic debugging the duration spent in the
deepest state, report this to the standard kernel reporting
infrastructure so that userspace software can query after the
suspend cycle is done.
Signed-off-by: Mario Limonciello <[email protected]>
---
drivers/platform/x86/amd/pmc.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c
index 2edaae04a691..6aa474657845 100644
--- a/drivers/platform/x86/amd/pmc.c
+++ b/drivers/platform/x86/amd/pmc.c
@@ -393,9 +393,8 @@ static void amd_pmc_validate_deepest(struct amd_pmc_dev *pdev)
if (!table.s0i3_last_entry_status)
dev_warn(pdev->dev, "Last suspend didn't reach deepest state\n");
- else
- dev_dbg(pdev->dev, "Last suspend in deepest state for %lluus\n",
- table.timein_s0i3_lastcapture);
+ pm_report_hw_sleep_time(table.s0i3_last_entry_status ?
+ table.timein_s0i3_lastcapture : 0);
}
static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev)
--
2.34.1
Hi Mario,
On Mon, 2023-04-03 at 16:18 -0500, Mario Limonciello wrote:
> Userspace can't easily discover how much of a sleep cycle was spent in a
> hardware sleep state without using kernel tracing and vendor specific sysfs
> or debugfs files.
>
> To make this information more discoverable, introduce two new sysfs files:
> 1) The time spent in a hw sleep state for last cycle.
> 2) The time spent in a hw sleep state since the kernel booted
> Both of these files will be present only if the system supports s2idle.
>
> Signed-off-by: Mario Limonciello <[email protected]>
> ---
> v5->v6:
> * Add total attribute as well
> * Change text for documentation
> * Adjust flow of is_visible callback.
> * If overflow was detected in total attribute return -EOVERFLOW
> * Rename symbol
> * Add stub for symbol for builds without CONFIG_PM_SLEEP
> v4->v5:
> * Provide time in microseconds instead of percent. Userspace can convert
> this if desirable.
> ---
> Documentation/ABI/testing/sysfs-power | 27 ++++++++++++++++
> include/linux/suspend.h | 5 +++
> kernel/power/main.c | 45 +++++++++++++++++++++++++++
> 3 files changed, 77 insertions(+)
>
> diff --git a/Documentation/ABI/testing/sysfs-power
> b/Documentation/ABI/testing/sysfs-power
> index f99d433ff311..37240575d282 100644
> --- a/Documentation/ABI/testing/sysfs-power
> +++ b/Documentation/ABI/testing/sysfs-power
> @@ -413,6 +413,33 @@ Description:
> The /sys/power/suspend_stats/last_failed_step file contains
> the last failed step in the suspend/resume path.
>
> +What: /sys/power/suspend_stats/last_hw_sleep
> +Date: June 2023
> +Contact: Mario Limonciello <[email protected]>
> +Description:
> + The /sys/power/suspend_stats/last_hw_sleep file
> + contains the duration of time spent in a hardware sleep
> + state in the most recent system suspend-resume cycle.
> + This number is measured in microseconds.
> +
> + NOTE: Limitations in the size of the hardware counters may
> + cause this value to be inaccurate in longer sleep cycles.
> +
> +What: /sys/power/suspend_stats/total_hw_sleep
> +Date: June 2023
> +Contact: Mario Limonciello <[email protected]>
> +Description:
> + The /sys/power/suspend_stats/total_hw_sleep file
> + contains the aggregate of time spent in a hardware sleep
> + state since the kernel was booted. This number
> + is measured in microseconds.
> +
> + NOTE: Limitations in the size of the hardware counters may
> + cause this value to be inaccurate in longer sleep cycles.
> +
> + If an overflow has been detected this file will return
> + -EOVERFLOW.
> +
> What: /sys/power/sync_on_suspend
> Date: October 2019
> Contact: Jonas Meurer <[email protected]>
> diff --git a/include/linux/suspend.h b/include/linux/suspend.h
> index cfe19a028918..069ef0c0ae57 100644
> --- a/include/linux/suspend.h
> +++ b/include/linux/suspend.h
> @@ -68,6 +68,8 @@ struct suspend_stats {
> int last_failed_errno;
> int errno[REC_FAILED_NUM];
> int last_failed_step;
> + u64 last_hw_sleep;
> + u64 total_hw_sleep;
> enum suspend_stat_step failed_steps[REC_FAILED_NUM];
> };
>
> @@ -489,6 +491,7 @@ void restore_processor_state(void);
> extern int register_pm_notifier(struct notifier_block *nb);
> extern int unregister_pm_notifier(struct notifier_block *nb);
> extern void ksys_sync_helper(void);
> +extern void pm_report_hw_sleep_time(u64 t);
>
> #define pm_notifier(fn, pri) { \
> static struct notifier_block fn##_nb = \
> @@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct
> notifier_block *nb)
> return 0;
> }
>
> +static inline void pm_report_hw_sleep_time(u64 t) {};
> +
> static inline void ksys_sync_helper(void) {}
>
> #define pm_notifier(fn, pri) do { (void)(fn); } while (0)
> diff --git a/kernel/power/main.c b/kernel/power/main.c
> index 31ec4a9b9d70..ffd4dd43cbdd 100644
> --- a/kernel/power/main.c
> +++ b/kernel/power/main.c
> @@ -6,6 +6,7 @@
> * Copyright (c) 2003 Open Source Development Lab
> */
>
> +#include <linux/acpi.h>
> #include <linux/export.h>
> #include <linux/kobject.h>
> #include <linux/string.h>
> @@ -83,6 +84,18 @@ int unregister_pm_notifier(struct notifier_block *nb)
> }
> EXPORT_SYMBOL_GPL(unregister_pm_notifier);
>
> +void pm_report_hw_sleep_time(u64 t)
> +{
> + suspend_stats.last_hw_sleep = t;
> + if (suspend_stats.total_hw_sleep == -EOVERFLOW)
> + return;
> + if (suspend_stats.total_hw_sleep + t < suspend_stats.total_hw_sleep)
> + suspend_stats.total_hw_sleep = -EOVERFLOW;
> + else
> + suspend_stats.total_hw_sleep += t;
total_hw_sleep is u64. At microsecond granularity it will never realistically
overflow and isn't worth the check IMO. The overflow concern comes from the u32
hardware counter, but I don't think there's a good way to detect it.
You could just report the maximum hardware counter time as max_hw_sleep so users
know the value isn't reliable if suspended for longer than that.
David
> +}
> +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
> +
> int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long
> val_down)
> {
> int ret;
> @@ -377,6 +390,22 @@ static ssize_t last_failed_step_show(struct kobject
> *kobj,
> }
> static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
>
> +static ssize_t last_hw_sleep_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
> +}
> +static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
> +
> +static ssize_t total_hw_sleep_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + if (suspend_stats.total_hw_sleep == -EOVERFLOW)
> + return suspend_stats.total_hw_sleep;
> + return sysfs_emit(buf, "%llu\n", suspend_stats.total_hw_sleep);
> +}
> +static struct kobj_attribute total_hw_sleep = __ATTR_RO(total_hw_sleep);
> +
> static struct attribute *suspend_attrs[] = {
> &success.attr,
> &fail.attr,
> @@ -391,12 +420,28 @@ static struct attribute *suspend_attrs[] = {
> &last_failed_dev.attr,
> &last_failed_errno.attr,
> &last_failed_step.attr,
> + &last_hw_sleep.attr,
> + &total_hw_sleep.attr,
> NULL,
> };
>
> +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute
> *attr, int idx)
> +{
> + if (attr != &last_hw_sleep.attr &&
> + attr != &total_hw_sleep.attr)
> + return 0444;
> +
> +#ifdef CONFIG_ACPI
> + if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
> + return 0444;
> +#endif
> + return 0;
> +}
> +
> static const struct attribute_group suspend_attr_group = {
> .name = "suspend_stats",
> .attrs = suspend_attrs,
> + .is_visible = suspend_attr_is_visible,
> };
>
> #ifdef CONFIG_DEBUG_FS
On Mon, 2023-04-03 at 16:18 -0500, Mario Limonciello wrote:
> intel_pmc_core displays a warning when the module parameter
> `warn_on_s0ix_failures` is set and a suspend didn't get to a HW sleep
> state.
>
> Report this to the standard kernel reporting infrastructure so that
> userspace software can query after the suspend cycle is done.
>
> Signed-off-by: Mario Limonciello <[email protected]>
> ---
> v5->v6:
> * Handle overflow case
> * Use renamed symbol
> v4->v5:
> * Reword commit message
> ---
> drivers/platform/x86/intel/pmc/core.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/drivers/platform/x86/intel/pmc/core.c
> b/drivers/platform/x86/intel/pmc/core.c
> index 925c5d676a43..0621756792c8 100644
> --- a/drivers/platform/x86/intel/pmc/core.c
> +++ b/drivers/platform/x86/intel/pmc/core.c
> @@ -1214,6 +1214,11 @@ static inline bool pmc_core_is_s0ix_failed(struct
> pmc_dev *pmcdev)
> if (pmc_core_dev_state_get(pmcdev, &s0ix_counter))
> return false;
>
> + if (s0ix_counter >= pmcdev->s0ix_counter)
> + pm_report_hw_sleep_time(s0ix_counter - pmcdev->s0ix_counter);
This would drop valid measurements when it's just the case that the counter has
overflowed but hasn't yet wrapped around to the previous value.
> + else
> + pm_report_hw_sleep_time(U64_MAX);
How about no if/else, just:
return (u32)(s0ix_counter - pmcdev->s0ix_counter);
David
> +
> if (s0ix_counter == pmcdev->s0ix_counter)
> return true;
>
Hi,
On 4/5/23 02:42, Box, David E wrote:
> Hi Mario,
>
> On Mon, 2023-04-03 at 16:18 -0500, Mario Limonciello wrote:
>> Userspace can't easily discover how much of a sleep cycle was spent in a
>> hardware sleep state without using kernel tracing and vendor specific sysfs
>> or debugfs files.
>>
>> To make this information more discoverable, introduce two new sysfs files:
>> 1) The time spent in a hw sleep state for last cycle.
>> 2) The time spent in a hw sleep state since the kernel booted
>> Both of these files will be present only if the system supports s2idle.
>>
>> Signed-off-by: Mario Limonciello <[email protected]>
>> ---
>> v5->v6:
>> * Add total attribute as well
>> * Change text for documentation
>> * Adjust flow of is_visible callback.
>> * If overflow was detected in total attribute return -EOVERFLOW
>> * Rename symbol
>> * Add stub for symbol for builds without CONFIG_PM_SLEEP
>> v4->v5:
>> * Provide time in microseconds instead of percent. Userspace can convert
>> this if desirable.
>> ---
>> Documentation/ABI/testing/sysfs-power | 27 ++++++++++++++++
>> include/linux/suspend.h | 5 +++
>> kernel/power/main.c | 45 +++++++++++++++++++++++++++
>> 3 files changed, 77 insertions(+)
>>
>> diff --git a/Documentation/ABI/testing/sysfs-power
>> b/Documentation/ABI/testing/sysfs-power
>> index f99d433ff311..37240575d282 100644
>> --- a/Documentation/ABI/testing/sysfs-power
>> +++ b/Documentation/ABI/testing/sysfs-power
>> @@ -413,6 +413,33 @@ Description:
>> The /sys/power/suspend_stats/last_failed_step file contains
>> the last failed step in the suspend/resume path.
>>
>> +What: /sys/power/suspend_stats/last_hw_sleep
>> +Date: June 2023
>> +Contact: Mario Limonciello <[email protected]>
>> +Description:
>> + The /sys/power/suspend_stats/last_hw_sleep file
>> + contains the duration of time spent in a hardware sleep
>> + state in the most recent system suspend-resume cycle.
>> + This number is measured in microseconds.
>> +
>> + NOTE: Limitations in the size of the hardware counters may
>> + cause this value to be inaccurate in longer sleep cycles.
>> +
>> +What: /sys/power/suspend_stats/total_hw_sleep
>> +Date: June 2023
>> +Contact: Mario Limonciello <[email protected]>
>> +Description:
>> + The /sys/power/suspend_stats/total_hw_sleep file
>> + contains the aggregate of time spent in a hardware sleep
>> + state since the kernel was booted. This number
>> + is measured in microseconds.
>> +
>> + NOTE: Limitations in the size of the hardware counters may
>> + cause this value to be inaccurate in longer sleep cycles.
>> +
>> + If an overflow has been detected this file will return
>> + -EOVERFLOW.
>> +
>> What: /sys/power/sync_on_suspend
>> Date: October 2019
>> Contact: Jonas Meurer <[email protected]>
>> diff --git a/include/linux/suspend.h b/include/linux/suspend.h
>> index cfe19a028918..069ef0c0ae57 100644
>> --- a/include/linux/suspend.h
>> +++ b/include/linux/suspend.h
>> @@ -68,6 +68,8 @@ struct suspend_stats {
>> int last_failed_errno;
>> int errno[REC_FAILED_NUM];
>> int last_failed_step;
>> + u64 last_hw_sleep;
>> + u64 total_hw_sleep;
>> enum suspend_stat_step failed_steps[REC_FAILED_NUM];
>> };
>>
>> @@ -489,6 +491,7 @@ void restore_processor_state(void);
>> extern int register_pm_notifier(struct notifier_block *nb);
>> extern int unregister_pm_notifier(struct notifier_block *nb);
>> extern void ksys_sync_helper(void);
>> +extern void pm_report_hw_sleep_time(u64 t);
>>
>> #define pm_notifier(fn, pri) { \
>> static struct notifier_block fn##_nb = \
>> @@ -526,6 +529,8 @@ static inline int unregister_pm_notifier(struct
>> notifier_block *nb)
>> return 0;
>> }
>>
>> +static inline void pm_report_hw_sleep_time(u64 t) {};
>> +
>> static inline void ksys_sync_helper(void) {}
>>
>> #define pm_notifier(fn, pri) do { (void)(fn); } while (0)
>> diff --git a/kernel/power/main.c b/kernel/power/main.c
>> index 31ec4a9b9d70..ffd4dd43cbdd 100644
>> --- a/kernel/power/main.c
>> +++ b/kernel/power/main.c
>> @@ -6,6 +6,7 @@
>> * Copyright (c) 2003 Open Source Development Lab
>> */
>>
>> +#include <linux/acpi.h>
>> #include <linux/export.h>
>> #include <linux/kobject.h>
>> #include <linux/string.h>
>> @@ -83,6 +84,18 @@ int unregister_pm_notifier(struct notifier_block *nb)
>> }
>> EXPORT_SYMBOL_GPL(unregister_pm_notifier);
>>
>> +void pm_report_hw_sleep_time(u64 t)
>> +{
>> + suspend_stats.last_hw_sleep = t;
>> + if (suspend_stats.total_hw_sleep == -EOVERFLOW)
>> + return;
>> + if (suspend_stats.total_hw_sleep + t < suspend_stats.total_hw_sleep)
>> + suspend_stats.total_hw_sleep = -EOVERFLOW;
>> + else
>> + suspend_stats.total_hw_sleep += t;
>
> total_hw_sleep is u64. At microsecond granularity it will never realistically
> overflow and isn't worth the check IMO. The overflow concern comes from the u32
> hardware counter, but I don't think there's a good way to detect it.
>
> You could just report the maximum hardware counter time as max_hw_sleep so users
> know the value isn't reliable if suspended for longer than that.
Yes I think we need to let userspace know the longest hw-sleep time
the hw can reliably record and then userspace can (and should) chose
to not use/check the counters when the total suspend time is bigger
then the max hw-sleep time.
Other then that this patch-set seems like a good idea to me
and I have no objections / remarks on the pdx86 bits.
Regards,
Hans
>> +}
>> +EXPORT_SYMBOL_GPL(pm_report_hw_sleep_time);
>> +
>> int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long
>> val_down)
>> {
>> int ret;
>> @@ -377,6 +390,22 @@ static ssize_t last_failed_step_show(struct kobject
>> *kobj,
>> }
>> static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);
>>
>> +static ssize_t last_hw_sleep_show(struct kobject *kobj,
>> + struct kobj_attribute *attr, char *buf)
>> +{
>> + return sysfs_emit(buf, "%llu\n", suspend_stats.last_hw_sleep);
>> +}
>> +static struct kobj_attribute last_hw_sleep = __ATTR_RO(last_hw_sleep);
>> +
>> +static ssize_t total_hw_sleep_show(struct kobject *kobj,
>> + struct kobj_attribute *attr, char *buf)
>> +{
>> + if (suspend_stats.total_hw_sleep == -EOVERFLOW)
>> + return suspend_stats.total_hw_sleep;
>> + return sysfs_emit(buf, "%llu\n", suspend_stats.total_hw_sleep);
>> +}
>> +static struct kobj_attribute total_hw_sleep = __ATTR_RO(total_hw_sleep);
>> +
>> static struct attribute *suspend_attrs[] = {
>> &success.attr,
>> &fail.attr,
>> @@ -391,12 +420,28 @@ static struct attribute *suspend_attrs[] = {
>> &last_failed_dev.attr,
>> &last_failed_errno.attr,
>> &last_failed_step.attr,
>> + &last_hw_sleep.attr,
>> + &total_hw_sleep.attr,
>> NULL,
>> };
>>
>> +static umode_t suspend_attr_is_visible(struct kobject *kobj, struct attribute
>> *attr, int idx)
>> +{
>> + if (attr != &last_hw_sleep.attr &&
>> + attr != &total_hw_sleep.attr)
>> + return 0444;
>> +
>> +#ifdef CONFIG_ACPI
>> + if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)
>> + return 0444;
>> +#endif
>> + return 0;
>> +}
>> +
>> static const struct attribute_group suspend_attr_group = {
>> .name = "suspend_stats",
>> .attrs = suspend_attrs,
>> + .is_visible = suspend_attr_is_visible,
>> };
>>
>> #ifdef CONFIG_DEBUG_FS