Hi!
I am proposing a patch in which I want to return the errno code ETIME
instead of EBUSY in enter_state() in the kernel suspend flow. Currently,
EBUSY is returned when an imminent alarm is pending which is checked in
alarmtimer_suspend() in alarmtimer.c. The proposed patch series moves the
check to enter_state() in suspend.c to catch a potential suspend failure
early in the suspend flow. I want to replace EBUSY with ETIME to make it
more diagnosable in userspace, and may be more appropriate considering a
timer is about to expire.
I am reaching out to get an opinion from the
suspend maintainers if this would act as any potential risk in the suspend
flow which only has EBUSY, EAGAIN, and EINVAL as return error codes
currently. This has been developed as part of a patch series, and only the
patch of interest is below this message. Any feedback or insights would be
greatly appreciated.
Thank you,
Pranav Prasad
The alarmtimer driver currently fails suspend attempts when there is an
alarm pending within the next suspend_check_duration_ns nanoseconds, since
the system is expected to wake up soon anyway. The entire suspend process
is initiated even though the system will immediately awaken. This process
includes substantial work before the suspend fails and additional work
afterwards to undo the failed suspend that was attempted. Therefore on
battery-powered devices that initiate suspend attempts from userspace, it
may be advantageous to be able to fail the suspend earlier in the suspend
flow to avoid power consumption instead of unnecessarily doing extra work.
As one data point, an analysis of a subset of Android devices showed that
imminent alarms account for roughly 40% of all suspend failures on average
leading to unnecessary power wastage.
To facilitate this, expose
function time_check_suspend_fail() from alarmtimer to be used by the power
subsystem to perform the check earlier in the suspend flow. Perform the
check in enter_state() and return early if an alarm is to be fired in the
next suspend_check_duration_ns nanoseconds, failing suspend.
Signed-off-by: Pranav Prasad <[email protected]>
Signed-off-by: Kelly Rossmoyer <[email protected]>
---
include/linux/time.h | 1 +
kernel/power/suspend.c | 3 ++
kernel/time/alarmtimer.c | 113 ++++++++++++++++++++++++++++-----------
3 files changed, 87 insertions(+), 30 deletions(-)
diff --git a/include/linux/time.h b/include/linux/time.h
index 16cf4522d6f3..aab7c4e51e11 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -56,6 +56,7 @@ struct tm {
};
void time64_to_tm(time64_t totalsecs, int offset, struct tm *result);
+int time_check_suspend_fail(void);
# include <linux/time32.h>
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index fa3bf161d13f..7a0175dae0d9 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -26,6 +26,7 @@
#include <linux/suspend.h>
#include <linux/syscore_ops.h>
#include <linux/swait.h>
+#include <linux/time.h>
#include <linux/ftrace.h>
#include <trace/events/power.h>
#include <linux/compiler.h>
@@ -564,6 +565,8 @@ static int enter_state(suspend_state_t state)
#endif
} else if (!valid_state(state)) {
return -EINVAL;
+ } else if (time_check_suspend_fail()) {
+ return -ETIME;
}
if (!mutex_trylock(&system_transition_mutex))
return -EBUSY;
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index e5d2e560b4c1..085b1ace0c31 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -115,6 +115,84 @@ static int alarmtimer_sysfs_add(void)
return ret;
}
+/**
+ * alarmtimer_init_soonest - Initializes parameters to find soonest alarm.
+ * @min: ptr to relative time to the soonest alarm to expire
+ * @expires: ptr to absolute time of the soonest alarm to expire
+ * @type: ptr to alarm type
+ *
+ */
+static void alarmtimer_init_soonest(ktime_t *min, ktime_t *expires, int *type)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&freezer_delta_lock, flags);
+ *min = freezer_delta;
+ *expires = freezer_expires;
+ *type = freezer_alarmtype;
+ freezer_delta = 0;
+ spin_unlock_irqrestore(&freezer_delta_lock, flags);
+}
+
+/**
+ * alarmtimer_get_soonest - Finds the soonest alarm to expire among the alarm bases.
+ * @min: ptr to relative time to the soonest alarm to expire
+ * @expires: ptr to absolute time of the soonest alarm to expire
+ * @type: ptr to alarm type
+ *
+ */
+static void alarmtimer_get_soonest(ktime_t *min, ktime_t *expires, int *type)
+{
+ int i;
+ unsigned long flags;
+
+ /* Find the soonest timer to expire */
+ for (i = 0; i < ALARM_NUMTYPE; i++) {
+ struct alarm_base *base = &alarm_bases[i];
+ struct timerqueue_node *next;
+ ktime_t delta;
+
+ spin_lock_irqsave(&base->lock, flags);
+ next = timerqueue_getnext(&base->timerqueue);
+ spin_unlock_irqrestore(&base->lock, flags);
+ if (!next)
+ continue;
+ delta = ktime_sub(next->expires, base->get_ktime());
+ if (!(*min) || (delta < *min)) {
+ *expires = next->expires;
+ *min = delta;
+ *type = i;
+ }
+ }
+}
+
+/**
+ * time_check_suspend_fail - Check if suspend should be failed due to an
+ * alarm within the next suspend_check_duration nanoseconds.
+ *
+ * Returns error if suspend should be failed, else returns 0.
+ */
+int time_check_suspend_fail(void)
+{
+ ktime_t min, expires;
+ int type;
+
+ /* Initialize parameters to find soonest timer */
+ alarmtimer_init_soonest(&min, &expires, &type);
+
+ /* Find the soonest timer to expire */
+ alarmtimer_get_soonest(&min, &expires, &type);
+
+ if (min == 0)
+ return 0;
+
+ if (ktime_to_ns(min) < suspend_check_duration_ns)
+ return -EBUSY;
+
+ return 0;
+}
+EXPORT_SYMBOL(time_check_suspend_fail);
+
/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
@@ -296,49 +374,24 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
static int alarmtimer_suspend(struct device *dev)
{
ktime_t min, now, expires;
- int i, ret, type;
+ int ret, type;
struct rtc_device *rtc;
- unsigned long flags;
struct rtc_time tm;
- spin_lock_irqsave(&freezer_delta_lock, flags);
- min = freezer_delta;
- expires = freezer_expires;
- type = freezer_alarmtype;
- freezer_delta = 0;
- spin_unlock_irqrestore(&freezer_delta_lock, flags);
+ /* Initialize parameters to find soonest timer */
+ alarmtimer_init_soonest(&min, &expires, &type);
rtc = alarmtimer_get_rtcdev();
/* If we have no rtcdev, just return */
if (!rtc)
return 0;
- /* Find the soonest timer to expire*/
- for (i = 0; i < ALARM_NUMTYPE; i++) {
- struct alarm_base *base = &alarm_bases[i];
- struct timerqueue_node *next;
- ktime_t delta;
+ /* Find the soonest timer to expire */
+ alarmtimer_get_soonest(&min, &expires, &type);
- spin_lock_irqsave(&base->lock, flags);
- next = timerqueue_getnext(&base->timerqueue);
- spin_unlock_irqrestore(&base->lock, flags);
- if (!next)
- continue;
- delta = ktime_sub(next->expires, base->get_ktime());
- if (!min || (delta < min)) {
- expires = next->expires;
- min = delta;
- type = i;
- }
- }
if (min == 0)
return 0;
- if (ktime_to_ns(min) < suspend_check_duration_ns) {
- pm_wakeup_event(dev, suspend_check_duration_ns/NSEC_PER_MSEC);
- return -EBUSY;
- }
-
trace_alarmtimer_suspend(expires, type);
/* Setup an rtc timer to fire that far in the future */
--
2.43.0.429.g432eaa2c6b-goog
On Wed, Jan 31, 2024 at 8:13 PM Pranav Prasad <[email protected]> wrote:
>
> Hi!
>
> I am proposing a patch in which I want to return the errno code ETIME
> instead of EBUSY in enter_state() in the kernel suspend flow. Currently,
> EBUSY is returned when an imminent alarm is pending which is checked in
> alarmtimer_suspend() in alarmtimer.c. The proposed patch series moves the
> check to enter_state() in suspend.c to catch a potential suspend failure
> early in the suspend flow. I want to replace EBUSY with ETIME to make it
> more diagnosable in userspace, and may be more appropriate considering a
> timer is about to expire.
>
> I am reaching out to get an opinion from the
> suspend maintainers if this would act as any potential risk in the suspend
> flow which only has EBUSY, EAGAIN, and EINVAL as return error codes
> currently. This has been developed as part of a patch series, and only the
> patch of interest is below this message. Any feedback or insights would be
> greatly appreciated.
>
> Thank you,
> Pranav Prasad
>
> The alarmtimer driver currently fails suspend attempts when there is an
> alarm pending within the next suspend_check_duration_ns nanoseconds, since
> the system is expected to wake up soon anyway. The entire suspend process
> is initiated even though the system will immediately awaken. This process
> includes substantial work before the suspend fails and additional work
> afterwards to undo the failed suspend that was attempted. Therefore on
> battery-powered devices that initiate suspend attempts from userspace, it
> may be advantageous to be able to fail the suspend earlier in the suspend
> flow to avoid power consumption instead of unnecessarily doing extra work.
> As one data point, an analysis of a subset of Android devices showed that
> imminent alarms account for roughly 40% of all suspend failures on average
> leading to unnecessary power wastage.
>
> To facilitate this, expose
> function time_check_suspend_fail() from alarmtimer to be used by the power
> subsystem to perform the check earlier in the suspend flow. Perform the
> check in enter_state() and return early if an alarm is to be fired in the
> next suspend_check_duration_ns nanoseconds, failing suspend.
>
> Signed-off-by: Pranav Prasad <[email protected]>
> Signed-off-by: Kelly Rossmoyer <[email protected]>
> ---
> include/linux/time.h | 1 +
> kernel/power/suspend.c | 3 ++
> kernel/time/alarmtimer.c | 113 ++++++++++++++++++++++++++++-----------
> 3 files changed, 87 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/time.h b/include/linux/time.h
> index 16cf4522d6f3..aab7c4e51e11 100644
> --- a/include/linux/time.h
> +++ b/include/linux/time.h
> @@ -56,6 +56,7 @@ struct tm {
> };
>
> void time64_to_tm(time64_t totalsecs, int offset, struct tm *result);
> +int time_check_suspend_fail(void);
>
> # include <linux/time32.h>
>
> diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
> index fa3bf161d13f..7a0175dae0d9 100644
> --- a/kernel/power/suspend.c
> +++ b/kernel/power/suspend.c
> @@ -26,6 +26,7 @@
> #include <linux/suspend.h>
> #include <linux/syscore_ops.h>
> #include <linux/swait.h>
> +#include <linux/time.h>
> #include <linux/ftrace.h>
> #include <trace/events/power.h>
> #include <linux/compiler.h>
> @@ -564,6 +565,8 @@ static int enter_state(suspend_state_t state)
> #endif
> } else if (!valid_state(state)) {
> return -EINVAL;
> + } else if (time_check_suspend_fail()) {
> + return -ETIME;
This causes a function defined in modular code to be called from
non-modular code which is an obvious mistake.
It also makes the generic suspend code call a function defined in a
random driver, which is a total no-go as far as I am concerned.
Why don't you instead define a PM notifier in the alarmtimer driver
and check if it is going to trigger shortly from there? PM notifiers
run before the tasks freezer, so there would be a little difference
timing-wise and you can return whatever error code you like from
there. As an additional benefit, you'd be able to handle hibernation
in the same way.
Thanks!
On Wed, Jan 31 2024 at 19:13, Pranav Prasad wrote:
> Hi!
>
> I am proposing a patch in which I want to return the errno code ETIME
> instead of EBUSY in enter_state() in the kernel suspend flow. Currently,
> EBUSY is returned when an imminent alarm is pending which is checked in
> alarmtimer_suspend() in alarmtimer.c. The proposed patch series moves the
> check to enter_state() in suspend.c to catch a potential suspend failure
> early in the suspend flow. I want to replace EBUSY with ETIME to make it
> more diagnosable in userspace, and may be more appropriate considering a
> timer is about to expire.
>
> I am reaching out to get an opinion from the
> suspend maintainers if this would act as any potential risk in the suspend
> flow which only has EBUSY, EAGAIN, and EINVAL as return error codes
> currently. This has been developed as part of a patch series, and only the
> patch of interest is below this message. Any feedback or insights would be
> greatly appreciated.
>
> Thank you,
> Pranav Prasad
Can you please use a cover letter instead of putting random stuff into
the changelong?
> The alarmtimer driver currently fails suspend attempts when there is an
> alarm pending within the next suspend_check_duration_ns nanoseconds, since
> the system is expected to wake up soon anyway. The entire suspend process
> is initiated even though the system will immediately awaken. This process
> includes substantial work before the suspend fails and additional work
> afterwards to undo the failed suspend that was attempted. Therefore on
> battery-powered devices that initiate suspend attempts from userspace, it
> may be advantageous to be able to fail the suspend earlier in the suspend
> flow to avoid power consumption instead of unnecessarily doing extra work.
> As one data point, an analysis of a subset of Android devices showed that
> imminent alarms account for roughly 40% of all suspend failures on average
> leading to unnecessary power wastage.
>
> To facilitate this, expose
> function time_check_suspend_fail() from alarmtimer to be used by the power
> subsystem to perform the check earlier in the suspend flow. Perform the
> check in enter_state() and return early if an alarm is to be fired in the
> next suspend_check_duration_ns nanoseconds, failing suspend.
>
> Signed-off-by: Pranav Prasad <[email protected]>
> Signed-off-by: Kelly Rossmoyer <[email protected]>
This Signed-off-by chain is bogus.
> +/**
> + * alarmtimer_init_soonest - Initializes parameters to find soonest alarm.
> + * @min: ptr to relative time to the soonest alarm to expire
> + * @expires: ptr to absolute time of the soonest alarm to expire
> + * @type: ptr to alarm type
> + *
> + */
> +static void alarmtimer_init_soonest(ktime_t *min, ktime_t *expires, int *type)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&freezer_delta_lock, flags);
> + *min = freezer_delta;
> + *expires = freezer_expires;
> + *type = freezer_alarmtype;
> + freezer_delta = 0;
> + spin_unlock_irqrestore(&freezer_delta_lock, flags);
> +}
> +
> +/**
> + * alarmtimer_get_soonest - Finds the soonest alarm to expire among the alarm bases.
> + * @min: ptr to relative time to the soonest alarm to expire
> + * @expires: ptr to absolute time of the soonest alarm to expire
> + * @type: ptr to alarm type
> + *
> + */
> +static void alarmtimer_get_soonest(ktime_t *min, ktime_t *expires, int *type)
> +{
> + int i;
> + unsigned long flags;
https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations
Aside of that 'flags' wants to be in the loop scope.
> +
> + /* Find the soonest timer to expire */
> + for (i = 0; i < ALARM_NUMTYPE; i++) {
> + struct alarm_base *base = &alarm_bases[i];
> + struct timerqueue_node *next;
> + ktime_t delta;
> +
> + spin_lock_irqsave(&base->lock, flags);
> + next = timerqueue_getnext(&base->timerqueue);
> + spin_unlock_irqrestore(&base->lock, flags);
> + if (!next)
> + continue;
> + delta = ktime_sub(next->expires, base->get_ktime());
> + if (!(*min) || (delta < *min)) {
The inner brackets are pointless
> + *expires = next->expires;
> + *min = delta;
> + *type = i;
> + }
> + }
> +}
> +
> +/**
> + * time_check_suspend_fail - Check if suspend should be failed due to an
> + * alarm within the next suspend_check_duration nanoseconds.
> + *
> + * Returns error if suspend should be failed, else returns 0.
> + */
> +int time_check_suspend_fail(void)
> +{
> + ktime_t min, expires;
> + int type;
Why is this unconditional and not checking RTC dev?
> + /* Initialize parameters to find soonest timer */
> + alarmtimer_init_soonest(&min, &expires, &type);
How does that make sense? That function evaluates the freezer state, but
there is nothing frozen when this is invoked.
> + /* Find the soonest timer to expire */
> + alarmtimer_get_soonest(&min, &expires, &type);
> +
> + if (min == 0)
> + return 0;
> +
> + if (ktime_to_ns(min) < suspend_check_duration_ns)
> + return -EBUSY;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(time_check_suspend_fail);
What is this export for?
> +
> /**
> * alarmtimer_get_rtcdev - Return selected rtcdevice
> *
> @@ -296,49 +374,24 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
> static int alarmtimer_suspend(struct device *dev)
> {
..
> + /* Initialize parameters to find soonest timer */
> + alarmtimer_init_soonest(&min, &expires, &type);
This wants to be _after_ the RTC dev check, no?
> rtc = alarmtimer_get_rtcdev();
> /* If we have no rtcdev, just return */
> if (!rtc)
> return 0;
>
> + /* Find the soonest timer to expire */
> + alarmtimer_get_soonest(&min, &expires, &type);
>
> - if (ktime_to_ns(min) < suspend_check_duration_ns) {
> - pm_wakeup_event(dev, suspend_check_duration_ns/NSEC_PER_MSEC);
What injects the pm_wakeup_event after this change?
Thanks,
tglx
On Wed, Jan 31 2024 at 21:10, Rafael J. Wysocki wrote:
> On Wed, Jan 31, 2024 at 8:13 PM Pranav Prasad <[email protected]> wrote:
>> @@ -564,6 +565,8 @@ static int enter_state(suspend_state_t state)
>> #endif
>> } else if (!valid_state(state)) {
>> return -EINVAL;
>> + } else if (time_check_suspend_fail()) {
>> + return -ETIME;
>
> This causes a function defined in modular code to be called from
> non-modular code which is an obvious mistake.
>
> It also makes the generic suspend code call a function defined in a
> random driver, which is a total no-go as far as I am concerned.
Alarmtimers is built-in core infrastructure and not a random modular
driver, but nevertheless:
> Why don't you instead define a PM notifier in the alarmtimer driver
> and check if it is going to trigger shortly from there? PM notifiers
> run before the tasks freezer, so there would be a little difference
> timing-wise and you can return whatever error code you like from
> there. As an additional benefit, you'd be able to handle hibernation
> in the same way.
Makes sense.
Thanks,
tglx