Hi!
During the driver suspend phase of kernel suspend, alarmtimer's suspend
callback is invoked and it identifies the earliest next wakeup alarm and
programs that into the HW real time clock (RTC). However, there is an
exception to this process. If the next alarm is within the next 2 seconds,
the alarmtimer driver fails to suspend. In this case, a non-trivial amount
of power is spent to freeze and unfreeze all userspace processes and to
suspend and resume a number of devices. In the vast majority of cases, the
imminent alarm that caused the failure was likely already scheduled before
suspend even started. This provides an opportunity to reduce power
consumption if the suspend failure decision is made earlier in the suspend
flow, before the unnecessary extra work is done.
This patch series aims to achieve a kernel suspend flow in which the check
for an imminent alarm is performed early during the suspend prepare phase.
Changes in v3 from v2:
- Reversed the order of patches
- Formatted variable declarations
- Moved the RTC device check out of alarmtimer_get_soonest()
- Reinstated the check for pending alarm in alarmtimer_suspend()
Changes in v2 from v1:
- Moved the pm_wakeup_event call to the PM notifier
- Added a check for RTC device in the PM notifier
Pranav Prasad (2):
alarmtimer: Add PM notifier to check early for imminent alarm
alarmtimer: Create sysfs to make alarm check window configurable
kernel/time/alarmtimer.c | 164 +++++++++++++++++++++++++++++++++------
1 file changed, 140 insertions(+), 24 deletions(-)
--
2.43.0.687.g38aa6559b0-goog
Currently, the alarmtimer_suspend does not allow the kernel
to suspend if the next alarm is within 2 seconds. Instead, a configurable
value for the window to check the next pending alarm allows developers
to change the value according to their specific device and use case.
As an example, on Android devices if the value is changed to the short
suspend threshold used in Android's SuspendControlService, it is expected
to lower power consumption during kernel suspend.
The value to be configured needs to be found experimentally based on the
device and use case. The current value of 2 seconds will continue to be
used as the default if no changes are made. If the value is configured to
be very small, there may be a pending alarm that would wake the system up
right after the successful kernel suspend, which would devalue the
benefits of having the check. If the value is configured to be very large,
the kernel suspend would be aborted regularly which would cause to lose
the benefits of kernel suspension in general. Therefore, the value
should not lean too much in either direction.
To facilitate this, create alarmtimer sysfs to make the value of
2 seconds configurable.
Signed-off-by: Pranav Prasad <[email protected]>
---
kernel/time/alarmtimer.c | 70 ++++++++++++++++++++++++++++++++++++----
1 file changed, 64 insertions(+), 6 deletions(-)
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 366ca3568f87..d11f8cb945c2 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -34,6 +34,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/alarmtimer.h>
+static const char alarmtimer_group_name[] = "alarmtimer";
+
/**
* struct alarm_base - Alarm timer bases
* @lock: Lock for syncrhonized access to the base
@@ -64,6 +66,56 @@ static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
static DEFINE_SPINLOCK(rtcdev_lock);
+/* Duration to check for soonest alarm during kernel suspend */
+static unsigned long suspend_alarm_pending_window_ms = 2 * MSEC_PER_SEC;
+
+static ssize_t suspend_alarm_pending_window_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lu\n", suspend_alarm_pending_window_ms);
+}
+
+static ssize_t suspend_alarm_pending_window_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t n)
+{
+ unsigned long val;
+
+ if (kstrtoul(buf, 10, &val))
+ return -EINVAL;
+
+ suspend_alarm_pending_window_ms = val;
+
+ return n;
+}
+
+static struct kobj_attribute suspend_alarm_pending_window =
+ __ATTR_RW(suspend_alarm_pending_window);
+
+static struct attribute *alarmtimer_attrs[] = {
+ &suspend_alarm_pending_window.attr,
+ NULL,
+};
+
+static const struct attribute_group alarmtimer_attr_group = {
+ .name = alarmtimer_group_name,
+ .attrs = alarmtimer_attrs,
+};
+
+/**
+ * alarmtimer_sysfs_add - Adds sysfs attributes for alarmtimer
+ *
+ * Returns 0 if successful, non-zero value for error.
+ */
+static int alarmtimer_sysfs_add(void)
+{
+ int ret = sysfs_create_group(kernel_kobj, &alarmtimer_attr_group);
+
+ if (ret)
+ pr_warn("[%s] failed to create a sysfs group\n", __func__);
+
+ return ret;
+}
+
/**
* alarmtimer_get_soonest - Finds the soonest alarm to expire among the
* alarm bases.
@@ -105,7 +157,7 @@ static bool alarmtimer_get_soonest(ktime_t *min,
}
static int alarmtimer_pm_callback(struct notifier_block *nb,
- unsigned long mode, void *_unused)
+ unsigned long mode, void *_unused)
{
struct rtc_device *rtc;
ktime_t min, expires;
@@ -122,9 +174,11 @@ static int alarmtimer_pm_callback(struct notifier_block *nb,
if (!alarmtimer_get_soonest(&min, &expires, &type))
return NOTIFY_DONE;
- if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
+ if (ktime_to_ns(min) <
+ suspend_alarm_pending_window_ms * NSEC_PER_MSEC) {
pr_debug("Suspend abort due to imminent alarm\n");
- pm_wakeup_event(&rtc->dev, 2 * MSEC_PER_SEC);
+ pm_wakeup_event(&rtc->dev,
+ suspend_alarm_pending_window_ms);
return notifier_from_errno(-ETIME);
}
}
@@ -171,8 +225,11 @@ static int alarmtimer_rtc_add_device(struct device *dev)
pdev = platform_device_register_data(dev, "alarmtimer",
PLATFORM_DEVID_AUTO, NULL, 0);
- if (!IS_ERR(pdev))
+ if (!IS_ERR(pdev)) {
device_init_wakeup(&pdev->dev, true);
+ if (alarmtimer_sysfs_add())
+ pr_warn("[%s] Failed to add alarmtimer sysfs attributes\n", __func__);
+ }
spin_lock_irqsave(&rtcdev_lock, flags);
if (!IS_ERR(pdev) && !rtcdev) {
@@ -336,9 +393,10 @@ static int alarmtimer_suspend(struct device *dev)
if (!alarmtimer_get_soonest(&min, &expires, &type))
return 0;
- if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
+ if (ktime_to_ns(min) <
+ suspend_alarm_pending_window_ms * NSEC_PER_MSEC) {
pr_debug("Suspend abort due to imminent alarm\n");
- pm_wakeup_event(dev, 2 * MSEC_PER_SEC);
+ pm_wakeup_event(dev, suspend_alarm_pending_window_ms);
return -ETIME;
}
--
2.43.0.687.g38aa6559b0-goog
The alarmtimer driver currently fails suspend attempts when there is an
alarm pending within the next 2 seconds, since the system is expected to
wake up soon anyway. The entire suspend process is initiated even though
the system will immediately awaken. This process includes substantial work
before the suspend fails and additional work afterwards to undo the failed
suspend that was attempted. Therefore on battery-powered devices that
initiate suspend attempts from userspace, it may be advantageous to be
able to fail the suspend earlier in the suspend flow to avoid power
consumption instead of unnecessarily doing extra work. As one data point,
an analysis of a subset of Android devices showed that imminent alarms
account for roughly 40% of all suspend failures on average leading to
unnecessary power wastage.
To facilitate this, register a PM notifier in the alarmtimer subsystem
that checks if an alarm is imminent during the prepare stage of kernel
suspend denoted by the event PM_SUSPEND_PREPARE. If an alarm is imminent,
it returns the errno code ETIME instead of EBUSY to userspace in order to
make it easily diagnosable.
Signed-off-by: Pranav Prasad <[email protected]>
---
kernel/time/alarmtimer.c | 100 +++++++++++++++++++++++++++++++--------
1 file changed, 79 insertions(+), 21 deletions(-)
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 4657cb8e8b1f..366ca3568f87 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -27,6 +27,7 @@
#include <linux/compat.h>
#include <linux/module.h>
#include <linux/time_namespace.h>
+#include <linux/suspend.h>
#include "posix-timers.h"
@@ -63,6 +64,78 @@ static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
static DEFINE_SPINLOCK(rtcdev_lock);
+/**
+ * alarmtimer_get_soonest - Finds the soonest alarm to expire among the
+ * alarm bases.
+ * @min: ptr to relative time to the soonest alarm to expire
+ * @expires: ptr to absolute time of the soonest alarm to expire
+ * @type: ptr to alarm type
+ *
+ * Returns true if soonest alarm was found, returns false if don't care.
+ */
+static bool alarmtimer_get_soonest(ktime_t *min,
+ ktime_t *expires, int *type)
+{
+ unsigned long flags;
+ int i;
+
+ /* Find the soonest timer to expire */
+ for (i = 0; i < ALARM_NUMTYPE; i++) {
+ struct alarm_base *base = &alarm_bases[i];
+ struct timerqueue_node *next;
+ ktime_t delta;
+
+ spin_lock_irqsave(&base->lock, flags);
+ next = timerqueue_getnext(&base->timerqueue);
+ spin_unlock_irqrestore(&base->lock, flags);
+ if (!next)
+ continue;
+ delta = ktime_sub(next->expires, base->get_ktime());
+ if (*min == 0 || delta < *min) {
+ *expires = next->expires;
+ *min = delta;
+ *type = i;
+ }
+ }
+
+ if (*min == 0)
+ return false;
+
+ return true;
+}
+
+static int alarmtimer_pm_callback(struct notifier_block *nb,
+ unsigned long mode, void *_unused)
+{
+ struct rtc_device *rtc;
+ ktime_t min, expires;
+ int type;
+
+ switch (mode) {
+ case PM_SUSPEND_PREPARE:
+ rtc = alarmtimer_get_rtcdev();
+ /* If we have no rtcdev, just return */
+ if (!rtc)
+ return NOTIFY_DONE;
+
+ /* Find the soonest timer to expire */
+ if (!alarmtimer_get_soonest(&min, &expires, &type))
+ return NOTIFY_DONE;
+
+ if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
+ pr_debug("Suspend abort due to imminent alarm\n");
+ pm_wakeup_event(&rtc->dev, 2 * MSEC_PER_SEC);
+ return notifier_from_errno(-ETIME);
+ }
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block alarmtimer_pm_notifier = {
+ .notifier_call = alarmtimer_pm_callback,
+};
+
/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
@@ -126,6 +199,7 @@ static int alarmtimer_rtc_add_device(struct device *dev)
static inline void alarmtimer_rtc_timer_init(void)
{
rtc_timer_init(&rtctimer, NULL, NULL);
+ register_pm_notifier(&alarmtimer_pm_notifier);
}
static struct class_interface alarmtimer_rtc_interface = {
@@ -241,10 +315,10 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
static int alarmtimer_suspend(struct device *dev)
{
ktime_t min, now, expires;
- int i, ret, type;
struct rtc_device *rtc;
unsigned long flags;
struct rtc_time tm;
+ int ret, type;
spin_lock_irqsave(&freezer_delta_lock, flags);
min = freezer_delta;
@@ -258,30 +332,14 @@ static int alarmtimer_suspend(struct device *dev)
if (!rtc)
return 0;
- /* Find the soonest timer to expire*/
- for (i = 0; i < ALARM_NUMTYPE; i++) {
- struct alarm_base *base = &alarm_bases[i];
- struct timerqueue_node *next;
- ktime_t delta;
-
- spin_lock_irqsave(&base->lock, flags);
- next = timerqueue_getnext(&base->timerqueue);
- spin_unlock_irqrestore(&base->lock, flags);
- if (!next)
- continue;
- delta = ktime_sub(next->expires, base->get_ktime());
- if (!min || (delta < min)) {
- expires = next->expires;
- min = delta;
- type = i;
- }
- }
- if (min == 0)
+ /* Find the soonest timer to expire */
+ if (!alarmtimer_get_soonest(&min, &expires, &type))
return 0;
if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
+ pr_debug("Suspend abort due to imminent alarm\n");
pm_wakeup_event(dev, 2 * MSEC_PER_SEC);
- return -EBUSY;
+ return -ETIME;
}
trace_alarmtimer_suspend(expires, type);
--
2.43.0.687.g38aa6559b0-goog
On Wed, Feb 14 2024 at 09:29, Pranav Prasad wrote:
> +static int alarmtimer_pm_callback(struct notifier_block *nb,
> + unsigned long mode, void *_unused)
> +{
> + struct rtc_device *rtc;
> + ktime_t min, expires;
> + int type;
> +
> + switch (mode) {
> + case PM_SUSPEND_PREPARE:
> + rtc = alarmtimer_get_rtcdev();
> + /* If we have no rtcdev, just return */
> + if (!rtc)
> + return NOTIFY_DONE;
> +
> + /* Find the soonest timer to expire */
> + if (!alarmtimer_get_soonest(&min, &expires, &type))
> + return NOTIFY_DONE;
Brilliant. Instead of a NULL pointer dereference you decided to add
undefined behaviour this time.
As it survived your "testing" it is obviously correct, right?
I'm tired of your approach to throw stuff at the wall in a hurry and see
what sticks.
Stop this frenzy. Sit down, take your time and do proper engineering
before coming back with this to me.
Thanks,
tglx