2024-01-18 18:15:19

by Pranav Prasad

[permalink] [raw]
Subject: [PATCH] alarmtimer: Expose information about next alarm to userspace via sysfs

The alarmtimer driver currently fails suspend attempts when there is an
alarm pending within the next 2 seconds, since the system is expected to wake
up soon anyway. The entire suspend process is initiated even though the
system will immediately awaken. This process includes substantial work before
the suspend fails and additional work afterwards to undo the failed suspend
that was attempted. Therefore on battery-powered devices that initiate suspend
attempts from userspace, it may be advantageous to be able to skip the entire
suspend attempt to avoid power consumption instead of unnecessarily trying and
failing. As one data point, an analysis of a subset of Android devices showed that
imminent alarms account for roughly 40% of all suspend failures on average leading
to unnecessary power wastage.

To facilitate this, create a sysfs node for the alarmtimer subsystem to
provide information about the next coming alarm to userspace. When userspace code
reads the sysfs node, the same code flow already used in suspend will run to
provide a value for the time until the next alarm. Userspace code may then opt to
avoid suspend attempts if the next alarm pending is considered "too soon" for
the suspend to be worthwhile.

This mechanism has some limitations that the value readers must take into
account. First, due to any latencies between the calculation of the value and the
reader actually acting upon it (copying the data out to userspace, scheduling
delays, etc), the value will already be "obsolete" by the time the reader can act
upon it. Second, since alarms can be scheduled and canceled at any time, there
is no guarantee that the specific alarm that was "next" during the read
still exists, or that an earlier alarm hasn't been scheduled since the read.
Since no guarantee is provided about the value that was read remaining correct
by the time the reader uses it, consumers should only treat this value as a
hint to influence power optimization decisions rather than as a reliable
prediction of future events.

Signed-off-by: Pranav Prasad <[email protected]>
Signed-off-by: Kelly Rossmoyer <[email protected]>
---
kernel/time/alarmtimer.c | 131 +++++++++++++++++++++++++++++++--------
1 file changed, 105 insertions(+), 26 deletions(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 4657cb8e8b1f..80d576b8c2d7 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -27,12 +27,15 @@
#include <linux/compat.h>
#include <linux/module.h>
#include <linux/time_namespace.h>
+#include <linux/sysfs.h>

#include "posix-timers.h"

#define CREATE_TRACE_POINTS
#include <trace/events/alarmtimer.h>

+static const char alarmtimer_group_name[] = "alarmtimer";
+
/**
* struct alarm_base - Alarm timer bases
* @lock: Lock for syncrhonized access to the base
@@ -63,6 +66,99 @@ static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
static DEFINE_SPINLOCK(rtcdev_lock);

+/**
+ * alarmtimer_init_soonest - Initializes parameters to find soonest alarm.
+ * @min: ptr to relative time to the soonest alarm to expire
+ * @expires: ptr to absolute time of the soonest alarm to expire
+ * @type: ptr to alarm type
+ * @reset_freezer_delta: boolean to indicate if freezer_delta should be reset to 0
+ *
+ */
+static void alarmtimer_init_soonest(ktime_t *min, ktime_t *expires, int *type,
+ bool reset_freezer_delta)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&freezer_delta_lock, flags);
+ *min = freezer_delta;
+ *expires = freezer_expires;
+ *type = freezer_alarmtype;
+ if (reset_freezer_delta)
+ freezer_delta = 0;
+ spin_unlock_irqrestore(&freezer_delta_lock, flags);
+}
+
+/**
+ * alarmtimer_get_soonest - Finds the soonest alarm to expire among the alarm bases.
+ * @min: ptr to relative time to the soonest alarm to expire
+ * @expires: ptr to absolute time of the soonest alarm to expire
+ * @type: ptr to alarm type
+ *
+ */
+static void alarmtimer_get_soonest(ktime_t *min, ktime_t *expires, int *type)
+{
+ int i;
+ unsigned long flags;
+
+ /* Find the soonest timer to expire */
+ for (i = 0; i < ALARM_NUMTYPE; i++) {
+ struct alarm_base *base = &alarm_bases[i];
+ struct timerqueue_node *next;
+ ktime_t delta;
+
+ spin_lock_irqsave(&base->lock, flags);
+ next = timerqueue_getnext(&base->timerqueue);
+ spin_unlock_irqrestore(&base->lock, flags);
+ if (!next)
+ continue;
+ delta = ktime_sub(next->expires, base->get_ktime());
+ if (!(*min) || (delta < *min)) {
+ *expires = next->expires;
+ *min = delta;
+ *type = i;
+ }
+ }
+}
+
+static ssize_t next_alarm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ ktime_t min, expires;
+ int type;
+
+ /* Init and find the soonest timer to expire */
+ alarmtimer_init_soonest(&min, &expires, &type, false);
+ alarmtimer_get_soonest(&min, &expires, &type);
+
+ return sysfs_emit(buf, "%lld\n", ktime_to_ns(min));
+}
+static struct kobj_attribute next_alarm = __ATTR_RO(next_alarm);
+
+static struct attribute *alarmtimer_attrs[] = {
+ &next_alarm.attr,
+ NULL,
+};
+
+static const struct attribute_group alarmtimer_attr_group = {
+ .name = alarmtimer_group_name,
+ .attrs = alarmtimer_attrs,
+};
+
+/**
+ * alarmtimer_sysfs_add - Adds sysfs attributes for alarmtimer
+ *
+ * Returns 0 if successful, non-zero value for error.
+ */
+static int alarmtimer_sysfs_add(void)
+{
+ int ret = sysfs_create_group(kernel_kobj, &alarmtimer_attr_group);
+
+ if (ret)
+ pr_warn("[%s] failed to create a sysfs group\n", __func__);
+
+ return ret;
+}
+
/**
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
@@ -98,8 +194,11 @@ static int alarmtimer_rtc_add_device(struct device *dev)

pdev = platform_device_register_data(dev, "alarmtimer",
PLATFORM_DEVID_AUTO, NULL, 0);
- if (!IS_ERR(pdev))
+ if (!IS_ERR(pdev)) {
device_init_wakeup(&pdev->dev, true);
+ if (alarmtimer_sysfs_add())
+ pr_warn("[%s] failed to add alarmtimer sysfs attributes\n", __func__);
+ }

spin_lock_irqsave(&rtcdev_lock, flags);
if (!IS_ERR(pdev) && !rtcdev) {
@@ -241,41 +340,21 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
static int alarmtimer_suspend(struct device *dev)
{
ktime_t min, now, expires;
- int i, ret, type;
+ int ret, type;
struct rtc_device *rtc;
- unsigned long flags;
struct rtc_time tm;

- spin_lock_irqsave(&freezer_delta_lock, flags);
- min = freezer_delta;
- expires = freezer_expires;
- type = freezer_alarmtype;
- freezer_delta = 0;
- spin_unlock_irqrestore(&freezer_delta_lock, flags);
+ /* Initialize parameters to find soonest timer */
+ alarmtimer_init_soonest(&min, &expires, &type, true);

rtc = alarmtimer_get_rtcdev();
/* If we have no rtcdev, just return */
if (!rtc)
return 0;

- /* Find the soonest timer to expire*/
- for (i = 0; i < ALARM_NUMTYPE; i++) {
- struct alarm_base *base = &alarm_bases[i];
- struct timerqueue_node *next;
- ktime_t delta;
+ /* Find the soonest timer to expire */
+ alarmtimer_get_soonest(&min, &expires, &type);

- spin_lock_irqsave(&base->lock, flags);
- next = timerqueue_getnext(&base->timerqueue);
- spin_unlock_irqrestore(&base->lock, flags);
- if (!next)
- continue;
- delta = ktime_sub(next->expires, base->get_ktime());
- if (!min || (delta < min)) {
- expires = next->expires;
- min = delta;
- type = i;
- }
- }
if (min == 0)
return 0;

--
2.43.0.381.gb435a96ce8-goog



2024-01-18 22:12:13

by John Stultz

[permalink] [raw]
Subject: Re: [PATCH] alarmtimer: Expose information about next alarm to userspace via sysfs

On Thu, Jan 18, 2024 at 10:15 AM Pranav Prasad <[email protected]> wrote:
>
> The alarmtimer driver currently fails suspend attempts when there is an
> alarm pending within the next 2 seconds, since the system is expected to wake
> up soon anyway. The entire suspend process is initiated even though the
> system will immediately awaken. This process includes substantial work before
> the suspend fails and additional work afterwards to undo the failed suspend
> that was attempted. Therefore on battery-powered devices that initiate suspend
> attempts from userspace, it may be advantageous to be able to skip the entire
> suspend attempt to avoid power consumption instead of unnecessarily trying and
> failing. As one data point, an analysis of a subset of Android devices showed that
> imminent alarms account for roughly 40% of all suspend failures on average leading
> to unnecessary power wastage.
>

So thanks for sending this out!

I'm always a bit cautious when exposing stuff to userland,
particularly if it's potentially racy as you mentioned in your
description. One thought I had was might adding a similar check
earlier in the suspend path on the kernel side provide similar benefit
(without requiring userland changes)?

Basically, if I understand the problem:
echo mem > /sys/power/state
<kernel goes through suspending everything>
alarmtimer_suspend()
if (next_alarm < TWO_SECONDS)
return -EBUSY;
<kernel has to resume everything, and all that time was wasted>

So if instead we did:
echo mem > /sys/power/state
enter_state()
if (alarmtimer_immenent())
retrun -EBUSY

So the kernel would come back much faster if the suspend was going to abort.

I suspect you all have considered this already but wanted to
understand what issues that approach has.

thanks
-john

2024-01-19 19:39:17

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] alarmtimer: Expose information about next alarm to userspace via sysfs

On Thu, Jan 18 2024 at 14:11, John Stultz wrote:
> I'm always a bit cautious when exposing stuff to userland,
> particularly if it's potentially racy as you mentioned in your
> description. One thought I had was might adding a similar check
> earlier in the suspend path on the kernel side provide similar benefit
> (without requiring userland changes)?
>
> Basically, if I understand the problem:
> echo mem > /sys/power/state
> <kernel goes through suspending everything>
> alarmtimer_suspend()
> if (next_alarm < TWO_SECONDS)
> return -EBUSY;
> <kernel has to resume everything, and all that time was wasted>
>
> So if instead we did:
> echo mem > /sys/power/state
> enter_state()
> if (alarmtimer_immenent())
> retrun -EBUSY
>
> So the kernel would come back much faster if the suspend was going to abort.
>
> I suspect you all have considered this already but wanted to
> understand what issues that approach has.

It has the same race issues as the user space readout has as far as I
understand and it's much simpler.

Thanks,

tglx

2024-01-19 23:09:45

by Pranav Prasad

[permalink] [raw]
Subject: Re: [PATCH] alarmtimer: Expose information about next alarm to userspace via sysfs

On Fri, Jan 19, 2024 at 11:38 AM Thomas Gleixner <[email protected]> wrote:
>
> On Thu, Jan 18 2024 at 14:11, John Stultz wrote:
> > I'm always a bit cautious when exposing stuff to userland,
> > particularly if it's potentially racy as you mentioned in your
> > description. One thought I had was might adding a similar check
> > earlier in the suspend path on the kernel side provide similar benefit
> > (without requiring userland changes)?
> >
> > Basically, if I understand the problem:
> > echo mem > /sys/power/state
> > <kernel goes through suspending everything>
> > alarmtimer_suspend()
> > if (next_alarm < TWO_SECONDS)
> > return -EBUSY;
> > <kernel has to resume everything, and all that time was wasted>
> >
> > So if instead we did:
> > echo mem > /sys/power/state
> > enter_state()
> > if (alarmtimer_immenent())
> > retrun -EBUSY
> >
> > So the kernel would come back much faster if the suspend was going to abort.
> >
> > I suspect you all have considered this already but wanted to
> > understand what issues that approach has.
>
> It has the same race issues as the user space readout has as far as I
> understand and it's much simpler.
>
> Thanks,
>
> tglx

Thanks John and Thomas for the suggestions!

The reason I did not go ahead with this approach previously was that I
wanted to contain the changes to the alarmtimer subsystem. I see the
benefit in eliminating the dependency on userspace reading from sysfs,
hence I shall work on v2 for this patch with an alternate solution
where I expose a function from alarmtimer to be used by the suspend
prepare flow in kernel/power.

Regards,
Pranav

2024-01-20 10:48:51

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] alarmtimer: Expose information about next alarm to userspace via sysfs

Hi Pranav,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/timers/core]
[also build test WARNING on linus/master v6.7 next-20240119]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Pranav-Prasad/alarmtimer-Expose-information-about-next-alarm-to-userspace-via-sysfs/20240119-021809
base: tip/timers/core
patch link: https://lore.kernel.org/r/20240118181459.1663313-1-pranavpp%40google.com
patch subject: [PATCH] alarmtimer: Expose information about next alarm to userspace via sysfs
config: sh-randconfig-002-20240120 (https://download.01.org/0day-ci/archive/20240120/[email protected]/config)
compiler: sh4-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240120/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

>> kernel/time/alarmtimer.c:37:19: warning: 'alarmtimer_group_name' defined but not used [-Wunused-const-variable=]
37 | static const char alarmtimer_group_name[] = "alarmtimer";
| ^~~~~~~~~~~~~~~~~~~~~


vim +/alarmtimer_group_name +37 kernel/time/alarmtimer.c

36
> 37 static const char alarmtimer_group_name[] = "alarmtimer";
38

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2024-01-21 20:08:38

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] alarmtimer: Expose information about next alarm to userspace via sysfs

Hi Pranav,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/timers/core]
[also build test WARNING on linus/master v6.7 next-20240119]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Pranav-Prasad/alarmtimer-Expose-information-about-next-alarm-to-userspace-via-sysfs/20240119-021809
base: tip/timers/core
patch link: https://lore.kernel.org/r/20240118181459.1663313-1-pranavpp%40google.com
patch subject: [PATCH] alarmtimer: Expose information about next alarm to userspace via sysfs
config: um-randconfig-001-20240120 (https://download.01.org/0day-ci/archive/20240122/[email protected]/config)
compiler: clang version 18.0.0git (https://github.com/llvm/llvm-project d92ce344bf641e6bb025b41b3f1a77dd25e2b3e9)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240122/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

In file included from kernel/time/alarmtimer.c:18:
In file included from include/linux/rtc.h:17:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from arch/um/include/asm/hardirq.h:5:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/um/include/asm/io.h:24:
include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
547 | val = __raw_readb(PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
560 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
| ^
In file included from kernel/time/alarmtimer.c:18:
In file included from include/linux/rtc.h:17:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from arch/um/include/asm/hardirq.h:5:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/um/include/asm/io.h:24:
include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
573 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
| ^
In file included from kernel/time/alarmtimer.c:18:
In file included from include/linux/rtc.h:17:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from arch/um/include/asm/hardirq.h:5:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/um/include/asm/io.h:24:
include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
584 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
594 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
604 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
692 | readsb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
700 | readsw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
708 | readsl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
717 | writesb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
726 | writesw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
735 | writesl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
>> kernel/time/alarmtimer.c:37:19: warning: unused variable 'alarmtimer_group_name' [-Wunused-const-variable]
37 | static const char alarmtimer_group_name[] = "alarmtimer";
| ^~~~~~~~~~~~~~~~~~~~~
13 warnings generated.


vim +/alarmtimer_group_name +37 kernel/time/alarmtimer.c

36
> 37 static const char alarmtimer_group_name[] = "alarmtimer";
38

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki