Advertise custom sets of system power states for non-ACPI systems.
Currently, /sys/power/state shows and accepts a static set of choices
that are not necessarily meaningful on all platforms (for example,
suspend-to-disk is an option even on diskless embedded systems, and the
meaning of standby vs. suspend-to-mem is not well-defined on
non-ACPI-systems). This patch allows the platform to register power
states with meaningful names that correspond to the platform's
conventions (for example, "big sleep" and "deep sleep" on TI OMAP), and
only those states that make sense for the platform.
For the time being, the canned set of PM_SUSPEND_STANDBY/MEM/DISK
etc. symbols are preserved, since knowledge of the meanings of those
values has crept into drivers. There is a separate effort underway to
divorce driver suspend flags from the platform suspend state
identifiers. Once that is accomplished, we can then replace the suspend
states available with an entirely custom set. For example, various
embedded platforms have multiple power states that roughly correspond to
suspend-to-mem, and each could be advertised and requested via the PM
interfaces, once drivers no longer look for the one and only
PM_SUSPEND_MEM system suspend state.
If the platform does not register a custom set of power states then the
present-day set remains available as a default. Will send separately a
patch for an embedded platform to show usage. Comments appreciated.
Index: linux-2.6.10/include/linux/pm.h
===================================================================
--- linux-2.6.10.orig/include/linux/pm.h 2005-03-02 00:41:43.000000000 +0000
+++ linux-2.6.10/include/linux/pm.h 2005-03-02 01:12:14.000000000 +0000
@@ -216,8 +216,14 @@
#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 4)
#define PM_DISK_MAX ((__force suspend_disk_method_t) 5)
+struct pm_suspend_method {
+ char *name;
+ suspend_state_t state;
+};
+
struct pm_ops {
suspend_disk_method_t pm_disk_mode;
+ struct pm_suspend_method *pm_suspend_methods;
int (*prepare)(suspend_state_t state);
int (*enter)(suspend_state_t state);
int (*finish)(suspend_state_t state);
Index: linux-2.6.10/kernel/power/main.c
===================================================================
--- linux-2.6.10.orig/kernel/power/main.c 2005-03-02 00:41:41.000000000 +0000
+++ linux-2.6.10/kernel/power/main.c 2005-03-02 01:15:21.000000000 +0000
@@ -228,11 +228,22 @@
-char * pm_states[] = {
- [PM_SUSPEND_STANDBY] = "standby",
- [PM_SUSPEND_MEM] = "mem",
- [PM_SUSPEND_DISK] = "disk",
- NULL,
+struct pm_suspend_method pm_default_suspend_methods[] = {
+ {
+ .name = "standby",
+ .state = PM_SUSPEND_STANDBY,
+ },
+ {
+ .name = "mem",
+ .state = PM_SUSPEND_MEM,
+ },
+ {
+ .name = "disk",
+ .state = PM_SUSPEND_DISK,
+ },
+ {
+ .name = NULL,
+ },
};
@@ -324,19 +335,22 @@
{
int i;
char * s = buf;
+ struct pm_suspend_method *methods = pm_ops->pm_suspend_methods;
+
+ if (! methods)
+ methods = pm_default_suspend_methods;
+
+ for (i=0; methods[i].name; i++)
+ s += sprintf(s,"%s ",methods[i].name);
- for (i = 0; i < PM_SUSPEND_MAX; i++) {
- if (pm_states[i])
- s += sprintf(s,"%s ",pm_states[i]);
- }
s += sprintf(s,"\n");
return (s - buf);
}
static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n)
{
- suspend_state_t state = PM_SUSPEND_STANDBY;
- char ** s;
+ struct pm_suspend_method *methods = pm_ops->pm_suspend_methods;
+ int i;
char *p;
int error;
int len;
@@ -344,12 +358,15 @@
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
- for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
- if (*s && !strncmp(buf, *s, len))
+ if (! methods)
+ methods = pm_default_suspend_methods;
+
+ for (i = 0; methods[i].name; i++) {
+ if (!strncmp(buf, methods[i].name, len))
break;
}
- if (*s)
- error = enter_state(state);
+ if (methods[i].name)
+ error = enter_state(methods[i].state);
else
error = -EINVAL;
return error ? error : n;
An example of custom power states for the TI OMAP family.
/sys/power/state supports a state named "deepsleep", which corresponds
to the platform state actually entered by the present-day system suspend
handler. It no longer offers the option of "disk" suspend which would
not normally be available in an OMAP-based system, nor does it offer the
choices "standby" or "mem", which are currently somewhat arbitrarily
mapped to actual platform power states on OMAPs. In the future the OMAP
could be extended to offer the choice of "big sleep" as well, another
platform-specific low-power mode that falls under the general category
of suspend-to-mem, once it is feasible to no longer use the same set of
system suspend state values for all platforms and drivers (as mentioned
in the base note).
Index: linux-2.6.10/arch/arm/mach-omap/pm.c
===================================================================
--- linux-2.6.10.orig/arch/arm/mach-omap/pm.c 2005-03-02 01:10:27.000000000 +0000
+++ linux-2.6.10/arch/arm/mach-omap/pm.c 2005-03-02 01:13:41.000000000 +0000
@@ -576,8 +576,20 @@
}
+static struct pm_suspend_method omap_pm_suspend_methods[] = {
+ {
+ .name = "deepsleep",
+ .state = PM_SUSPEND_MEM,
+ },
+ {
+ .name = NULL,
+ },
+};
+
+
struct pm_ops omap_pm_ops ={
.pm_disk_mode = 0,
+ .pm_suspend_methods = omap_pm_suspend_methods,
.prepare = omap_pm_prepare,
.enter = omap_pm_enter,
.finish = omap_pm_finish,
On Tue, 2005-03-01 at 18:03 -0800, Todd Poynor wrote:
> Advertise custom sets of system power states for non-ACPI systems.
> Currently, /sys/power/state shows and accepts a static set of choices
> that are not necessarily meaningful on all platforms (for example,
> suspend-to-disk is an option even on diskless embedded systems, and the
> meaning of standby vs. suspend-to-mem is not well-defined on
> non-ACPI-systems). This patch allows the platform to register power
> states with meaningful names that correspond to the platform's
> conventions (for example, "big sleep" and "deep sleep" on TI OMAP), and
> only those states that make sense for the platform.
> .../...
Note that I'd like to rework the whole notion of power states
ultimately. Devices themselves need custom state if we want anything
sane other than global system wide suspend.
Ben.
Hi!
> Advertise custom sets of system power states for non-ACPI systems.
> Currently, /sys/power/state shows and accepts a static set of choices
> that are not necessarily meaningful on all platforms (for example,
> suspend-to-disk is an option even on diskless embedded systems, and the
> meaning of standby vs. suspend-to-mem is not well-defined on
> non-ACPI-systems). This patch allows the platform to register power
> states with meaningful names that correspond to the platform's
> conventions (for example, "big sleep" and "deep sleep" on TI OMAP), and
> only those states that make sense for the platform.
Maybe this is a bit overdone?
Of course you can have suspend-to-disk on most embedded systems; CF
flash card looks just like disk, and you should be able to suspend to
it.
If OMAP has "big sleep" and "deep sleep", why not simply map them to
"standby" and "suspend-to-ram"?
[OTOH patch is not that long; but strings in /sys filesystem are not
for human consumption anyway.]
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!
Pavel Machek wrote:
> Hi!
>
>
>>Advertise custom sets of system power states for non-ACPI systems.
>>Currently, /sys/power/state shows and accepts a static set of choices
>>that are not necessarily meaningful on all platforms (for example,
>>suspend-to-disk is an option even on diskless embedded systems, and the
>>meaning of standby vs. suspend-to-mem is not well-defined on
>>non-ACPI-systems). This patch allows the platform to register power
>>states with meaningful names that correspond to the platform's
>>conventions (for example, "big sleep" and "deep sleep" on TI OMAP), and
>>only those states that make sense for the platform.
>
>
> Maybe this is a bit overdone?
>
> Of course you can have suspend-to-disk on most embedded systems; CF
> flash card looks just like disk, and you should be able to suspend to
> it.
It's possible (on those with CF/PCMCIA etc.), although due to various
problems with things like flash size, write speed, and wear leveling
it's not very common to do so (I've seen two vendors abandon plans for
this, but no doubt somebody does do it) -- that's why I'd like to have
the particular platform register the capability if it happens to have
it, but no, not a big deal.
> If OMAP has "big sleep" and "deep sleep", why not simply map them to
> "standby" and "suspend-to-ram"?
In fact that's more or less what happens (or will happen once drivers
like USB stop looking for PM_SUSPEND_MEM, etc.). There are other
platforms with more than 2 sleep states (say, XScale PXA27x), so this
will start to get a bit problematic. And it seems so easy to truly
handle the platform's states instead of pretending ACPI S1/S3/S4 are the
only methods to suspend any system.
If it's preferable, how about replacing the /sys/power/state "standby"
and "mem" values with "sleep", and have a /sys/power/sleep attribute that
tells the methods of sleep available for the platform, much like
suspend-to-disk methods are handled today? So the sleep attribute would
handle "standby" and "mem" for ACPI systems, and other values for
non-ACPI systems. Thanks,
--
Todd
Hi!
> >If OMAP has "big sleep" and "deep sleep", why not simply map them to
> >"standby" and "suspend-to-ram"?
>
> In fact that's more or less what happens (or will happen once drivers
> like USB stop looking for PM_SUSPEND_MEM, etc.). There are other
> platforms with more than 2 sleep states (say, XScale PXA27x), so this
> will start to get a bit problematic. And it seems so easy to truly
> handle the platform's states instead of pretending ACPI S1/S3/S4 are the
> only methods to suspend any system.
>
> If it's preferable, how about replacing the /sys/power/state "standby"
> and "mem" values with "sleep", and have a /sys/power/sleep attribute that
> tells the methods of sleep available for the platform, much like
> suspend-to-disk methods are handled today? So the sleep attribute would
> handle "standby" and "mem" for ACPI systems, and other values for
> non-ACPI systems. Thanks,
This is userland API. It should not change in random way during stable
series...
...but adding new /sys/power/state might be okay. We should not have
introduced "standby" in the first place [but I guess it is not worth
removing now]. If something has more than 2 states (does user really
want to enter different states in different usage?), I guess we can
add something like "deepmem" or whatever. Is there something with more
than 3 states?
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!
Pavel Machek wrote:
...
> ...but adding new /sys/power/state might be okay. We should not have
> introduced "standby" in the first place [but I guess it is not worth
> removing now]. If something has more than 2 states (does user really
> want to enter different states in different usage?), I guess we can
> add something like "deepmem" or whatever. Is there something with more
> than 3 states?
In most of the cases I'm thinking of, it wouldn't be a user requesting a
state but rather software (say, a cell phone progressively entering
lower power states due to inactivity). I haven't noticed a platform
with more than 3 low-power modes so far, but I'm sure it'll happen soon.
If the time isn't right for incompatible changes to these interfaces
then I guess mapping standby and mem to platform-specific things will
work for now, maybe with some tweak to allow a choice of actual state
entered. At some more opportune time in the future I'll suggest an
attribute that allows a choice of platform-specific method of
suspend-to-mem, somewhat like the "disk" attribute for suspend-to-disk.
Thanks,
--
Todd
Hi!
> >...but adding new /sys/power/state might be okay. We should not have
> >introduced "standby" in the first place [but I guess it is not worth
> >removing now]. If something has more than 2 states (does user really
> >want to enter different states in different usage?), I guess we can
> >add something like "deepmem" or whatever. Is there something with
> >more
> >than 3 states?
>
> In most of the cases I'm thinking of, it wouldn't be a user
> requesting a state but rather software (say, a cell phone
> progressively entering lower power states due to inactivity). I
> haven't noticed a platform with more than 3 low-power modes so far,
Are not your power states more like cpu power states?
These are expected to be system states, and sleeping system
does not take calls, etc...
(Unless you have second cpu that wakes you on incoming call, that is).
> but I'm sure it'll happen soon. If the time isn't right for
> incompatible changes to these interfaces then I guess mapping standby
> and mem to platform-specific things will work for now, maybe with
> some tweak to allow a choice of actual state entered. At some more
> opportune time in the future I'll suggest an attribute that allows a
> choice of platform-specific method of suspend-to-mem, somewhat like
> the "disk" attribute for suspend-to-disk.
Or just resurrect your original patch when one more state is needed.
--
64 bytes from 195.113.31.123: icmp_seq=28 ttl=51 time=448769.1 ms
On Thursday 03 March 2005 6:55 am, Pavel Machek wrote:
> Hi!
>
> > In most of the cases I'm thinking of, it wouldn't be a user
> > requesting a state but rather software (say, a cell phone
> > progressively entering lower power states due to inactivity). I
> > haven't noticed a platform with more than 3 low-power modes so far,
>
> Are not your power states more like cpu power states?
For System-on-Chip devices it can be a fine line. Maybe six
of the most important devices (including CPU) are in a low
power state, but some others are still active.
> These are expected to be system states, and sleeping system
> does not take calls, etc...
Pavel, remember that great big "wakeup" shaped hole in the
current PM framework... ? Even ACPI sleep states support
wakeup mechanisms, although not well under Linux (yet).
One way a sleeping system could take a call is if some
external chip raised a wakeup-enabled IRQ to wake up the
system. And if going from deep sleep to normal operational
state has a low cost, why shouldn't the system routinely
enter deep sleep instead of going to CPU idle state?
It's certainly the case that connecting the USB device
to a host can un-gate that peripheral's 48 MHz clock and
wake the system up from deep sleep.
- Dave
Pavel Machek wrote:
...
>>In most of the cases I'm thinking of, it wouldn't be a user
>>requesting a state but rather software (say, a cell phone
>>progressively entering lower power states due to inactivity). I
>>haven't noticed a platform with more than 3 low-power modes so far,
>
>
> Are not your power states more like cpu power states?
> These are expected to be system states, and sleeping system
> does not take calls, etc...
There's a great variety of behaviors and usage models out there, not
sure I can draw a useful distinction between cpu power states vs. system
states, but the net effect could be considered to be approximately the
same in typical embedded uses: the drivers are called to place
appropriate devices in a low(er)-power state, various platform thingies
are slowed or powered off, and the system stops waiting for something to
wake it up. In some cases the system does not wake up until an explicit
user action (button press, etc.), but more commonly
wake-on-device-activity (including ring from telephony unit) or
time-based actions (including wake on alarm from event in user's
datebook) is also wanted (rather like wake-on-LAN et al). I don't think
this would correspond well to hardware-managed CPU power states like
ACPI C states, for example. Thanks,
--
Todd
On Wednesday 02 March 2005 12:56 am, Pavel Machek wrote:
>
> If OMAP has "big sleep" and "deep sleep", why not simply map them to
> "standby" and "suspend-to-ram"?
Or even "cpu idle". Entering power saving modes shouldn't be such
a Big Deal. Some of the variable scheduling timeout work has been
done specifically with the goal of letting the system use those low
power modes more generally, without needing user(space) input to
suggest that now would be a good time to conserve more milliWatts.
Of course, on systems that don't swap (or swsusp) there may be
dozens of different low-power "standby" states. I'm not sure it
helps to try labeling them all through /sys/power/files.
- Dave
Hi.
On Fri, 2005-03-04 at 13:17, David Brownell wrote:
> On Wednesday 02 March 2005 12:56 am, Pavel Machek wrote:
> >
> > If OMAP has "big sleep" and "deep sleep", why not simply map them to
> > "standby" and "suspend-to-ram"?
>
> Or even "cpu idle". Entering power saving modes shouldn't be such
> a Big Deal. Some of the variable scheduling timeout work has been
> done specifically with the goal of letting the system use those low
> power modes more generally, without needing user(space) input to
> suggest that now would be a good time to conserve more milliWatts.
>
> Of course, on systems that don't swap (or swsusp) there may be
> dozens of different low-power "standby" states. I'm not sure it
> helps to try labeling them all through /sys/power/files.
It seems to make a lot of sense to me for us to make a split between
capability/implementation and policy, and move the policy stuff to
userspace. Here's the proposal I'm slowly fleshing out:
Two way communication between a userspace policy manager and kernel
drivers is implemented via DBus.
In this scheme, 'kernel drivers' doesn't just refer to the drivers for
hardware. It refers to anything remotely power management related,
including code to implement suspend-to-RAM, to disk or the like, ACPI
drivers or code to implement system power states.
The policy manager can enumerate devices and inter-relationships,
capabilities, settings and status information, set and query policies
and implementation results. The drivers can notify events. This
communication doesn't use complicated structures or type definitions.
Rather, all the nous regarding interpretation of the messages that are
sent is in the policy manager and the drivers. One driver might say it's
capable of states called "D0, D1 and D3", another (system) states called
"Deep Sleep" and "Big Sleep". Nothing but the driver itself and
userspace manager need to know how to interpret & use these states.
Inter-relationships between drivers are _not_ included in this
information. The policy manager sets policy, the drivers deal with the
specifics of implementing it.
The userspace manager can in turn [en|dis]able capabilities and send a
list of run-time states that the driver can move between according to
its own logic (eg lack of active children) without notifying the
userspace manager. This would fit in with your power modes above, even
to the level of "cpu idle".
The DBus support would also provide a means by which the userspace
manager could be notified of events it might be interested in. Again, a
generic format could be used, with the format depending upon the driver.
These events might include 'no keypresses in the specified period' or
'I'm a new driver just loaded'.
Support for system states works in a similar manner. Capabilities and
configuration parameters for system states (suspend to ram, deep sleep
etc) could be registered in a similar manner to the device drivers
above, but the choice regarding entering them comes from the userspace
manager. The code implementing the state interacts with drivers using
the existing driver model which, by definition (it only deals with
system states), overrides the runtime policy settings previously
applied.
DBus events could also come from userspace, such as window manager
requesting hibernation or a userspace UPS driver notifying AC loss.
Drivers could also interact with each other to communicate status
changes (eg USB drivers notify parent HUB of their removal). This is, of
course, the more complicated bit. Since this is the implementation (and
not policy), however, userspace doesn't need to be involved. This
separation should simplify things. My USB hub driver knows what its
children are via the driver model. It doesn't need to receive a message
from userspace to tell it to sleep because it has nothing to do.
With an implementation along these lines, I think we'll have a good
basis for getting runtime power management and system states into a
usable state.
Regards,
Nigel
--
Nigel Cunningham
Software Engineer, Canberra, Australia
http://www.cyclades.com
Bus: +61 (2) 6291 9554; Hme: +61 (2) 6292 8028; Mob: +61 (417) 100 574
Maintainer of Suspend2 Kernel Patches http://softwaresuspend.berlios.de
On Thursday 03 March 2005 8:49 pm, Nigel Cunningham wrote:
> >
> > ... the goal of letting the system use those low
> > power modes more generally, without needing user(space) input to
> > suggest that now would be a good time to conserve more milliWatts.
> >
> > Of course, on systems that don't swap (or swsusp) there may be
> > dozens of different low-power "standby" states. I'm not sure it
> > helps to try labeling them all through /sys/power/files.
>
> It seems to make a lot of sense to me for us to make a split between
> capability/implementation and policy, and move the policy stuff to
> userspace.
But what's a "policy"? And what's its scope? When the device drivers
can cheaply turn their clocks on/off depending on whether their hardware
is in use, there's little point in having control knobs for that. And
considering debug and testing, such knobs create pain and trouble.
Yet each different clock that can be gated creates a different family of
such low power "standby" states... all subtly different combinations
of active clocks. I counted over six dozen clocks on one SOC; do the
math, it's hard to justify userspace caring about that many states!!
(Some characteristics may matter though, e.g. whether the 48 MHz PLL
is currently active or not. If not, more aggressive power saving modes
might be available.)
There's a cost to having userspace make decisions. When that cost can
exceed the power savings, it's probably best not to get it involved.
There are policy decisions that probably make sense in userspace,
like "backlight off"; and ones that certainly don't make sense there
(like IMO almost all clock gating decisions).
> Drivers could also interact with each other to communicate status
> changes (eg USB drivers notify parent HUB of their removal).
Actually it works the other way around: hub status change events
are used to detect device removal, then the device gets told.
I think most hotpluggable busses work that way: PCMCIA/CF, MMC,
CardBus, FireWire, PCI Hotplug, and so on.
> This is, of
> course, the more complicated bit. Since this is the implementation (and
> not policy), however, userspace doesn't need to be involved. This
> separation should simplify things. My USB hub driver knows what its
> children are via the driver model. It doesn't need to receive a message
> from userspace to tell it to sleep because it has nothing to do.
Right, but there are important PM scenarios lurking there. There's the
example of USB mice autosuspending, to enable root hubs to suspend,
allowing DMA to stop, and thus enabling C3 CPU mode, and saving 2 Watts
on top of the power no longer being supplied to that bus-powered mouse.
(And the same could be done for some other USB devices.) This is,
you'll note, purely device power management ... it's all in the same
ACPI system state, S0. (Though the mouse can probably be used to wake
the system from S1, S2, or S3 later on ...)
What we've discussed so far is that the only policy setting there
would be a module parameter for the HID driver, saying whether to
start that sequence after N seconds. For the other drivers, CONFIG_PM
would seem to be enough of a request.
> With an implementation along these lines, I think we'll have a good
> basis for getting runtime power management and system states into a
> usable state.
Using DBUS to allow active policy (re)decisions could work, but I'd
also like to see most PM policies not need to swap in DBUS. Sysfs
is a fine place to record policy decisions (though the cost of each
new attribute is less memory available for Real Work).
Do you have examples of PM policies where a DBUS agent would need
to make choices on the fly?
- Dave
>
> Regards,
>
> Nigel
> --
> Nigel Cunningham
> Software Engineer, Canberra, Australia
> http://www.cyclades.com
> Bus: +61 (2) 6291 9554; Hme: +61 (2) 6292 8028; Mob: +61 (417) 100 574
>
> Maintainer of Suspend2 Kernel Patches http://softwaresuspend.berlios.de
>
>
>
Hi!
> > These are expected to be system states, and sleeping system
> > does not take calls, etc...
>
> Pavel, remember that great big "wakeup" shaped hole in the
> current PM framework... ? Even ACPI sleep states support
> wakeup mechanisms, although not well under Linux (yet).
Umm, yes, I see that one.
> One way a sleeping system could take a call is if some
> external chip raised a wakeup-enabled IRQ to wake up the
> system. And if going from deep sleep to normal operational
> state has a low cost, why shouldn't the system routinely
> enter deep sleep instead of going to CPU idle state?
But in such case /sys/power/sleep is wrong interface to trigger
this. Imagine system taking short sleeps 10 times a second. You don't
want to trigger that using /sys/power/sleep [because it would switch
your consoles].
But yes, I see the fine line... If it turns display off and waits for
incoming call, yes, there /sys/power/sleep makes sense. Someone get me
Linux phone or tell me where to buy one so I can see the fine points
better ;-).
Pavel
--
People were complaining that M$ turns users into beta-testers...
...jr ghea gurz vagb qrirybcref, naq gurl frrz gb yvxr vg gung jnl!
Nigel Cunningham wrote:
...
> Two way communication between a userspace policy manager and kernel
> drivers is implemented via DBus.
>
> In this scheme, 'kernel drivers' doesn't just refer to the drivers for
> hardware. It refers to anything remotely power management related,
> including code to implement suspend-to-RAM, to disk or the like, ACPI
> drivers or code to implement system power states.
>
> The policy manager can enumerate devices and inter-relationships,
> capabilities, settings and status information, set and query policies
> and implementation results. The drivers can notify events. This
> communication doesn't use complicated structures or type definitions.
> Rather, all the nous regarding interpretation of the messages that are
> sent is in the policy manager and the drivers. One driver might say it's
> capable of states called "D0, D1 and D3", another (system) states called
> "Deep Sleep" and "Big Sleep". Nothing but the driver itself and
> userspace manager need to know how to interpret & use these states.
>
> Inter-relationships between drivers are _not_ included in this
> information. The policy manager sets policy, the drivers deal with the
> specifics of implementing it.
This all sounds exactly like the way we're headed as well, so I'm
definitely interested in anything I can do to help. Was thinking that
can start defining kobject_uevent power events and attributes (with
enough detail that acpid could use it instead of /proc if the ACPI
drivers were to convert to it).
Capturing the relationships between drivers is difficult. If nobody's
already looking into this then I'll take this up soon.
> The userspace manager can in turn [en|dis]able capabilities and send a
> list of run-time states that the driver can move between according to
> its own logic (eg lack of active children) without notifying the
> userspace manager. This would fit in with your power modes above, even
> to the level of "cpu idle".
At dynamicpower.sf.net we do something similar for cpufreq-style scaling
of platform clocks and voltages, setting up desired policy for various
platform clocks/voltages according to changes in low-level system state
(primarily scheduler state) from userspace and then letting the state
machine run without interaction. Similar policy objects for devices
sounds intriguing, although the device-specific nature of event triggers
probably makes this quite difficult.
Mac OS X support for some of these concepts is documented at
developer.apple.com, looking for ideas to steal... Thanks,
--
Todd