Hi all,
This patchset adds support for ACPI D3Cold state and ZPODD(Zero Power
Optical Disk Drive).
ZPODD is a new feature in SATA 3.1 specification. It provides a way to
power off unused CDROM.
CDROM is powered on/off by executing ACPI power resource's _ON/_OFF
method. It enters ACPI D3Cold state when powered off.
PATCH 1 to 4: Adds ACPI D3Cold state support
PATCH 5 to 6: Adds ZPODD support
Applied on top of libata/ALL branch.
[PATCH 1/6] ACPI: Introduce ACPI D3_COLD state support
[PATCH 2/6] ACPI: Reference devices in ACPI Power Resource
[PATCH 3/6] ACPI: Runtime resume all devices covered by a power resource
[PATCH 4/6] PM / Runtime: Introduce flag can_power_off
[PATCH 5/6] PCI: Move acpi_dev_run_wake to acpi core
[PATCH 6/6] libata: add ZPODD support
drivers/acpi/power.c | 141 +++++++++++++++++++++++++++++++++++++++++++-
drivers/acpi/scan.c | 10 +++-
drivers/acpi/sleep.c | 35 +++++++++++
drivers/ata/libata-acpi.c | 64 ++++++++++++++++----
drivers/pci/pci-acpi.c | 40 +------------
drivers/scsi/sr.c | 39 ++++++++++++
drivers/scsi/sr.h | 3 +
include/acpi/acpi_bus.h | 7 ++
include/linux/pm.h | 1 +
include/linux/pm_runtime.h | 30 +++++++++
10 files changed, 319 insertions(+), 51 deletions(-)
Thanks for any comments.
Lin Ming
From: Zhang Rui <[email protected]>
ACPI Power Resource can power on/off a couple of devices.
Introduce interfaces to register/unregister a device to/from
an ACPI Power Resource.
Signed-off-by: Zhang Rui <[email protected]>
---
drivers/acpi/power.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++
include/acpi/acpi_bus.h | 2 +
2 files changed, 130 insertions(+), 0 deletions(-)
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 0d681fb..a7e2305 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -84,10 +84,25 @@ struct acpi_power_resource {
u32 order;
unsigned int ref_count;
struct mutex resource_lock;
+ struct list_head devices; /* list for devices powered by this PR */
};
static struct list_head acpi_power_resource_list;
+/*
+ * When a power resource is turned on, all the devices are put to an uninitialized
+ * state although their ACPI states are still D0.
+ * To handle this, we need to keep a list of all devices powered by the power resource,
+ * and resume all of them.
+ * Currently, we only support this case:
+ * 1) multiple devices share the same power resoruce,
+ * 2) One device uses One Power Resource ONLY.
+ */
+struct acpi_powered_device {
+ struct list_head node;
+ struct device *dev;
+};
+
/* --------------------------------------------------------------------------
Power Resource Management
-------------------------------------------------------------------------- */
@@ -455,6 +470,118 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
Device Power Management
-------------------------------------------------------------------------- */
+int acpi_power_resource_register_device(struct device *dev, acpi_handle handle)
+{
+ struct acpi_power_resource *resource;
+ struct acpi_device *acpi_dev, *res_dev;
+ acpi_handle res_handle = NULL;
+ struct acpi_powered_device *apd;
+ int i;
+ int ret;
+
+ if (!handle || !dev)
+ return -EINVAL;
+
+ ret = acpi_bus_get_device(handle, &acpi_dev);
+ if (ret)
+ goto no_power_resource;
+
+ if (!acpi_dev->power.flags.power_resources)
+ goto no_power_resource;
+
+ for (i = 0; i <= ACPI_STATE_D3; i++) {
+ struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
+
+ if (!ps->flags.valid)
+ continue;
+
+ if (ps->resources.count > 1)
+ return 0;
+
+ if (!res_handle)
+ res_handle = ps->resources.handles[0];
+ else if (res_handle != ps->resources.handles[0])
+ return 0;
+ }
+
+ ret = acpi_bus_get_device(res_handle, &res_dev);
+ if (ret)
+ goto no_power_resource;
+
+ resource = res_dev->driver_data;
+ if (!resource)
+ goto no_power_resource;
+
+ apd = kzalloc(sizeof(struct acpi_powered_device), GFP_KERNEL);
+ if (!apd)
+ return -ENOMEM;
+
+ apd->dev = dev;
+ mutex_lock(&resource->resource_lock);
+ list_add_tail(&apd->node, &resource->devices);
+ mutex_unlock(&resource->resource_lock);
+ printk(KERN_INFO PREFIX "Device %s uses Power Resource %s\n", dev->kobj.name, acpi_device_name(res_dev));
+
+ return 0;
+
+no_power_resource:
+ printk(KERN_WARNING PREFIX "Invalid Power Resource to register!");
+ return -ENODEV;
+}
+
+void acpi_power_resource_unregister_device(struct device *dev, acpi_handle handle)
+{
+ struct acpi_power_resource *resource;
+ struct acpi_device *acpi_dev, *res_dev;
+ acpi_handle res_handle = NULL;
+ struct acpi_powered_device *apd, *tmp;
+ int i;
+ int ret;
+
+ if (!handle || !dev)
+ return;
+
+ ret = acpi_bus_get_device(handle, &acpi_dev);
+ if (ret)
+ return;
+
+ if (!acpi_dev->power.flags.power_resources)
+ return;
+
+ for (i = 0; i <= ACPI_STATE_D3; i++) {
+ struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
+
+ if (!ps->flags.valid)
+ continue;
+
+ if (ps->resources.count > 1)
+ return;
+
+ res_handle = ps->resources.handles[0];
+ break;
+ }
+
+ if (!res_handle)
+ return;
+
+ ret = acpi_bus_get_device(res_handle, &res_dev);
+ if (ret)
+ return;
+
+ resource = res_dev->driver_data;
+ if (!resource)
+ return;
+
+ mutex_lock(&resource->resource_lock);
+ list_for_each_entry_safe(apd, tmp, &resource->devices, node) {
+ if (apd->dev == dev) {
+ list_del(&apd->node);
+ kfree(apd);
+ }
+ }
+ mutex_unlock(&resource->resource_lock);
+}
+
int acpi_power_get_inferred_state(struct acpi_device *device, int *state)
{
int result = 0;
@@ -550,6 +677,7 @@ static int acpi_power_add(struct acpi_device *device)
resource->device = device;
mutex_init(&resource->resource_lock);
+ INIT_LIST_HEAD(&resource->devices);
strcpy(resource->name, device->pnp.bus_id);
strcpy(acpi_device_name(device), ACPI_POWER_DEVICE_NAME);
strcpy(acpi_device_class(device), ACPI_POWER_CLASS);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 6cd5b64..32fb899 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -322,6 +322,8 @@ int acpi_bus_get_status(struct acpi_device *device);
int acpi_bus_set_power(acpi_handle handle, int state);
int acpi_bus_update_power(acpi_handle handle, int *state_p);
bool acpi_bus_power_manageable(acpi_handle handle);
+int acpi_power_resource_register_device(struct device *dev, acpi_handle handle);
+void acpi_power_resource_unregister_device(struct device *dev, acpi_handle handle);
bool acpi_bus_can_wakeup(acpi_handle handle);
#ifdef CONFIG_ACPI_PROC_EVENT
int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data);
--
1.7.2.5
ZPODD(Zero Power Optical Disk Drive) is a new feature in
SATA 3.1 specification. It provides a way to power off unused CDROM.
CDROM is powered off by executing ACPI power resource's _OFF method.
When CDROM is powered off(D3Cold state), inserting disk will trigger a
wakeup event(GPE). GPE AML handler notifies the associated device. Then
CDROM is resumed in the notify handler.
Signed-off-by: Lin Ming <[email protected]>
---
drivers/ata/libata-acpi.c | 64 +++++++++++++++++++++++++++++++++++++-------
drivers/scsi/sr.c | 39 +++++++++++++++++++++++++++
drivers/scsi/sr.h | 3 ++
3 files changed, 95 insertions(+), 11 deletions(-)
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index b03e468..acbb85e 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -16,6 +16,7 @@
#include <linux/libata.h>
#include <linux/pci.h>
#include <linux/slab.h>
+#include <linux/pm_runtime.h>
#include <scsi/scsi_device.h>
#include "libata.h"
@@ -841,23 +842,45 @@ void ata_acpi_on_resume(struct ata_port *ap)
void ata_acpi_set_state(struct ata_port *ap, pm_message_t state)
{
struct ata_device *dev;
-
- if (!ata_ap_acpi_handle(ap) || (ap->flags & ATA_FLAG_ACPI_SATA))
- return;
+ acpi_handle handle;
+ int acpi_state;
/* channel first and then drives for power on and vica versa
for power off */
- if (state.event == PM_EVENT_ON)
- acpi_bus_set_power(ata_ap_acpi_handle(ap), ACPI_STATE_D0);
+ handle = ata_ap_acpi_handle(ap);
+ if (handle && state.event == PM_EVENT_ON)
+ acpi_bus_set_power(handle, ACPI_STATE_D0);
ata_for_each_dev(dev, &ap->link, ENABLED) {
- if (ata_dev_acpi_handle(dev))
- acpi_bus_set_power(ata_dev_acpi_handle(dev),
- state.event == PM_EVENT_ON ?
- ACPI_STATE_D0 : ACPI_STATE_D3);
+ handle = ata_dev_acpi_handle(dev);
+ if (!handle)
+ continue;
+
+ if (state.event != PM_EVENT_ON) {
+ acpi_state = acpi_pm_device_sleep_state(
+ &dev->sdev->sdev_gendev, NULL);
+ if (acpi_state > 0) {
+ if (acpi_state == ACPI_STATE_D3_COLD &&
+ !pm_runtime_can_power_off(
+ &dev->sdev->sdev_gendev))
+ acpi_state = ACPI_STATE_D3;
+
+ acpi_bus_set_power(handle, acpi_state);
+ }
+ if (ap->tdev.power.request == RPM_REQ_SUSPEND);
+ acpi_pm_device_run_wake(
+ &dev->sdev->sdev_gendev, true);
+ } else {
+ if (ap->tdev.power.request == RPM_REQ_RESUME);
+ acpi_pm_device_run_wake(
+ &dev->sdev->sdev_gendev, false);
+ acpi_bus_set_power(handle, ACPI_STATE_D0);
+ }
}
- if (state.event != PM_EVENT_ON)
- acpi_bus_set_power(ata_ap_acpi_handle(ap), ACPI_STATE_D3);
+
+ handle = ata_ap_acpi_handle(ap);
+ if (handle && state.event != PM_EVENT_ON)
+ acpi_bus_set_power(handle, ACPI_STATE_D3);
}
/**
@@ -1007,6 +1030,14 @@ static int ata_acpi_bind_host(struct device *dev, int host, acpi_handle *handle)
return 0;
}
+static void ata_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
+{
+ struct ata_device *ata_dev = context;
+
+ if (event == ACPI_NOTIFY_DEVICE_WAKE && ata_dev)
+ scsi_autopm_get_device(ata_dev->sdev);
+}
+
static int ata_acpi_bind_device(struct device *dev, int channel, int id,
acpi_handle *handle)
{
@@ -1014,6 +1045,8 @@ static int ata_acpi_bind_device(struct device *dev, int channel, int id,
struct Scsi_Host *shost = dev_to_shost(host);
struct ata_port *ap = ata_shost_to_port(shost);
struct ata_device *ata_dev;
+ struct acpi_device *acpi_dev;
+ acpi_status status;
if (ap->flags & ATA_FLAG_ACPI_SATA)
ata_dev = &ap->link.device[channel];
@@ -1028,6 +1061,15 @@ static int ata_acpi_bind_device(struct device *dev, int channel, int id,
if (!is_registered_hotplug_dock_device(&ata_acpi_dev_dock_ops))
register_hotplug_dock_device(*handle, &ata_acpi_dev_dock_ops, ata_dev);
+ status = acpi_bus_get_device(*handle, &acpi_dev);
+ if (ACPI_SUCCESS(status) && acpi_dev->wakeup.flags.run_wake) {
+ acpi_install_notify_handler(*handle, ACPI_SYSTEM_NOTIFY,
+ ata_acpi_wake_dev, ata_dev);
+ device_set_run_wake(dev, true);
+
+ acpi_power_resource_register_device(dev, *handle);
+ }
+
return 0;
}
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 5fc97d2..bf4eace 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -45,6 +45,7 @@
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/pm_runtime.h>
#include <asm/uaccess.h>
#include <scsi/scsi.h>
@@ -80,12 +81,38 @@ static int sr_probe(struct device *);
static int sr_remove(struct device *);
static int sr_done(struct scsi_cmnd *);
+static int sr_suspend(struct device *dev)
+{
+ struct scsi_cd *cd;
+
+ cd = dev_get_drvdata(dev);
+ if (cd->zpodd)
+ pm_runtime_enable_power_off(dev);
+
+ return 0;
+}
+
+static int sr_resume(struct device *dev)
+{
+ struct scsi_cd *cd;
+
+ cd = dev_get_drvdata(dev);
+ if (cd->zpodd) {
+ pm_runtime_disable_power_off(dev);
+ cd->zpodd_event = 0;
+ }
+
+ return 0;
+}
+
static struct scsi_driver sr_template = {
.owner = THIS_MODULE,
.gendrv = {
.name = "sr",
.probe = sr_probe,
.remove = sr_remove,
+ .suspend = sr_suspend,
+ .resume = sr_resume,
},
.done = sr_done,
};
@@ -216,6 +243,10 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi,
unsigned int events;
int ret;
+ /* Not necessary to check events if enter ZPODD state */
+ if (cd->zpodd && pm_runtime_suspended(&cd->device->sdev_gendev))
+ return 0;
+
/* no changer support */
if (CDSL_CURRENT != slot)
return 0;
@@ -260,6 +291,11 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi,
cd->media_present = scsi_status_is_good(ret) ||
(scsi_sense_valid(&sshdr) && sshdr.asc != 0x3a);
+ if (!cd->media_present && cd->zpodd && !cd->zpodd_event) {
+ scsi_autopm_put_device(cd->device);
+ cd->zpodd_event = 1;
+ }
+
if (last_present != cd->media_present)
cd->device->changed = 1;
@@ -716,6 +752,9 @@ static int sr_probe(struct device *dev)
disk->flags |= GENHD_FL_REMOVABLE;
add_disk(disk);
+ if (device_run_wake(dev))
+ cd->zpodd = 1;
+
sdev_printk(KERN_DEBUG, sdev,
"Attached scsi CD-ROM %s\n", cd->cdi.name);
return 0;
diff --git a/drivers/scsi/sr.h b/drivers/scsi/sr.h
index 37c8f6b..39b3d8c 100644
--- a/drivers/scsi/sr.h
+++ b/drivers/scsi/sr.h
@@ -42,6 +42,9 @@ typedef struct scsi_cd {
unsigned readcd_cdda:1; /* reading audio data using READ_CD */
unsigned media_present:1; /* media is present */
+ unsigned zpodd:1; /* is ZPODD supported */
+ unsigned zpodd_event:1;
+
/* GET_EVENT spurious event handling, blk layer guarantees exclusion */
int tur_mismatch; /* nr of get_event TUR mismatches */
bool tur_changed:1; /* changed according to TUR */
--
1.7.2.5
acpi_dev_run_wake is a generic function which can be used by
other subsystem too. Rename it to acpi_pm_device_run_wake, to be
consistent with acpi_pm_device_sleep_wake.
Then move it to acpi core.
Signed-off-by: Lin Ming <[email protected]>
---
drivers/acpi/sleep.c | 35 +++++++++++++++++++++++++++++++++++
drivers/pci/pci-acpi.c | 40 +++-------------------------------------
include/acpi/acpi_bus.h | 5 +++++
3 files changed, 43 insertions(+), 37 deletions(-)
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index ca191ff..00fa664 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -17,6 +17,7 @@
#include <linux/suspend.h>
#include <linux/reboot.h>
#include <linux/acpi.h>
+#include <linux/pm_runtime.h>
#include <asm/io.h>
@@ -730,6 +731,40 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
#ifdef CONFIG_PM_SLEEP
/**
+ * acpi_pm_device_run_wake - Enable/disable wake-up for given device.
+ * @phys_dev: Device to enable/disable the platform to wake-up the system for.
+ * @enable: Whether enable or disable the wake-up functionality.
+ *
+ * Find the ACPI device object corresponding to @pci_dev and try to
+ * enable/disable the GPE associated with it.
+ */
+int acpi_pm_device_run_wake(struct device *phys_dev, bool enable)
+{
+ struct acpi_device *dev;
+ acpi_handle handle;
+
+ if (!device_run_wake(phys_dev))
+ return -EINVAL;
+
+ handle = DEVICE_ACPI_HANDLE(phys_dev);
+ if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &dev))) {
+ dev_dbg(phys_dev, "ACPI handle has no context in %s!\n",
+ __func__);
+ return -ENODEV;
+ }
+
+ if (enable) {
+ acpi_enable_wakeup_device_power(dev, ACPI_STATE_S0);
+ acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
+ } else {
+ acpi_disable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
+ acpi_disable_wakeup_device_power(dev);
+ }
+
+ return 0;
+}
+
+/**
* acpi_pm_device_sleep_wake - enable or disable the system wake-up
* capability of given device
* @dev: device to handle
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 060fd22..0f150f2 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -277,40 +277,6 @@ static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable)
return 0;
}
-/**
- * acpi_dev_run_wake - Enable/disable wake-up for given device.
- * @phys_dev: Device to enable/disable the platform to wake-up the system for.
- * @enable: Whether enable or disable the wake-up functionality.
- *
- * Find the ACPI device object corresponding to @pci_dev and try to
- * enable/disable the GPE associated with it.
- */
-static int acpi_dev_run_wake(struct device *phys_dev, bool enable)
-{
- struct acpi_device *dev;
- acpi_handle handle;
-
- if (!device_run_wake(phys_dev))
- return -EINVAL;
-
- handle = DEVICE_ACPI_HANDLE(phys_dev);
- if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &dev))) {
- dev_dbg(phys_dev, "ACPI handle has no context in %s!\n",
- __func__);
- return -ENODEV;
- }
-
- if (enable) {
- acpi_enable_wakeup_device_power(dev, ACPI_STATE_S0);
- acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
- } else {
- acpi_disable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
- acpi_disable_wakeup_device_power(dev);
- }
-
- return 0;
-}
-
static void acpi_pci_propagate_run_wake(struct pci_bus *bus, bool enable)
{
while (bus->parent) {
@@ -318,14 +284,14 @@ static void acpi_pci_propagate_run_wake(struct pci_bus *bus, bool enable)
if (bridge->pme_interrupt)
return;
- if (!acpi_dev_run_wake(&bridge->dev, enable))
+ if (!acpi_pm_device_run_wake(&bridge->dev, enable))
return;
bus = bus->parent;
}
/* We have reached the root bus. */
if (bus->bridge)
- acpi_dev_run_wake(bus->bridge, enable);
+ acpi_pm_device_run_wake(bus->bridge, enable);
}
static int acpi_pci_run_wake(struct pci_dev *dev, bool enable)
@@ -333,7 +299,7 @@ static int acpi_pci_run_wake(struct pci_dev *dev, bool enable)
if (dev->pme_interrupt)
return 0;
- if (!acpi_dev_run_wake(&dev->dev, enable))
+ if (!acpi_pm_device_run_wake(&dev->dev, enable))
return 0;
acpi_pci_propagate_run_wake(dev->bus, enable);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 32fb899..2d556e3 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -394,8 +394,13 @@ static inline int acpi_pm_device_sleep_state(struct device *d, int *p)
#endif
#ifdef CONFIG_PM_SLEEP
+int acpi_pm_device_run_wake(struct device *, bool);
int acpi_pm_device_sleep_wake(struct device *, bool);
#else
+static inline int acpi_pm_device_run_wake(struct device *dev, bool enable)
+{
+ return -ENODEV;
+}
static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
{
return -ENODEV;
--
1.7.2.5
From: Zhang Rui <[email protected]>
Introduce flag can_power_off in device structure to support runtime
power off/on.
Note that, for a specific device driver,
"support runtime power off/on" means that the driver .runtime_suspend
callback needs to
1) save all the context so that it can restore the device back to the previous
working state after powered on.
2) set can_power_off flag to tell the driver model that it's ready for power off.
The following example shows how this works.
device A
|---------|
v v
device B device C
A is the parent of device B and device C, and device A/B/C shares the
same power logic
(Only device A knows how to turn on/off the power).
In order to power off A, B, C at runtime,
1) device B and device C should support runtime power off
(runtime suspended with can_power_off flag set)
2) pm idle request for device A is fired by runtime PM core.
3) in device A .runtime_suspend callback, it tries to set can_power_off flag.
4) if succeed, it means all its children have been ready for power off
and it can turn off the power at any time.
5) if failed, it means at least one of its children does not support runtime
power off, thus the power can not be turned off.
Signed-off-by: Zhang Rui <[email protected]>
---
include/linux/pm.h | 1 +
include/linux/pm_runtime.h | 30 ++++++++++++++++++++++++++++++
2 files changed, 31 insertions(+), 0 deletions(-)
diff --git a/include/linux/pm.h b/include/linux/pm.h
index e4982ac..4a09c76 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -474,6 +474,7 @@ struct dev_pm_info {
bool is_prepared:1; /* Owned by the PM core */
bool is_suspended:1; /* Ditto */
bool ignore_children:1;
+ bool can_power_off:1;
spinlock_t lock;
#ifdef CONFIG_PM_SLEEP
struct list_head entry;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 609daae..81f3f13 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -100,6 +100,33 @@ static inline void pm_runtime_mark_last_busy(struct device *dev)
ACCESS_ONCE(dev->power.last_busy) = jiffies;
}
+static inline bool pm_runtime_can_power_off(struct device *dev)
+{
+ return !!dev->power.can_power_off;
+}
+
+static inline int check_fn(struct device *dev, void *data)
+{
+ return pm_runtime_can_power_off(dev) ? 0 : -1;
+}
+
+static inline bool pm_runtime_allow_power_off(struct device *dev)
+{
+ return device_for_each_child(dev, NULL, check_fn) ? false : true;
+}
+
+static inline void pm_runtime_enable_power_off(struct device *dev)
+{
+ if (!dev->power.is_prepared)
+ dev->power.can_power_off = pm_runtime_allow_power_off(dev);
+}
+
+static inline void pm_runtime_disable_power_off(struct device *dev)
+{
+ if (!dev->power.is_prepared)
+ dev->power.can_power_off = false;
+}
+
#else /* !CONFIG_PM_RUNTIME */
static inline int __pm_runtime_idle(struct device *dev, int rpmflags)
@@ -149,6 +176,9 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev,
int delay) {}
static inline unsigned long pm_runtime_autosuspend_expiration(
struct device *dev) { return 0; }
+static inline bool pm_runtime_can_power_off(struct device *dev) { return false; }
+static inline void pm_runtime_enable_power_off(struct device *dev) {}
+static inline void pm_runtime_disable_power_off(struct device *dev) {}
static inline void pm_runtime_update_max_time_suspended(struct device *dev,
s64 delta_ns) {}
--
1.7.2.5
From: Zhang Rui <[email protected]>
When an ACPI Power Resource is turned on, we should runtime
resume all the devices covered by this ACPI Power Resource.
So that they have a chance to runtime suspend again.
Or else, they will be in uninitialized state after powered on.
Signed-off-by: Zhang Rui <[email protected]>
---
drivers/acpi/power.c | 9 +++++++++
1 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index a7e2305..86791e3 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -40,6 +40,7 @@
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
+#include <linux/pm_runtime.h>
#include <acpi/acpi_bus.h>
#include <acpi/acpi_drivers.h>
#include "sleep.h"
@@ -201,6 +202,7 @@ static int acpi_power_get_list_state(struct acpi_handle_list *list, int *state)
static int __acpi_power_on(struct acpi_power_resource *resource)
{
acpi_status status = AE_OK;
+ struct acpi_powered_device *apd;
status = acpi_evaluate_object(resource->device->handle, "_ON", NULL, NULL);
if (ACPI_FAILURE(status))
@@ -212,6 +214,13 @@ static int __acpi_power_on(struct acpi_power_resource *resource)
ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Power resource [%s] turned on\n",
resource->name));
+ /* resume all the devices when power resource is turned on */
+ list_for_each_entry(apd, &resource->devices, node){
+ pm_request_resume(apd->dev);
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "resume device %s\n",
+ apd->dev->kobj.name));
+ }
+
return 0;
}
--
1.7.2.5
From: Zhang Rui <[email protected]>
If a device has _PR3._ON, it means the device supports D3_HOT.
If a device has _PR3._OFF, it means the device supports D3_COLD.
Add the ability to validate and enter D3_COLD state in ACPI.
Signed-off-by: Zhang Rui <[email protected]>
---
drivers/acpi/power.c | 4 ++--
drivers/acpi/scan.c | 10 +++++++++-
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 9ac2a9f..0d681fb 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -500,14 +500,14 @@ int acpi_power_transition(struct acpi_device *device, int state)
{
int result;
- if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3))
+ if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD))
return -EINVAL;
if (device->power.state == state)
return 0;
if ((device->power.state < ACPI_STATE_D0)
- || (device->power.state > ACPI_STATE_D3))
+ || (device->power.state > ACPI_STATE_D3_COLD))
return -ENODEV;
/* TBD: Resources must be ordered. */
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 8ab80ba..a9d4391 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -881,8 +881,16 @@ static int acpi_bus_get_power_flags(struct acpi_device *device)
device->power.flags.power_resources = 1;
ps->flags.valid = 1;
- for (j = 0; j < ps->resources.count; j++)
+ for (j = 0; j < ps->resources.count; j++) {
acpi_bus_add_power_resource(ps->resources.handles[j]);
+ /* Check for D3_COLD support. _PR3._OFF equals D3_COLD ? */
+ if (i == ACPI_STATE_D3) {
+ if (j == 0)
+ device->power.states[ACPI_STATE_D3_COLD].flags.valid = 1;
+ status = acpi_get_handle(ps->resources.handles[j], "_OFF", &handle);
+ device->power.states[ACPI_STATE_D3_COLD].flags.valid &= ACPI_SUCCESS(status);
+ }
+ }
}
/* Evaluate "_PSx" to see if we can do explicit sets */
--
1.7.2.5
On Mon, 13 Feb 2012, Lin Ming wrote:
> From: Zhang Rui <[email protected]>
>
> Introduce flag can_power_off in device structure to support runtime
> power off/on.
>
> Note that, for a specific device driver,
> "support runtime power off/on" means that the driver .runtime_suspend
> callback needs to
> 1) save all the context so that it can restore the device back to the previous
> working state after powered on.
> 2) set can_power_off flag to tell the driver model that it's ready for power off.
>
> The following example shows how this works.
>
> device A
> |---------|
> v v
> device B device C
>
> A is the parent of device B and device C, and device A/B/C shares the
> same power logic
> (Only device A knows how to turn on/off the power).
>
> In order to power off A, B, C at runtime,
> 1) device B and device C should support runtime power off
> (runtime suspended with can_power_off flag set)
> 2) pm idle request for device A is fired by runtime PM core.
> 3) in device A .runtime_suspend callback, it tries to set can_power_off flag.
> 4) if succeed, it means all its children have been ready for power off
> and it can turn off the power at any time.
> 5) if failed, it means at least one of its children does not support runtime
> power off, thus the power can not be turned off.
I'm not sure if this is really the right approach. What you're trying
to do is implement two different low-power states, basically D3hot and
D3cold. Currently the runtime PM core doesn't support such things; all
it knows about is low power and full power.
Before doing an ad-hoc implementation, it would be best to step back
and think about other subsystems. Other sorts of devices may well have
multiple low-power states. What's the best way for this to be
supported by the PM core?
Alan Stern
On Monday, February 13, 2012, Alan Stern wrote:
> On Mon, 13 Feb 2012, Lin Ming wrote:
>
> > From: Zhang Rui <[email protected]>
> >
> > Introduce flag can_power_off in device structure to support runtime
> > power off/on.
> >
> > Note that, for a specific device driver,
> > "support runtime power off/on" means that the driver .runtime_suspend
> > callback needs to
> > 1) save all the context so that it can restore the device back to the previous
> > working state after powered on.
> > 2) set can_power_off flag to tell the driver model that it's ready for power off.
> >
> > The following example shows how this works.
> >
> > device A
> > |---------|
> > v v
> > device B device C
> >
> > A is the parent of device B and device C, and device A/B/C shares the
> > same power logic
> > (Only device A knows how to turn on/off the power).
> >
> > In order to power off A, B, C at runtime,
> > 1) device B and device C should support runtime power off
> > (runtime suspended with can_power_off flag set)
> > 2) pm idle request for device A is fired by runtime PM core.
> > 3) in device A .runtime_suspend callback, it tries to set can_power_off flag.
> > 4) if succeed, it means all its children have been ready for power off
> > and it can turn off the power at any time.
> > 5) if failed, it means at least one of its children does not support runtime
> > power off, thus the power can not be turned off.
>
> I'm not sure if this is really the right approach. What you're trying
> to do is implement two different low-power states, basically D3hot and
> D3cold. Currently the runtime PM core doesn't support such things; all
> it knows about is low power and full power.
I'd rather say all it knows about is "suspended" and "active", which mean
"the device is not processing I/O" and "the device may be processing I/O",
respectively. A "suspended" device may or may not be in a low-power state,
but the runtime PM core doesn't care about that.
> Before doing an ad-hoc implementation, it would be best to step back
> and think about other subsystems. Other sorts of devices may well have
> multiple low-power states. What's the best way for this to be
> supported by the PM core?
Well, I honestly don't think there's any way they all can be covered at the
same time and that's why we chose to support only "suspended" and "active"
as defined above. The handling of multiple low-power states must be
implemented outside of the runtime PM core (like in the PCI core, for example).
Thanks,
Rafael
On Monday, February 13, 2012, Lin Ming wrote:
> From: Zhang Rui <[email protected]>
>
> If a device has _PR3._ON, it means the device supports D3_HOT.
> If a device has _PR3._OFF, it means the device supports D3_COLD.
> Add the ability to validate and enter D3_COLD state in ACPI.
>
> Signed-off-by: Zhang Rui <[email protected]>
This is supposed to be ACPI 5.0 support, right?
So can anyone please tell me what part of the ACPI 5.0 spec is the
basis of this patch, because I can't see that immediately?
The only places where D3Cold is _mentioned_ are Section 7.2.12 (_PRE, which
appears to be new in 5.0), Section 7.2.20 (_S0W), Section 7.2.21 (_S1W),
Section 7.2.22 (_S2W), Section 7.2.23 (_S3W) and Section 7.2.24 (_S4W).
None of them mentions those _PR3._ON and _PR3._OFF things above.
Moreover, my understanding of the spec is that D3Cold means all of the
power resources returned by _PR3 are "off" (whereas some of them will be
"on" in D3hot).
> ---
> drivers/acpi/power.c | 4 ++--
> drivers/acpi/scan.c | 10 +++++++++-
> 2 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
> index 9ac2a9f..0d681fb 100644
> --- a/drivers/acpi/power.c
> +++ b/drivers/acpi/power.c
> @@ -500,14 +500,14 @@ int acpi_power_transition(struct acpi_device *device, int state)
> {
> int result;
>
> - if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3))
> + if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD))
> return -EINVAL;
>
> if (device->power.state == state)
> return 0;
>
> if ((device->power.state < ACPI_STATE_D0)
> - || (device->power.state > ACPI_STATE_D3))
> + || (device->power.state > ACPI_STATE_D3_COLD))
> return -ENODEV;
>
> /* TBD: Resources must be ordered. */
> diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
> index 8ab80ba..a9d4391 100644
> --- a/drivers/acpi/scan.c
> +++ b/drivers/acpi/scan.c
> @@ -881,8 +881,16 @@ static int acpi_bus_get_power_flags(struct acpi_device *device)
>
> device->power.flags.power_resources = 1;
> ps->flags.valid = 1;
> - for (j = 0; j < ps->resources.count; j++)
> + for (j = 0; j < ps->resources.count; j++) {
> acpi_bus_add_power_resource(ps->resources.handles[j]);
> + /* Check for D3_COLD support. _PR3._OFF equals D3_COLD ? */
> + if (i == ACPI_STATE_D3) {
> + if (j == 0)
> + device->power.states[ACPI_STATE_D3_COLD].flags.valid = 1;
> + status = acpi_get_handle(ps->resources.handles[j], "_OFF", &handle);
> + device->power.states[ACPI_STATE_D3_COLD].flags.valid &= ACPI_SUCCESS(status);
> + }
> + }
Sorry, but this doesn't make sense to me. Power resources always have
the _OFF method, right?
> }
>
> /* Evaluate "_PSx" to see if we can do explicit sets */
>
Rafael
On Mon, 13 Feb 2012, Rafael J. Wysocki wrote:
> > I'm not sure if this is really the right approach. What you're trying
> > to do is implement two different low-power states, basically D3hot and
> > D3cold. Currently the runtime PM core doesn't support such things; all
> > it knows about is low power and full power.
>
> I'd rather say all it knows about is "suspended" and "active", which mean
> "the device is not processing I/O" and "the device may be processing I/O",
> respectively. A "suspended" device may or may not be in a low-power state,
> but the runtime PM core doesn't care about that.
Yes, okay. We can say that this patch tries to implement two different
"suspended" states, basically "low power" and "power off" (or D3hot and
D3cold).
> > Before doing an ad-hoc implementation, it would be best to step back
> > and think about other subsystems. Other sorts of devices may well have
> > multiple low-power states. What's the best way for this to be
> > supported by the PM core?
>
> Well, I honestly don't think there's any way they all can be covered at the
> same time and that's why we chose to support only "suspended" and "active"
> as defined above. The handling of multiple low-power states must be
> implemented outside of the runtime PM core (like in the PCI core, for example).
That's the point. If this is to be implemented outside of the runtime
PM core, should the patch be allowed to add new fields to struct
dev_pm_info (which has to be shared among all subsystems)?
Or to put it another way, if we do add new fields to struct dev_pm_info
(like can_power_off) in order to help support multiple "suspended"
states, shouldn't these new fields be such that they can be used by
many different subsystems rather than being special for the
full-power/no-power situation?
Likewise, should new routines like pm_runtime_allow_power_off() be
added to the runtime PM core if they are going to be used just by PCI?
Alan Stern
On Monday, February 13, 2012, Lin Ming wrote:
> From: Zhang Rui <[email protected]>
>
> ACPI Power Resource can power on/off a couple of devices.
> Introduce interfaces to register/unregister a device to/from
> an ACPI Power Resource.
>
> Signed-off-by: Zhang Rui <[email protected]>
> ---
> drivers/acpi/power.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++
> include/acpi/acpi_bus.h | 2 +
> 2 files changed, 130 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
> index 0d681fb..a7e2305 100644
> --- a/drivers/acpi/power.c
> +++ b/drivers/acpi/power.c
> @@ -84,10 +84,25 @@ struct acpi_power_resource {
> u32 order;
> unsigned int ref_count;
> struct mutex resource_lock;
> + struct list_head devices; /* list for devices powered by this PR */
> };
>
> static struct list_head acpi_power_resource_list;
>
> +/*
> + * When a power resource is turned on, all the devices are put to an uninitialized
> + * state although their ACPI states are still D0.
> + * To handle this, we need to keep a list of all devices powered by the power resource,
> + * and resume all of them.
> + * Currently, we only support this case:
> + * 1) multiple devices share the same power resoruce,
> + * 2) One device uses One Power Resource ONLY.
This is incorrect. We actually support all combinations. However, we don't
generally support checking what devices depend on the given power resource.
> + */
> +struct acpi_powered_device {
> + struct list_head node;
> + struct device *dev;
> +};
> +
> /* --------------------------------------------------------------------------
> Power Resource Management
> -------------------------------------------------------------------------- */
> @@ -455,6 +470,118 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
> Device Power Management
> -------------------------------------------------------------------------- */
>
> +int acpi_power_resource_register_device(struct device *dev, acpi_handle handle)
> +{
> + struct acpi_power_resource *resource;
> + struct acpi_device *acpi_dev, *res_dev;
> + acpi_handle res_handle = NULL;
> + struct acpi_powered_device *apd;
> + int i;
> + int ret;
> +
> + if (!handle || !dev)
> + return -EINVAL;
> +
> + ret = acpi_bus_get_device(handle, &acpi_dev);
> + if (ret)
> + goto no_power_resource;
> +
> + if (!acpi_dev->power.flags.power_resources)
> + goto no_power_resource;
> +
> + for (i = 0; i <= ACPI_STATE_D3; i++) {
> + struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
> +
> + if (!ps->flags.valid)
> + continue;
> +
> + if (ps->resources.count > 1)
> + return 0;
> +
> + if (!res_handle)
> + res_handle = ps->resources.handles[0];
> + else if (res_handle != ps->resources.handles[0])
> + return 0;
> + }
I'm not sure what the above checks are needed for. It seems that this function
will only be called from acpi_bus_add_power_resource() (which needs to be
modified for this purpose, BTW), so it doesn't need to check all those things
(those checks have been made already).
> +
> + ret = acpi_bus_get_device(res_handle, &res_dev);
> + if (ret)
> + goto no_power_resource;
> +
> + resource = res_dev->driver_data;
> + if (!resource)
> + goto no_power_resource;
> +
> + apd = kzalloc(sizeof(struct acpi_powered_device), GFP_KERNEL);
> + if (!apd)
> + return -ENOMEM;
> +
> + apd->dev = dev;
> + mutex_lock(&resource->resource_lock);
> + list_add_tail(&apd->node, &resource->devices);
> + mutex_unlock(&resource->resource_lock);
> + printk(KERN_INFO PREFIX "Device %s uses Power Resource %s\n", dev->kobj.name, acpi_device_name(res_dev));
> +
> + return 0;
> +
> +no_power_resource:
> + printk(KERN_WARNING PREFIX "Invalid Power Resource to register!");
> + return -ENODEV;
> +}
> +
> +void acpi_power_resource_unregister_device(struct device *dev, acpi_handle handle)
> +{
Who is supposed to call this one?
> + struct acpi_power_resource *resource;
> + struct acpi_device *acpi_dev, *res_dev;
> + acpi_handle res_handle = NULL;
> + struct acpi_powered_device *apd, *tmp;
> + int i;
> + int ret;
> +
> + if (!handle || !dev)
> + return;
> +
> + ret = acpi_bus_get_device(handle, &acpi_dev);
> + if (ret)
> + return;
> +
> + if (!acpi_dev->power.flags.power_resources)
> + return;
> +
> + for (i = 0; i <= ACPI_STATE_D3; i++) {
> + struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
> +
> + if (!ps->flags.valid)
> + continue;
> +
> + if (ps->resources.count > 1)
> + return;
> +
> + res_handle = ps->resources.handles[0];
> + break;
> + }
> +
> + if (!res_handle)
> + return;
> +
> + ret = acpi_bus_get_device(res_handle, &res_dev);
> + if (ret)
> + return;
> +
> + resource = res_dev->driver_data;
> + if (!resource)
> + return;
> +
> + mutex_lock(&resource->resource_lock);
> + list_for_each_entry_safe(apd, tmp, &resource->devices, node) {
> + if (apd->dev == dev) {
> + list_del(&apd->node);
> + kfree(apd);
> + }
> + }
> + mutex_unlock(&resource->resource_lock);
> +}
> +
> int acpi_power_get_inferred_state(struct acpi_device *device, int *state)
> {
> int result = 0;
> @@ -550,6 +677,7 @@ static int acpi_power_add(struct acpi_device *device)
>
> resource->device = device;
> mutex_init(&resource->resource_lock);
> + INIT_LIST_HEAD(&resource->devices);
> strcpy(resource->name, device->pnp.bus_id);
> strcpy(acpi_device_name(device), ACPI_POWER_DEVICE_NAME);
> strcpy(acpi_device_class(device), ACPI_POWER_CLASS);
> diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
> index 6cd5b64..32fb899 100644
> --- a/include/acpi/acpi_bus.h
> +++ b/include/acpi/acpi_bus.h
> @@ -322,6 +322,8 @@ int acpi_bus_get_status(struct acpi_device *device);
> int acpi_bus_set_power(acpi_handle handle, int state);
> int acpi_bus_update_power(acpi_handle handle, int *state_p);
> bool acpi_bus_power_manageable(acpi_handle handle);
> +int acpi_power_resource_register_device(struct device *dev, acpi_handle handle);
> +void acpi_power_resource_unregister_device(struct device *dev, acpi_handle handle);
> bool acpi_bus_can_wakeup(acpi_handle handle);
> #ifdef CONFIG_ACPI_PROC_EVENT
> int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data);
Thanks,
Rafael
On Monday, February 13, 2012, Lin Ming wrote:
> acpi_dev_run_wake is a generic function which can be used by
> other subsystem too. Rename it to acpi_pm_device_run_wake, to be
> consistent with acpi_pm_device_sleep_wake.
>
> Then move it to acpi core.
>
> Signed-off-by: Lin Ming <[email protected]>
OK, why not.
Acked-by: Rafael J. Wysocki <[email protected]>
> ---
> drivers/acpi/sleep.c | 35 +++++++++++++++++++++++++++++++++++
> drivers/pci/pci-acpi.c | 40 +++-------------------------------------
> include/acpi/acpi_bus.h | 5 +++++
> 3 files changed, 43 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
> index ca191ff..00fa664 100644
> --- a/drivers/acpi/sleep.c
> +++ b/drivers/acpi/sleep.c
> @@ -17,6 +17,7 @@
> #include <linux/suspend.h>
> #include <linux/reboot.h>
> #include <linux/acpi.h>
> +#include <linux/pm_runtime.h>
>
> #include <asm/io.h>
>
> @@ -730,6 +731,40 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p)
>
> #ifdef CONFIG_PM_SLEEP
> /**
> + * acpi_pm_device_run_wake - Enable/disable wake-up for given device.
> + * @phys_dev: Device to enable/disable the platform to wake-up the system for.
> + * @enable: Whether enable or disable the wake-up functionality.
> + *
> + * Find the ACPI device object corresponding to @pci_dev and try to
> + * enable/disable the GPE associated with it.
> + */
> +int acpi_pm_device_run_wake(struct device *phys_dev, bool enable)
> +{
> + struct acpi_device *dev;
> + acpi_handle handle;
> +
> + if (!device_run_wake(phys_dev))
> + return -EINVAL;
> +
> + handle = DEVICE_ACPI_HANDLE(phys_dev);
> + if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &dev))) {
> + dev_dbg(phys_dev, "ACPI handle has no context in %s!\n",
> + __func__);
> + return -ENODEV;
> + }
> +
> + if (enable) {
> + acpi_enable_wakeup_device_power(dev, ACPI_STATE_S0);
> + acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
> + } else {
> + acpi_disable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
> + acpi_disable_wakeup_device_power(dev);
> + }
> +
> + return 0;
> +}
> +
> +/**
> * acpi_pm_device_sleep_wake - enable or disable the system wake-up
> * capability of given device
> * @dev: device to handle
> diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
> index 060fd22..0f150f2 100644
> --- a/drivers/pci/pci-acpi.c
> +++ b/drivers/pci/pci-acpi.c
> @@ -277,40 +277,6 @@ static int acpi_pci_sleep_wake(struct pci_dev *dev, bool enable)
> return 0;
> }
>
> -/**
> - * acpi_dev_run_wake - Enable/disable wake-up for given device.
> - * @phys_dev: Device to enable/disable the platform to wake-up the system for.
> - * @enable: Whether enable or disable the wake-up functionality.
> - *
> - * Find the ACPI device object corresponding to @pci_dev and try to
> - * enable/disable the GPE associated with it.
> - */
> -static int acpi_dev_run_wake(struct device *phys_dev, bool enable)
> -{
> - struct acpi_device *dev;
> - acpi_handle handle;
> -
> - if (!device_run_wake(phys_dev))
> - return -EINVAL;
> -
> - handle = DEVICE_ACPI_HANDLE(phys_dev);
> - if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &dev))) {
> - dev_dbg(phys_dev, "ACPI handle has no context in %s!\n",
> - __func__);
> - return -ENODEV;
> - }
> -
> - if (enable) {
> - acpi_enable_wakeup_device_power(dev, ACPI_STATE_S0);
> - acpi_enable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
> - } else {
> - acpi_disable_gpe(dev->wakeup.gpe_device, dev->wakeup.gpe_number);
> - acpi_disable_wakeup_device_power(dev);
> - }
> -
> - return 0;
> -}
> -
> static void acpi_pci_propagate_run_wake(struct pci_bus *bus, bool enable)
> {
> while (bus->parent) {
> @@ -318,14 +284,14 @@ static void acpi_pci_propagate_run_wake(struct pci_bus *bus, bool enable)
>
> if (bridge->pme_interrupt)
> return;
> - if (!acpi_dev_run_wake(&bridge->dev, enable))
> + if (!acpi_pm_device_run_wake(&bridge->dev, enable))
> return;
> bus = bus->parent;
> }
>
> /* We have reached the root bus. */
> if (bus->bridge)
> - acpi_dev_run_wake(bus->bridge, enable);
> + acpi_pm_device_run_wake(bus->bridge, enable);
> }
>
> static int acpi_pci_run_wake(struct pci_dev *dev, bool enable)
> @@ -333,7 +299,7 @@ static int acpi_pci_run_wake(struct pci_dev *dev, bool enable)
> if (dev->pme_interrupt)
> return 0;
>
> - if (!acpi_dev_run_wake(&dev->dev, enable))
> + if (!acpi_pm_device_run_wake(&dev->dev, enable))
> return 0;
>
> acpi_pci_propagate_run_wake(dev->bus, enable);
> diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
> index 32fb899..2d556e3 100644
> --- a/include/acpi/acpi_bus.h
> +++ b/include/acpi/acpi_bus.h
> @@ -394,8 +394,13 @@ static inline int acpi_pm_device_sleep_state(struct device *d, int *p)
> #endif
>
> #ifdef CONFIG_PM_SLEEP
> +int acpi_pm_device_run_wake(struct device *, bool);
> int acpi_pm_device_sleep_wake(struct device *, bool);
> #else
> +static inline int acpi_pm_device_run_wake(struct device *dev, bool enable)
> +{
> + return -ENODEV;
> +}
> static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable)
> {
> return -ENODEV;
>
On Monday, February 13, 2012, Alan Stern wrote:
> On Mon, 13 Feb 2012, Rafael J. Wysocki wrote:
>
> > > I'm not sure if this is really the right approach. What you're trying
> > > to do is implement two different low-power states, basically D3hot and
> > > D3cold. Currently the runtime PM core doesn't support such things; all
> > > it knows about is low power and full power.
> >
> > I'd rather say all it knows about is "suspended" and "active", which mean
> > "the device is not processing I/O" and "the device may be processing I/O",
> > respectively. A "suspended" device may or may not be in a low-power state,
> > but the runtime PM core doesn't care about that.
>
> Yes, okay. We can say that this patch tries to implement two different
> "suspended" states, basically "low power" and "power off" (or D3hot and
> D3cold).
>
> > > Before doing an ad-hoc implementation, it would be best to step back
> > > and think about other subsystems. Other sorts of devices may well have
> > > multiple low-power states. What's the best way for this to be
> > > supported by the PM core?
> >
> > Well, I honestly don't think there's any way they all can be covered at the
> > same time and that's why we chose to support only "suspended" and "active"
> > as defined above. The handling of multiple low-power states must be
> > implemented outside of the runtime PM core (like in the PCI core, for example).
>
> That's the point. If this is to be implemented outside of the runtime
> PM core, should the patch be allowed to add new fields to struct
> dev_pm_info (which has to be shared among all subsystems)?
>
> Or to put it another way, if we do add new fields to struct dev_pm_info
> (like can_power_off) in order to help support multiple "suspended"
> states, shouldn't these new fields be such that they can be used by
> many different subsystems rather than being special for the
> full-power/no-power situation?
>
> Likewise, should new routines like pm_runtime_allow_power_off() be
> added to the runtime PM core if they are going to be used just by PCI?
No, they shouldn't.
Thanks,
Rafael
On 一, 2012-02-13 at 10:01 -0500, Alan Stern wrote:
> On Mon, 13 Feb 2012, Lin Ming wrote:
>
> > From: Zhang Rui <[email protected]>
> >
> > Introduce flag can_power_off in device structure to support runtime
> > power off/on.
> >
> > Note that, for a specific device driver,
> > "support runtime power off/on" means that the driver .runtime_suspend
> > callback needs to
> > 1) save all the context so that it can restore the device back to the previous
> > working state after powered on.
> > 2) set can_power_off flag to tell the driver model that it's ready for power off.
> >
> > The following example shows how this works.
> >
> > device A
> > |---------|
> > v v
> > device B device C
> >
> > A is the parent of device B and device C, and device A/B/C shares the
> > same power logic
> > (Only device A knows how to turn on/off the power).
> >
> > In order to power off A, B, C at runtime,
> > 1) device B and device C should support runtime power off
> > (runtime suspended with can_power_off flag set)
> > 2) pm idle request for device A is fired by runtime PM core.
> > 3) in device A .runtime_suspend callback, it tries to set can_power_off flag.
> > 4) if succeed, it means all its children have been ready for power off
> > and it can turn off the power at any time.
> > 5) if failed, it means at least one of its children does not support runtime
> > power off, thus the power can not be turned off.
>
> I'm not sure if this is really the right approach. What you're trying
> to do is implement two different low-power states, basically D3hot and
> D3cold. Currently the runtime PM core doesn't support such things; all
> it knows about is low power and full power.
>
Exactly.
what I'm trying to fix here is to add a "special" runtime low power
state, aka, power off.
> Before doing an ad-hoc implementation, it would be best to step back
> and think about other subsystems. Other sorts of devices may well have
> multiple low-power states. What's the best way for this to be
> supported by the PM core?
>
I thought about this before, e.g. introduce support for multiple runtime
low power states in runtime PM core, like suspend/hibernate for system
low power states. But I'm not sure if this is workable because the low
power states varies between devices/buses/platforms.
So I decided to introduce a special low power state, aka, runtime power
off, first, which means the same thing to different
devices/buses/platforms.
thanks,
rui
On 一, 2012-02-13 at 20:38 +0100, Rafael J. Wysocki wrote:
> On Monday, February 13, 2012, Alan Stern wrote:
> > On Mon, 13 Feb 2012, Lin Ming wrote:
> >
> > > From: Zhang Rui <[email protected]>
> > >
> > > Introduce flag can_power_off in device structure to support runtime
> > > power off/on.
> > >
> > > Note that, for a specific device driver,
> > > "support runtime power off/on" means that the driver .runtime_suspend
> > > callback needs to
> > > 1) save all the context so that it can restore the device back to the previous
> > > working state after powered on.
> > > 2) set can_power_off flag to tell the driver model that it's ready for power off.
> > >
> > > The following example shows how this works.
> > >
> > > device A
> > > |---------|
> > > v v
> > > device B device C
> > >
> > > A is the parent of device B and device C, and device A/B/C shares the
> > > same power logic
> > > (Only device A knows how to turn on/off the power).
> > >
> > > In order to power off A, B, C at runtime,
> > > 1) device B and device C should support runtime power off
> > > (runtime suspended with can_power_off flag set)
> > > 2) pm idle request for device A is fired by runtime PM core.
> > > 3) in device A .runtime_suspend callback, it tries to set can_power_off flag.
> > > 4) if succeed, it means all its children have been ready for power off
> > > and it can turn off the power at any time.
> > > 5) if failed, it means at least one of its children does not support runtime
> > > power off, thus the power can not be turned off.
> >
> > I'm not sure if this is really the right approach. What you're trying
> > to do is implement two different low-power states, basically D3hot and
> > D3cold. Currently the runtime PM core doesn't support such things; all
> > it knows about is low power and full power.
>
> I'd rather say all it knows about is "suspended" and "active", which mean
> "the device is not processing I/O" and "the device may be processing I/O",
> respectively. A "suspended" device may or may not be in a low-power state,
> but the runtime PM core doesn't care about that.
>
yes, I know that.
> > Before doing an ad-hoc implementation, it would be best to step back
> > and think about other subsystems. Other sorts of devices may well have
> > multiple low-power states. What's the best way for this to be
> > supported by the PM core?
>
> Well, I honestly don't think there's any way they all can be covered at the
> same time and that's why we chose to support only "suspended" and "active"
> as defined above.
> The handling of multiple low-power states must be
> implemented outside of the runtime PM core (like in the PCI core, for example).
Surely I'd prefer to implement it in the bus code, :), but the problem
is that several buses maybe involved at the same time.
Let's take ZPODD for example,
ZPODD is attached to a SATA port. Only SATA port knows that it can be
runtime powered off, because its ACPI node has _PR3._OFF.
But when ATA layer code tries to put SATA port to D3_COLD at runtime,it
must make sure all the devices/drivers in the same power domain are
ready for power off, and in this case, we need to get this info from
SCSI layer.
thanks,
rui
Hi, Rafael,
On 一, 2012-02-13 at 21:25 +0100, Rafael J. Wysocki wrote:
> On Monday, February 13, 2012, Lin Ming wrote:
> > From: Zhang Rui <[email protected]>
> >
> > If a device has _PR3._ON, it means the device supports D3_HOT.
> > If a device has _PR3._OFF, it means the device supports D3_COLD.
> > Add the ability to validate and enter D3_COLD state in ACPI.
> >
> > Signed-off-by: Zhang Rui <[email protected]>
>
> This is supposed to be ACPI 5.0 support, right?
>
No, D3_HOT is introduced in ACPI spec 4.0.
According to the spec, _PR3 is used for devices that support both
D3(D3_COLD) and D3HOT.
The confusion here is that Linux D3 equals ACPICA D3HOT and Linux
D3_COLD equals ACPICA D3.
For example, when enter Linux ACPI D3, the reference count of ACPI Power
Resources in _PR3 is increased by one.
> So can anyone please tell me what part of the ACPI 5.0 spec is the
> basis of this patch, because I can't see that immediately?
>
> The only places where D3Cold is _mentioned_ are Section 7.2.12 (_PRE, which
> appears to be new in 5.0), Section 7.2.20 (_S0W), Section 7.2.21 (_S1W),
> Section 7.2.22 (_S2W), Section 7.2.23 (_S3W) and Section 7.2.24 (_S4W).
> None of them mentions those _PR3._ON and _PR3._OFF things above.
>
> Moreover, my understanding of the spec is that D3Cold means all of the
> power resources returned by _PR3 are "off" (whereas some of them will be
> "on" in D3hot).
>
Agreed.
> > ---
> > drivers/acpi/power.c | 4 ++--
> > drivers/acpi/scan.c | 10 +++++++++-
> > 2 files changed, 11 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
> > index 9ac2a9f..0d681fb 100644
> > --- a/drivers/acpi/power.c
> > +++ b/drivers/acpi/power.c
> > @@ -500,14 +500,14 @@ int acpi_power_transition(struct acpi_device *device, int state)
> > {
> > int result;
> >
> > - if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3))
> > + if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD))
> > return -EINVAL;
> >
> > if (device->power.state == state)
> > return 0;
> >
> > if ((device->power.state < ACPI_STATE_D0)
> > - || (device->power.state > ACPI_STATE_D3))
> > + || (device->power.state > ACPI_STATE_D3_COLD))
> > return -ENODEV;
> >
> > /* TBD: Resources must be ordered. */
> > diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
> > index 8ab80ba..a9d4391 100644
> > --- a/drivers/acpi/scan.c
> > +++ b/drivers/acpi/scan.c
> > @@ -881,8 +881,16 @@ static int acpi_bus_get_power_flags(struct acpi_device *device)
> >
> > device->power.flags.power_resources = 1;
> > ps->flags.valid = 1;
> > - for (j = 0; j < ps->resources.count; j++)
> > + for (j = 0; j < ps->resources.count; j++) {
> > acpi_bus_add_power_resource(ps->resources.handles[j]);
> > + /* Check for D3_COLD support. _PR3._OFF equals D3_COLD ? */
> > + if (i == ACPI_STATE_D3) {
> > + if (j == 0)
> > + device->power.states[ACPI_STATE_D3_COLD].flags.valid = 1;
> > + status = acpi_get_handle(ps->resources.handles[j], "_OFF", &handle);
> > + device->power.states[ACPI_STATE_D3_COLD].flags.valid &= ACPI_SUCCESS(status);
> > + }
> > + }
>
> Sorry, but this doesn't make sense to me. Power resources always have
> the _OFF method, right?
>
I'm not sure. I thought I had seen ACPI Power Resources without _OFF
control method somewhere in bugzilla, but I can not find it out now.
Hmm, how about set D3_COLD support if _PR3 exists, but leave a warning
message if _OFF doesn't exist, for now?
thanks,
rui
Hi, Alan,
On 一, 2012-02-13 at 15:41 -0500, Alan Stern wrote:
> On Mon, 13 Feb 2012, Rafael J. Wysocki wrote:
>
> > > I'm not sure if this is really the right approach. What you're trying
> > > to do is implement two different low-power states, basically D3hot and
> > > D3cold. Currently the runtime PM core doesn't support such things; all
> > > it knows about is low power and full power.
> >
> > I'd rather say all it knows about is "suspended" and "active", which mean
> > "the device is not processing I/O" and "the device may be processing I/O",
> > respectively. A "suspended" device may or may not be in a low-power state,
> > but the runtime PM core doesn't care about that.
>
> Yes, okay. We can say that this patch tries to implement two different
> "suspended" states, basically "low power" and "power off" (or D3hot and
> D3cold).
>
Right!
> > > Before doing an ad-hoc implementation, it would be best to step back
> > > and think about other subsystems. Other sorts of devices may well have
> > > multiple low-power states. What's the best way for this to be
> > > supported by the PM core?
> >
> > Well, I honestly don't think there's any way they all can be covered at the
> > same time and that's why we chose to support only "suspended" and "active"
> > as defined above. The handling of multiple low-power states must be
> > implemented outside of the runtime PM core (like in the PCI core, for example).
>
> That's the point. If this is to be implemented outside of the runtime
> PM core, should the patch be allowed to add new fields to struct
> dev_pm_info (which has to be shared among all subsystems)?
>
Surely it shouldn't in this case.
> Or to put it another way, if we do add new fields to struct dev_pm_info
> (like can_power_off) in order to help support multiple "suspended"
> states, shouldn't these new fields be such that they can be used by
> many different subsystems rather than being special for the
> full-power/no-power situation?
>
My opinion is that the concept of "no-power state" is unique for all
devices/buses/platforms.
If any of them support this, they can use the routines without any
confusion.
thanks,
rui
On 一, 2012-02-13 at 21:48 +0100, Rafael J. Wysocki wrote:
> On Monday, February 13, 2012, Lin Ming wrote:
> > From: Zhang Rui <[email protected]>
> >
> > ACPI Power Resource can power on/off a couple of devices.
> > Introduce interfaces to register/unregister a device to/from
> > an ACPI Power Resource.
> >
> > Signed-off-by: Zhang Rui <[email protected]>
> > ---
> > drivers/acpi/power.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++
> > include/acpi/acpi_bus.h | 2 +
> > 2 files changed, 130 insertions(+), 0 deletions(-)
> >
> > diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
> > index 0d681fb..a7e2305 100644
> > --- a/drivers/acpi/power.c
> > +++ b/drivers/acpi/power.c
> > @@ -84,10 +84,25 @@ struct acpi_power_resource {
> > u32 order;
> > unsigned int ref_count;
> > struct mutex resource_lock;
> > + struct list_head devices; /* list for devices powered by this PR */
> > };
> >
> > static struct list_head acpi_power_resource_list;
> >
> > +/*
> > + * When a power resource is turned on, all the devices are put to an uninitialized
> > + * state although their ACPI states are still D0.
> > + * To handle this, we need to keep a list of all devices powered by the power resource,
> > + * and resume all of them.
> > + * Currently, we only support this case:
> > + * 1) multiple devices share the same power resoruce,
> > + * 2) One device uses One Power Resource ONLY.
>
> This is incorrect. We actually support all combinations.
Agreed.
> However, we don't
> generally support checking what devices depend on the given power resource.
>
Yes.
But all the code in this patch is for runtime D3_COLD support only.
At least for the BIOS code on hand, a device that support runtime
D3_COLD uses one ACPI Power Resource only.
We can add the support for all combinations if there are such kind of
BIOS in the market, which I do not think there would be.
Besides, as a RFC version, I do not want to make a over designed
proposal at the beginning.
> > + */
> > +struct acpi_powered_device {
> > + struct list_head node;
> > + struct device *dev;
> > +};
> > +
> > /* --------------------------------------------------------------------------
> > Power Resource Management
> > -------------------------------------------------------------------------- */
> > @@ -455,6 +470,118 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
> > Device Power Management
> > -------------------------------------------------------------------------- */
> >
> > +int acpi_power_resource_register_device(struct device *dev, acpi_handle handle)
> > +{
> > + struct acpi_power_resource *resource;
> > + struct acpi_device *acpi_dev, *res_dev;
> > + acpi_handle res_handle = NULL;
> > + struct acpi_powered_device *apd;
> > + int i;
> > + int ret;
> > +
> > + if (!handle || !dev)
> > + return -EINVAL;
> > +
> > + ret = acpi_bus_get_device(handle, &acpi_dev);
> > + if (ret)
> > + goto no_power_resource;
> > +
> > + if (!acpi_dev->power.flags.power_resources)
> > + goto no_power_resource;
> > +
> > + for (i = 0; i <= ACPI_STATE_D3; i++) {
> > + struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
> > +
> > + if (!ps->flags.valid)
> > + continue;
> > +
> > + if (ps->resources.count > 1)
> > + return 0;
> > +
> > + if (!res_handle)
> > + res_handle = ps->resources.handles[0];
> > + else if (res_handle != ps->resources.handles[0])
> > + return 0;
> > + }
>
> I'm not sure what the above checks are needed for. It seems that this function
> will only be called from acpi_bus_add_power_resource() (which needs to be
> modified for this purpose, BTW), so it doesn't need to check all those things
> (those checks have been made already).
>
No. These two APIs are introduced to support the runtime D3_COLD remote
wakeup. And they should be invoked by drivers, either in driver code or
via bus layer code.
Say, ATA port, who has _PR3 in its ACPI node, knows that it can enter
D3_COLD at run time, and it supports remote wakeup in D3_COLD because it
has _S0W (return value 4).
When remote wakeup event is triggered, there is an ACPI event sent to
the ATA controller/port, which sets the ATA controller/port back to D0
state.
At this time, what we actually need is to resume the ZPODD, rather than
the ATA controller/port. To follow the runtime PM model (runtime resume
starts from leaf devices), ATA code can use these two APIs to tell ACPI
to runtime resume ZPODD device directly, because ZPODD is powered by
this Power Resource as well.
thanks,
rui
> > +
> > + ret = acpi_bus_get_device(res_handle, &res_dev);
> > + if (ret)
> > + goto no_power_resource;
> > +
> > + resource = res_dev->driver_data;
> > + if (!resource)
> > + goto no_power_resource;
> > +
> > + apd = kzalloc(sizeof(struct acpi_powered_device), GFP_KERNEL);
> > + if (!apd)
> > + return -ENOMEM;
> > +
> > + apd->dev = dev;
> > + mutex_lock(&resource->resource_lock);
> > + list_add_tail(&apd->node, &resource->devices);
> > + mutex_unlock(&resource->resource_lock);
> > + printk(KERN_INFO PREFIX "Device %s uses Power Resource %s\n", dev->kobj.name, acpi_device_name(res_dev));
> > +
> > + return 0;
> > +
> > +no_power_resource:
> > + printk(KERN_WARNING PREFIX "Invalid Power Resource to register!");
> > + return -ENODEV;
> > +}
> > +
> > +void acpi_power_resource_unregister_device(struct device *dev, acpi_handle handle)
> > +{
>
> Who is supposed to call this one?
>
> > + struct acpi_power_resource *resource;
> > + struct acpi_device *acpi_dev, *res_dev;
> > + acpi_handle res_handle = NULL;
> > + struct acpi_powered_device *apd, *tmp;
> > + int i;
> > + int ret;
> > +
> > + if (!handle || !dev)
> > + return;
> > +
> > + ret = acpi_bus_get_device(handle, &acpi_dev);
> > + if (ret)
> > + return;
> > +
> > + if (!acpi_dev->power.flags.power_resources)
> > + return;
> > +
> > + for (i = 0; i <= ACPI_STATE_D3; i++) {
> > + struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
> > +
> > + if (!ps->flags.valid)
> > + continue;
> > +
> > + if (ps->resources.count > 1)
> > + return;
> > +
> > + res_handle = ps->resources.handles[0];
> > + break;
> > + }
> > +
> > + if (!res_handle)
> > + return;
> > +
> > + ret = acpi_bus_get_device(res_handle, &res_dev);
> > + if (ret)
> > + return;
> > +
> > + resource = res_dev->driver_data;
> > + if (!resource)
> > + return;
> > +
> > + mutex_lock(&resource->resource_lock);
> > + list_for_each_entry_safe(apd, tmp, &resource->devices, node) {
> > + if (apd->dev == dev) {
> > + list_del(&apd->node);
> > + kfree(apd);
> > + }
> > + }
> > + mutex_unlock(&resource->resource_lock);
> > +}
> > +
> > int acpi_power_get_inferred_state(struct acpi_device *device, int *state)
> > {
> > int result = 0;
> > @@ -550,6 +677,7 @@ static int acpi_power_add(struct acpi_device *device)
> >
> > resource->device = device;
> > mutex_init(&resource->resource_lock);
> > + INIT_LIST_HEAD(&resource->devices);
> > strcpy(resource->name, device->pnp.bus_id);
> > strcpy(acpi_device_name(device), ACPI_POWER_DEVICE_NAME);
> > strcpy(acpi_device_class(device), ACPI_POWER_CLASS);
> > diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
> > index 6cd5b64..32fb899 100644
> > --- a/include/acpi/acpi_bus.h
> > +++ b/include/acpi/acpi_bus.h
> > @@ -322,6 +322,8 @@ int acpi_bus_get_status(struct acpi_device *device);
> > int acpi_bus_set_power(acpi_handle handle, int state);
> > int acpi_bus_update_power(acpi_handle handle, int *state_p);
> > bool acpi_bus_power_manageable(acpi_handle handle);
> > +int acpi_power_resource_register_device(struct device *dev, acpi_handle handle);
> > +void acpi_power_resource_unregister_device(struct device *dev, acpi_handle handle);
> > bool acpi_bus_can_wakeup(acpi_handle handle);
> > #ifdef CONFIG_ACPI_PROC_EVENT
> > int acpi_bus_generate_proc_event(struct acpi_device *device, u8 type, int data);
>
> Thanks,
> Rafael
On Tuesday, February 14, 2012, Zhang Rui wrote:
> Hi, Rafael,
>
> On 一, 2012-02-13 at 21:25 +0100, Rafael J. Wysocki wrote:
> > On Monday, February 13, 2012, Lin Ming wrote:
> > > From: Zhang Rui <[email protected]>
> > >
> > > If a device has _PR3._ON, it means the device supports D3_HOT.
> > > If a device has _PR3._OFF, it means the device supports D3_COLD.
> > > Add the ability to validate and enter D3_COLD state in ACPI.
> > >
> > > Signed-off-by: Zhang Rui <[email protected]>
> >
> > This is supposed to be ACPI 5.0 support, right?
> >
> No, D3_HOT is introduced in ACPI spec 4.0.
> According to the spec, _PR3 is used for devices that support both
> D3(D3_COLD) and D3HOT.
Yes, it does.
> The confusion here is that Linux D3 equals ACPICA D3HOT and Linux
> D3_COLD equals ACPICA D3.
> For example, when enter Linux ACPI D3, the reference count of ACPI Power
> Resources in _PR3 is increased by one.
That's correct.
> > So can anyone please tell me what part of the ACPI 5.0 spec is the
> > basis of this patch, because I can't see that immediately?
> >
> > The only places where D3Cold is _mentioned_ are Section 7.2.12 (_PRE, which
> > appears to be new in 5.0), Section 7.2.20 (_S0W), Section 7.2.21 (_S1W),
> > Section 7.2.22 (_S2W), Section 7.2.23 (_S3W) and Section 7.2.24 (_S4W).
> > None of them mentions those _PR3._ON and _PR3._OFF things above.
> >
> > Moreover, my understanding of the spec is that D3Cold means all of the
> > power resources returned by _PR3 are "off" (whereas some of them will be
> > "on" in D3hot).
> >
> Agreed.
>
> > > ---
> > > drivers/acpi/power.c | 4 ++--
> > > drivers/acpi/scan.c | 10 +++++++++-
> > > 2 files changed, 11 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
> > > index 9ac2a9f..0d681fb 100644
> > > --- a/drivers/acpi/power.c
> > > +++ b/drivers/acpi/power.c
> > > @@ -500,14 +500,14 @@ int acpi_power_transition(struct acpi_device *device, int state)
> > > {
> > > int result;
> > >
> > > - if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3))
> > > + if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD))
> > > return -EINVAL;
> > >
> > > if (device->power.state == state)
> > > return 0;
> > >
> > > if ((device->power.state < ACPI_STATE_D0)
> > > - || (device->power.state > ACPI_STATE_D3))
> > > + || (device->power.state > ACPI_STATE_D3_COLD))
> > > return -ENODEV;
> > >
> > > /* TBD: Resources must be ordered. */
> > > diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
> > > index 8ab80ba..a9d4391 100644
> > > --- a/drivers/acpi/scan.c
> > > +++ b/drivers/acpi/scan.c
> > > @@ -881,8 +881,16 @@ static int acpi_bus_get_power_flags(struct acpi_device *device)
> > >
> > > device->power.flags.power_resources = 1;
> > > ps->flags.valid = 1;
> > > - for (j = 0; j < ps->resources.count; j++)
> > > + for (j = 0; j < ps->resources.count; j++) {
> > > acpi_bus_add_power_resource(ps->resources.handles[j]);
> > > + /* Check for D3_COLD support. _PR3._OFF equals D3_COLD ? */
> > > + if (i == ACPI_STATE_D3) {
> > > + if (j == 0)
> > > + device->power.states[ACPI_STATE_D3_COLD].flags.valid = 1;
> > > + status = acpi_get_handle(ps->resources.handles[j], "_OFF", &handle);
> > > + device->power.states[ACPI_STATE_D3_COLD].flags.valid &= ACPI_SUCCESS(status);
> > > + }
> > > + }
> >
> > Sorry, but this doesn't make sense to me. Power resources always have
> > the _OFF method, right?
> >
> I'm not sure.
That would be explicitly against the spec that says that power resources
are *required* to have _ON, _OFF and _STA.
> I thought I had seen ACPI Power Resources without _OFF
> control method somewhere in bugzilla, but I can not find it out now.
That, clearly, is a firmware bug.
> Hmm, how about set D3_COLD support if _PR3 exists, but leave a warning
> message if _OFF doesn't exist, for now?
I don't think we need to set D3_COLD support at all. In fact, it is always
supported (as I said, if all power resources used by a device are off, the
device is in D3_COLD pretty much by definition).
Thanks,
Rafael
On Tuesday, February 14, 2012, Zhang Rui wrote:
> On 一, 2012-02-13 at 21:48 +0100, Rafael J. Wysocki wrote:
> > On Monday, February 13, 2012, Lin Ming wrote:
> > > From: Zhang Rui <[email protected]>
> > >
> > > ACPI Power Resource can power on/off a couple of devices.
> > > Introduce interfaces to register/unregister a device to/from
> > > an ACPI Power Resource.
> > >
> > > Signed-off-by: Zhang Rui <[email protected]>
> > > ---
> > > drivers/acpi/power.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++
> > > include/acpi/acpi_bus.h | 2 +
> > > 2 files changed, 130 insertions(+), 0 deletions(-)
> > >
> > > diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
> > > index 0d681fb..a7e2305 100644
> > > --- a/drivers/acpi/power.c
> > > +++ b/drivers/acpi/power.c
> > > @@ -84,10 +84,25 @@ struct acpi_power_resource {
> > > u32 order;
> > > unsigned int ref_count;
> > > struct mutex resource_lock;
> > > + struct list_head devices; /* list for devices powered by this PR */
> > > };
> > >
> > > static struct list_head acpi_power_resource_list;
> > >
> > > +/*
> > > + * When a power resource is turned on, all the devices are put to an uninitialized
> > > + * state although their ACPI states are still D0.
> > > + * To handle this, we need to keep a list of all devices powered by the power resource,
> > > + * and resume all of them.
> > > + * Currently, we only support this case:
> > > + * 1) multiple devices share the same power resoruce,
> > > + * 2) One device uses One Power Resource ONLY.
> >
> > This is incorrect. We actually support all combinations.
>
> Agreed.
>
> > However, we don't
> > generally support checking what devices depend on the given power resource.
> >
> Yes.
> But all the code in this patch is for runtime D3_COLD support only.
> At least for the BIOS code on hand, a device that support runtime
> D3_COLD uses one ACPI Power Resource only.
> We can add the support for all combinations if there are such kind of
> BIOS in the market, which I do not think there would be.
>
> Besides, as a RFC version, I do not want to make a over designed
> proposal at the beginning.
>
> > > + */
> > > +struct acpi_powered_device {
> > > + struct list_head node;
> > > + struct device *dev;
> > > +};
> > > +
> > > /* --------------------------------------------------------------------------
> > > Power Resource Management
> > > -------------------------------------------------------------------------- */
> > > @@ -455,6 +470,118 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
> > > Device Power Management
> > > -------------------------------------------------------------------------- */
> > >
> > > +int acpi_power_resource_register_device(struct device *dev, acpi_handle handle)
> > > +{
> > > + struct acpi_power_resource *resource;
> > > + struct acpi_device *acpi_dev, *res_dev;
> > > + acpi_handle res_handle = NULL;
> > > + struct acpi_powered_device *apd;
> > > + int i;
> > > + int ret;
> > > +
> > > + if (!handle || !dev)
> > > + return -EINVAL;
> > > +
> > > + ret = acpi_bus_get_device(handle, &acpi_dev);
> > > + if (ret)
> > > + goto no_power_resource;
> > > +
> > > + if (!acpi_dev->power.flags.power_resources)
> > > + goto no_power_resource;
> > > +
> > > + for (i = 0; i <= ACPI_STATE_D3; i++) {
> > > + struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
> > > +
> > > + if (!ps->flags.valid)
> > > + continue;
> > > +
> > > + if (ps->resources.count > 1)
> > > + return 0;
> > > +
> > > + if (!res_handle)
> > > + res_handle = ps->resources.handles[0];
> > > + else if (res_handle != ps->resources.handles[0])
> > > + return 0;
> > > + }
> >
> > I'm not sure what the above checks are needed for. It seems that this function
> > will only be called from acpi_bus_add_power_resource() (which needs to be
> > modified for this purpose, BTW), so it doesn't need to check all those things
> > (those checks have been made already).
> >
> No. These two APIs are introduced to support the runtime D3_COLD remote
> wakeup. And they should be invoked by drivers, either in driver code or
> via bus layer code.
>
> Say, ATA port, who has _PR3 in its ACPI node, knows that it can enter
> D3_COLD at run time, and it supports remote wakeup in D3_COLD because it
> has _S0W (return value 4).
> When remote wakeup event is triggered, there is an ACPI event sent to
> the ATA controller/port, which sets the ATA controller/port back to D0
> state.
> At this time, what we actually need is to resume the ZPODD, rather than
> the ATA controller/port. To follow the runtime PM model (runtime resume
> starts from leaf devices),
Well, this isn't the case. Parents are always resumed first.
> ATA code can use these two APIs to tell ACPI
> to runtime resume ZPODD device directly, because ZPODD is powered by
> this Power Resource as well.
I'm not exactly sure what you're trying to achieve and what you mean by
"resume a device directly"? Do you want to run the device's resume
callback at the time when another device is being resumed?
Rafael
On Tuesday, February 14, 2012, Zhang Rui wrote:
> Hi, Alan,
>
> On 一, 2012-02-13 at 15:41 -0500, Alan Stern wrote:
> > On Mon, 13 Feb 2012, Rafael J. Wysocki wrote:
> >
> > > > I'm not sure if this is really the right approach. What you're trying
> > > > to do is implement two different low-power states, basically D3hot and
> > > > D3cold. Currently the runtime PM core doesn't support such things; all
> > > > it knows about is low power and full power.
> > >
> > > I'd rather say all it knows about is "suspended" and "active", which mean
> > > "the device is not processing I/O" and "the device may be processing I/O",
> > > respectively. A "suspended" device may or may not be in a low-power state,
> > > but the runtime PM core doesn't care about that.
> >
> > Yes, okay. We can say that this patch tries to implement two different
> > "suspended" states, basically "low power" and "power off" (or D3hot and
> > D3cold).
> >
> Right!
>
> > > > Before doing an ad-hoc implementation, it would be best to step back
> > > > and think about other subsystems. Other sorts of devices may well have
> > > > multiple low-power states. What's the best way for this to be
> > > > supported by the PM core?
> > >
> > > Well, I honestly don't think there's any way they all can be covered at the
> > > same time and that's why we chose to support only "suspended" and "active"
> > > as defined above. The handling of multiple low-power states must be
> > > implemented outside of the runtime PM core (like in the PCI core, for example).
> >
> > That's the point. If this is to be implemented outside of the runtime
> > PM core, should the patch be allowed to add new fields to struct
> > dev_pm_info (which has to be shared among all subsystems)?
> >
> Surely it shouldn't in this case.
>
> > Or to put it another way, if we do add new fields to struct dev_pm_info
> > (like can_power_off) in order to help support multiple "suspended"
> > states, shouldn't these new fields be such that they can be used by
> > many different subsystems rather than being special for the
> > full-power/no-power situation?
> >
> My opinion is that the concept of "no-power state" is unique for all
> devices/buses/platforms.
No, it is not, basically because of power domains. If they are used,
then individual device power states are not well defined at all.
> If any of them support this, they can use the routines without any
> confusion.
No, they can't.
Thanks,
Rafael
On Tuesday, February 14, 2012, Zhang Rui wrote:
> On 一, 2012-02-13 at 20:38 +0100, Rafael J. Wysocki wrote:
> > On Monday, February 13, 2012, Alan Stern wrote:
> > > On Mon, 13 Feb 2012, Lin Ming wrote:
> > >
> > > > From: Zhang Rui <[email protected]>
> > > >
> > > > Introduce flag can_power_off in device structure to support runtime
> > > > power off/on.
> > > >
> > > > Note that, for a specific device driver,
> > > > "support runtime power off/on" means that the driver .runtime_suspend
> > > > callback needs to
> > > > 1) save all the context so that it can restore the device back to the previous
> > > > working state after powered on.
> > > > 2) set can_power_off flag to tell the driver model that it's ready for power off.
> > > >
> > > > The following example shows how this works.
> > > >
> > > > device A
> > > > |---------|
> > > > v v
> > > > device B device C
> > > >
> > > > A is the parent of device B and device C, and device A/B/C shares the
> > > > same power logic
> > > > (Only device A knows how to turn on/off the power).
> > > >
> > > > In order to power off A, B, C at runtime,
> > > > 1) device B and device C should support runtime power off
> > > > (runtime suspended with can_power_off flag set)
> > > > 2) pm idle request for device A is fired by runtime PM core.
> > > > 3) in device A .runtime_suspend callback, it tries to set can_power_off flag.
> > > > 4) if succeed, it means all its children have been ready for power off
> > > > and it can turn off the power at any time.
> > > > 5) if failed, it means at least one of its children does not support runtime
> > > > power off, thus the power can not be turned off.
> > >
> > > I'm not sure if this is really the right approach. What you're trying
> > > to do is implement two different low-power states, basically D3hot and
> > > D3cold. Currently the runtime PM core doesn't support such things; all
> > > it knows about is low power and full power.
> >
> > I'd rather say all it knows about is "suspended" and "active", which mean
> > "the device is not processing I/O" and "the device may be processing I/O",
> > respectively. A "suspended" device may or may not be in a low-power state,
> > but the runtime PM core doesn't care about that.
> >
> yes, I know that.
>
> > > Before doing an ad-hoc implementation, it would be best to step back
> > > and think about other subsystems. Other sorts of devices may well have
> > > multiple low-power states. What's the best way for this to be
> > > supported by the PM core?
> >
> > Well, I honestly don't think there's any way they all can be covered at the
> > same time and that's why we chose to support only "suspended" and "active"
> > as defined above.
>
> > The handling of multiple low-power states must be
> > implemented outside of the runtime PM core (like in the PCI core, for example).
>
> Surely I'd prefer to implement it in the bus code, :), but the problem
> is that several buses maybe involved at the same time.
> Let's take ZPODD for example,
> ZPODD is attached to a SATA port. Only SATA port knows that it can be
> runtime powered off, because its ACPI node has _PR3._OFF.
> But when ATA layer code tries to put SATA port to D3_COLD at runtime,it
> must make sure all the devices/drivers in the same power domain are
> ready for power off, and in this case, we need to get this info from
> SCSI layer.
Then you need to get it from there. I know that this is a difficult problem,
have been working on a similar one for several months now. :-)
Thanks,
Rafael
Hi,
Good work, I'm also working on ZPODD for AMD platforms, some comments below.
On Mon, Feb 13, 2012 at 5:11 PM, Lin Ming <[email protected]> wrote:
> ZPODD(Zero Power Optical Disk Drive) is a new feature in
> SATA 3.1 specification. It provides a way to power off unused CDROM.
I don't see anywhere in the sata 3.1 spec mentioned how to power off the
cdrom, the only relevant content is the newly defined device attention pin,
which is used to notify the host that this powered off device needs attention.
Or do I miss something?
>
> CDROM is powered off by executing ACPI power resource's _OFF method.
>
AMD has a different implementation to power off the CDROM, I'll need to
prepare another patch based on yours.
> When CDROM is powered off(D3Cold state), inserting disk will trigger a
> wakeup event(GPE). GPE AML handler notifies the associated device. Then
> CDROM is resumed in the notify handler.
>
> Signed-off-by: Lin Ming <[email protected]>
> ---
> ?drivers/ata/libata-acpi.c | ? 64 +++++++++++++++++++++++++++++++++++++-------
> ?drivers/scsi/sr.c ? ? ? ? | ? 39 +++++++++++++++++++++++++++
> ?drivers/scsi/sr.h ? ? ? ? | ? ?3 ++
> ?3 files changed, 95 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
> index 5fc97d2..bf4eace 100644
> --- a/drivers/scsi/sr.c
> +++ b/drivers/scsi/sr.c
> @@ -716,6 +752,9 @@ static int sr_probe(struct device *dev)
> ? ? ? ?disk->flags |= GENHD_FL_REMOVABLE;
> ? ? ? ?add_disk(disk);
>
> + ? ? ? if (device_run_wake(dev))
> + ? ? ? ? ? ? ? cd->zpodd = 1;
> +
For a cd to support zero power, it has to support device attention pin.
If it does not support that, once it is powered off, it can't be power up
back. So I don't think device_run_wake is enough to set the zpodd flag,
unless your firmware has done the check and created the acpi table
according to the result. Is it the case?
Thanks,
Aaron
On Wed, 2012-02-15 at 14:06 +0800, Aaron Lu wrote:
> Hi,
>
> Good work, I'm also working on ZPODD for AMD platforms, some comments below.
Great :)
>
> On Mon, Feb 13, 2012 at 5:11 PM, Lin Ming <[email protected]> wrote:
> > ZPODD(Zero Power Optical Disk Drive) is a new feature in
> > SATA 3.1 specification. It provides a way to power off unused CDROM.
>
> I don't see anywhere in the sata 3.1 spec mentioned how to power off the
> cdrom, the only relevant content is the newly defined device attention pin,
> which is used to notify the host that this powered off device needs attention.
> Or do I miss something?
You're right.
>
> >
> > CDROM is powered off by executing ACPI power resource's _OFF method.
> >
>
> AMD has a different implementation to power off the CDROM, I'll need to
> prepare another patch based on yours.
Could you share AMD's implementation?
>
> > When CDROM is powered off(D3Cold state), inserting disk will trigger a
> > wakeup event(GPE). GPE AML handler notifies the associated device. Then
> > CDROM is resumed in the notify handler.
> >
> > Signed-off-by: Lin Ming <[email protected]>
> > ---
> > drivers/ata/libata-acpi.c | 64 +++++++++++++++++++++++++++++++++++++-------
> > drivers/scsi/sr.c | 39 +++++++++++++++++++++++++++
> > drivers/scsi/sr.h | 3 ++
> > 3 files changed, 95 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
> > index 5fc97d2..bf4eace 100644
> > --- a/drivers/scsi/sr.c
> > +++ b/drivers/scsi/sr.c
> > @@ -716,6 +752,9 @@ static int sr_probe(struct device *dev)
> > disk->flags |= GENHD_FL_REMOVABLE;
> > add_disk(disk);
> >
> > + if (device_run_wake(dev))
> > + cd->zpodd = 1;
> > +
>
> For a cd to support zero power, it has to support device attention pin.
> If it does not support that, once it is powered off, it can't be power up
> back. So I don't think device_run_wake is enough to set the zpodd flag,
> unless your firmware has done the check and created the acpi table
> according to the result. Is it the case?
I should add device attention pin check.
Thanks for the comments.
Lin Ming
>
> Thanks,
> Aaron
On Wed, Feb 15, 2012 at 02:46:05PM +0800, Lin Ming wrote:
> On Wed, 2012-02-15 at 14:06 +0800, Aaron Lu wrote:
> > Hi,
> >
> > Good work, I'm also working on ZPODD for AMD platforms, some comments below.
>
> Great :)
>
> >
> > On Mon, Feb 13, 2012 at 5:11 PM, Lin Ming <[email protected]> wrote:
> > > ZPODD(Zero Power Optical Disk Drive) is a new feature in
> > > SATA 3.1 specification. It provides a way to power off unused CDROM.
> >
> > I don't see anywhere in the sata 3.1 spec mentioned how to power off the
> > cdrom, the only relevant content is the newly defined device attention pin,
> > which is used to notify the host that this powered off device needs attention.
> > Or do I miss something?
>
> You're right.
>
> >
> > >
> > > CDROM is powered off by executing ACPI power resource's _OFF method.
> > >
> >
> > AMD has a different implementation to power off the CDROM, I'll need to
> > prepare another patch based on yours.
>
> Could you share AMD's implementation?
>
Sure.
We have defined a special device, named ODDZ, under the SATA scope in the
DSDT table. So basically, the existence of device ODDZ means the support
of ZPODD at the platform level. And there are the _PS0 and _PS3 control
methods under device ODDZ to power on/off the attached cdrom.
The code is not ready yet, I'm still working on it.
Thanks,
Aaron
> >
> > > When CDROM is powered off(D3Cold state), inserting disk will trigger a
> > > wakeup event(GPE). GPE AML handler notifies the associated device. Then
> > > CDROM is resumed in the notify handler.
> > >
> > > Signed-off-by: Lin Ming <[email protected]>
> > > ---
> > > drivers/ata/libata-acpi.c | 64 +++++++++++++++++++++++++++++++++++++-------
> > > drivers/scsi/sr.c | 39 +++++++++++++++++++++++++++
> > > drivers/scsi/sr.h | 3 ++
> > > 3 files changed, 95 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
> > > index 5fc97d2..bf4eace 100644
> > > --- a/drivers/scsi/sr.c
> > > +++ b/drivers/scsi/sr.c
> > > @@ -716,6 +752,9 @@ static int sr_probe(struct device *dev)
> > > disk->flags |= GENHD_FL_REMOVABLE;
> > > add_disk(disk);
> > >
> > > + if (device_run_wake(dev))
> > > + cd->zpodd = 1;
> > > +
> >
> > For a cd to support zero power, it has to support device attention pin.
> > If it does not support that, once it is powered off, it can't be power up
> > back. So I don't think device_run_wake is enough to set the zpodd flag,
> > unless your firmware has done the check and created the acpi table
> > according to the result. Is it the case?
>
> I should add device attention pin check.
>
> Thanks for the comments.
> Lin Ming
>
> >
> > Thanks,
> > Aaron
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
On 二, 2012-02-14 at 23:29 +0100, Rafael J. Wysocki wrote:
> On Tuesday, February 14, 2012, Zhang Rui wrote:
> > Hi, Rafael,
> >
> > On 一, 2012-02-13 at 21:25 +0100, Rafael J. Wysocki wrote:
> > > On Monday, February 13, 2012, Lin Ming wrote:
> > > > From: Zhang Rui <[email protected]>
> > > >
> > > > If a device has _PR3._ON, it means the device supports D3_HOT.
> > > > If a device has _PR3._OFF, it means the device supports D3_COLD.
> > > > Add the ability to validate and enter D3_COLD state in ACPI.
> > > >
> > > > Signed-off-by: Zhang Rui <[email protected]>
> > >
> > > This is supposed to be ACPI 5.0 support, right?
> > >
> > No, D3_HOT is introduced in ACPI spec 4.0.
> > According to the spec, _PR3 is used for devices that support both
> > D3(D3_COLD) and D3HOT.
>
> Yes, it does.
>
> > The confusion here is that Linux D3 equals ACPICA D3HOT and Linux
> > D3_COLD equals ACPICA D3.
> > For example, when enter Linux ACPI D3, the reference count of ACPI Power
> > Resources in _PR3 is increased by one.
>
> That's correct.
>
> > > So can anyone please tell me what part of the ACPI 5.0 spec is the
> > > basis of this patch, because I can't see that immediately?
> > >
> > > The only places where D3Cold is _mentioned_ are Section 7.2.12 (_PRE, which
> > > appears to be new in 5.0), Section 7.2.20 (_S0W), Section 7.2.21 (_S1W),
> > > Section 7.2.22 (_S2W), Section 7.2.23 (_S3W) and Section 7.2.24 (_S4W).
> > > None of them mentions those _PR3._ON and _PR3._OFF things above.
> > >
> > > Moreover, my understanding of the spec is that D3Cold means all of the
> > > power resources returned by _PR3 are "off" (whereas some of them will be
> > > "on" in D3hot).
> > >
> > Agreed.
> >
> > > > ---
> > > > drivers/acpi/power.c | 4 ++--
> > > > drivers/acpi/scan.c | 10 +++++++++-
> > > > 2 files changed, 11 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
> > > > index 9ac2a9f..0d681fb 100644
> > > > --- a/drivers/acpi/power.c
> > > > +++ b/drivers/acpi/power.c
> > > > @@ -500,14 +500,14 @@ int acpi_power_transition(struct acpi_device *device, int state)
> > > > {
> > > > int result;
> > > >
> > > > - if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3))
> > > > + if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD))
> > > > return -EINVAL;
> > > >
> > > > if (device->power.state == state)
> > > > return 0;
> > > >
> > > > if ((device->power.state < ACPI_STATE_D0)
> > > > - || (device->power.state > ACPI_STATE_D3))
> > > > + || (device->power.state > ACPI_STATE_D3_COLD))
> > > > return -ENODEV;
> > > >
> > > > /* TBD: Resources must be ordered. */
> > > > diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
> > > > index 8ab80ba..a9d4391 100644
> > > > --- a/drivers/acpi/scan.c
> > > > +++ b/drivers/acpi/scan.c
> > > > @@ -881,8 +881,16 @@ static int acpi_bus_get_power_flags(struct acpi_device *device)
> > > >
> > > > device->power.flags.power_resources = 1;
> > > > ps->flags.valid = 1;
> > > > - for (j = 0; j < ps->resources.count; j++)
> > > > + for (j = 0; j < ps->resources.count; j++) {
> > > > acpi_bus_add_power_resource(ps->resources.handles[j]);
> > > > + /* Check for D3_COLD support. _PR3._OFF equals D3_COLD ? */
> > > > + if (i == ACPI_STATE_D3) {
> > > > + if (j == 0)
> > > > + device->power.states[ACPI_STATE_D3_COLD].flags.valid = 1;
> > > > + status = acpi_get_handle(ps->resources.handles[j], "_OFF", &handle);
> > > > + device->power.states[ACPI_STATE_D3_COLD].flags.valid &= ACPI_SUCCESS(status);
> > > > + }
> > > > + }
> > >
> > > Sorry, but this doesn't make sense to me. Power resources always have
> > > the _OFF method, right?
> > >
> > I'm not sure.
>
> That would be explicitly against the spec that says that power resources
> are *required* to have _ON, _OFF and _STA.
>
> > I thought I had seen ACPI Power Resources without _OFF
> > control method somewhere in bugzilla, but I can not find it out now.
>
> That, clearly, is a firmware bug.
>
Okay, agreed.
so how about this? _PR3 equals D3_HOT support.
> > Hmm, how about set D3_COLD support if _PR3 exists, but leave a warning
> > message if _OFF doesn't exist, for now?
>
> I don't think we need to set D3_COLD support at all. In fact, it is always
> supported (as I said, if all power resources used by a device are off, the
> device is in D3_COLD pretty much by definition).
>
Yeah, but it seems that Linux uses ACPI_D3 for both ACPICA D3_HOT and D3
(off). I'm generating a patch to remove ACPI_D3_COLD and introduce
D3_HOT support in Linux kernel.
thanks,
rui
On 二, 2012-02-14 at 23:36 +0100, Rafael J. Wysocki wrote:
> > > > + */
> > > > +struct acpi_powered_device {
> > > > + struct list_head node;
> > > > + struct device *dev;
> > > > +};
> > > > +
> > > > /* --------------------------------------------------------------------------
> > > > Power Resource Management
> > > > -------------------------------------------------------------------------- */
> > > > @@ -455,6 +470,118 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
> > > > Device Power Management
> > > > -------------------------------------------------------------------------- */
> > > >
> > > > +int acpi_power_resource_register_device(struct device *dev, acpi_handle handle)
> > > > +{
> > > > + struct acpi_power_resource *resource;
> > > > + struct acpi_device *acpi_dev, *res_dev;
> > > > + acpi_handle res_handle = NULL;
> > > > + struct acpi_powered_device *apd;
> > > > + int i;
> > > > + int ret;
> > > > +
> > > > + if (!handle || !dev)
> > > > + return -EINVAL;
> > > > +
> > > > + ret = acpi_bus_get_device(handle, &acpi_dev);
> > > > + if (ret)
> > > > + goto no_power_resource;
> > > > +
> > > > + if (!acpi_dev->power.flags.power_resources)
> > > > + goto no_power_resource;
> > > > +
> > > > + for (i = 0; i <= ACPI_STATE_D3; i++) {
> > > > + struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
> > > > +
> > > > + if (!ps->flags.valid)
> > > > + continue;
> > > > +
> > > > + if (ps->resources.count > 1)
> > > > + return 0;
> > > > +
> > > > + if (!res_handle)
> > > > + res_handle = ps->resources.handles[0];
> > > > + else if (res_handle != ps->resources.handles[0])
> > > > + return 0;
> > > > + }
> > >
> > > I'm not sure what the above checks are needed for. It seems that this function
> > > will only be called from acpi_bus_add_power_resource() (which needs to be
> > > modified for this purpose, BTW), so it doesn't need to check all those things
> > > (those checks have been made already).
> > >
> > No. These two APIs are introduced to support the runtime D3_COLD remote
> > wakeup. And they should be invoked by drivers, either in driver code or
> > via bus layer code.
> >
> > Say, ATA port, who has _PR3 in its ACPI node, knows that it can enter
> > D3_COLD at run time, and it supports remote wakeup in D3_COLD because it
> > has _S0W (return value 4).
> > When remote wakeup event is triggered, there is an ACPI event sent to
> > the ATA controller/port, which sets the ATA controller/port back to D0
> > state.
> > At this time, what we actually need is to resume the ZPODD, rather than
> > the ATA controller/port. To follow the runtime PM model (runtime resume
> > starts from leaf devices),
>
> Well, this isn't the case. Parents are always resumed first.
>
Well, I mean runtime resume request issued by leaf devices first.
> > ATA code can use these two APIs to tell ACPI
> > to runtime resume ZPODD device directly, because ZPODD is powered by
> > this Power Resource as well.
>
> I'm not exactly sure what you're trying to achieve and what you mean by
> "resume a device directly"? Do you want to run the device's resume
> callback at the time when another device is being resumed?
>
I mean, wakeup event is sent to ATA port, but our goal is to resume
ZPODD after receiving this wakeup event.
Ideally, it is ACPI that resumes ATA port. And then, the ATA port
runtime resumes ZPODD. But this does not look good to runtime resume a
child device in the parent's .runtime_resume callback.
So I introduced these two APIs so that an runtime_resume request can be
sent to ZPODD directly and the runtime PM core can resume all the
parents of ZPODD automatically.
thanks,
rui
On 二, 2012-02-14 at 23:39 +0100, Rafael J. Wysocki wrote:
> On Tuesday, February 14, 2012, Zhang Rui wrote:
> > On 一, 2012-02-13 at 20:38 +0100, Rafael J. Wysocki wrote:
> > > On Monday, February 13, 2012, Alan Stern wrote:
> > > > On Mon, 13 Feb 2012, Lin Ming wrote:
> > > >
> > > > > From: Zhang Rui <[email protected]>
> > > > >
> > > > > Introduce flag can_power_off in device structure to support runtime
> > > > > power off/on.
> > > > >
> > > > > Note that, for a specific device driver,
> > > > > "support runtime power off/on" means that the driver .runtime_suspend
> > > > > callback needs to
> > > > > 1) save all the context so that it can restore the device back to the previous
> > > > > working state after powered on.
> > > > > 2) set can_power_off flag to tell the driver model that it's ready for power off.
> > > > >
> > > > > The following example shows how this works.
> > > > >
> > > > > device A
> > > > > |---------|
> > > > > v v
> > > > > device B device C
> > > > >
> > > > > A is the parent of device B and device C, and device A/B/C shares the
> > > > > same power logic
> > > > > (Only device A knows how to turn on/off the power).
> > > > >
> > > > > In order to power off A, B, C at runtime,
> > > > > 1) device B and device C should support runtime power off
> > > > > (runtime suspended with can_power_off flag set)
> > > > > 2) pm idle request for device A is fired by runtime PM core.
> > > > > 3) in device A .runtime_suspend callback, it tries to set can_power_off flag.
> > > > > 4) if succeed, it means all its children have been ready for power off
> > > > > and it can turn off the power at any time.
> > > > > 5) if failed, it means at least one of its children does not support runtime
> > > > > power off, thus the power can not be turned off.
> > > >
> > > > I'm not sure if this is really the right approach. What you're trying
> > > > to do is implement two different low-power states, basically D3hot and
> > > > D3cold. Currently the runtime PM core doesn't support such things; all
> > > > it knows about is low power and full power.
> > >
> > > I'd rather say all it knows about is "suspended" and "active", which mean
> > > "the device is not processing I/O" and "the device may be processing I/O",
> > > respectively. A "suspended" device may or may not be in a low-power state,
> > > but the runtime PM core doesn't care about that.
> > >
> > yes, I know that.
> >
> > > > Before doing an ad-hoc implementation, it would be best to step back
> > > > and think about other subsystems. Other sorts of devices may well have
> > > > multiple low-power states. What's the best way for this to be
> > > > supported by the PM core?
> > >
> > > Well, I honestly don't think there's any way they all can be covered at the
> > > same time and that's why we chose to support only "suspended" and "active"
> > > as defined above.
> >
> > > The handling of multiple low-power states must be
> > > implemented outside of the runtime PM core (like in the PCI core, for example).
> >
> > Surely I'd prefer to implement it in the bus code, :), but the problem
> > is that several buses maybe involved at the same time.
> > Let's take ZPODD for example,
> > ZPODD is attached to a SATA port. Only SATA port knows that it can be
> > runtime powered off, because its ACPI node has _PR3._OFF.
> > But when ATA layer code tries to put SATA port to D3_COLD at runtime,it
> > must make sure all the devices/drivers in the same power domain are
> > ready for power off, and in this case, we need to get this info from
> > SCSI layer.
>
> Then you need to get it from there. I know that this is a difficult problem,
Yeah, I have thought about this for quite a while before, there ARE
several ways to do this, but these need a lot of changes in bus code, at
least for the buses that support device runtime D3 (off) by ACPI.
Lets also take SATA port and ZPODD for example,
proposal one,
1) introduce scsi_can_power_off and ata_can_power_off.
2) sr driver set scsi_can_power_off bit and scsi layer is aware of this,
thus the scsi host can set this bit as well.
3) in the .runtime_suspend callback of ata port, it knows that its scsi
host interface can be powered off, thus it invokes ata_can_power_off to
tell the ata layer.
proposal two,
introduce a platform callback for each bus.
And it is invoked immediately after the scsi_driver->runtime_suspend
being invoked in scsi_bus->runtime_suspend.
The platform callback checks the scsi lower power state of the
scsi_device and choose a compatible ACPI D-state for the device.
The decision of whether to use ACPI D3 (off) or not is made in the
platform callback.
what do you think?
> have been working on a similar one for several months now. :-)
That's why generic power domain is introduced?
Can you tell me what's your idea please?
It would be GREAT if you can share your experience on this.
thanks,
rui
On Thu, 16 Feb 2012, Zhang Rui wrote:
> > I'm not exactly sure what you're trying to achieve and what you mean by
> > "resume a device directly"? Do you want to run the device's resume
> > callback at the time when another device is being resumed?
> >
> I mean, wakeup event is sent to ATA port, but our goal is to resume
> ZPODD after receiving this wakeup event.
> Ideally, it is ACPI that resumes ATA port. And then, the ATA port
> runtime resumes ZPODD. But this does not look good to runtime resume a
> child device in the parent's .runtime_resume callback.
> So I introduced these two APIs so that an runtime_resume request can be
> sent to ZPODD directly and the runtime PM core can resume all the
> parents of ZPODD automatically.
It's not clear what you're trying to achieve. Do you basically want
the ZPODD always to be suspended and resumed along with the ATA port,
or should it be possible to suspend the ZPODD while the port remains
running?
Alan Stern
On Thu, 2012-02-16 at 10:13 -0500, Alan Stern wrote:
> On Thu, 16 Feb 2012, Zhang Rui wrote:
>
> > > I'm not exactly sure what you're trying to achieve and what you mean by
> > > "resume a device directly"? Do you want to run the device's resume
> > > callback at the time when another device is being resumed?
> > >
> > I mean, wakeup event is sent to ATA port, but our goal is to resume
> > ZPODD after receiving this wakeup event.
> > Ideally, it is ACPI that resumes ATA port. And then, the ATA port
> > runtime resumes ZPODD. But this does not look good to runtime resume a
> > child device in the parent's .runtime_resume callback.
> > So I introduced these two APIs so that an runtime_resume request can be
> > sent to ZPODD directly and the runtime PM core can resume all the
> > parents of ZPODD automatically.
>
> It's not clear what you're trying to achieve. Do you basically want
> the ZPODD always to be suspended and resumed along with the ATA port,
> or should it be possible to suspend the ZPODD while the port remains
> running?
We want to ZPODD always to be suspended and resumed along with the ATA
port.
Below is part of the GPE handler for ZPODD device attention event.
Scope (\_GPE)
{
Method (_L13, 0, NotSerialized)
{
ADBG ("ZPODD DA Event")
....
Notify (\_SB.PCI0.SAT0.PRT2, 0x02)
....
}
}
It maybe a bit confused, but actually, \_SB.PCI0.SAT0.PRT2 is bind to
the attached device, not the ata port itself.
See below commit in linux-next tree.
75d22c(libata: Bind the Linux device tree to the ACPI device tree)
And below notify handler(PATCH 6) will resume the attached device(CDROM
in ZPODD case).
+static void ata_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
+{
+ struct ata_device *ata_dev = context;
+
+ if (event == ACPI_NOTIFY_DEVICE_WAKE && ata_dev)
+ scsi_autopm_get_device(ata_dev->sdev);
+}
+
But the code to power on/power off the device is in ata_acpi_set_state,
which is called when ata port is resumed/suspended.
ata_eh_handle_port_resume/suspend
ata_acpi_set_state
ata_for_each_dev {
acpi_bus_set_power(<the acpi handle of the device>, acpi_state)
}
Could you take a look at PATCH 6?
It's more clear over there.
Thanks,
Lin Ming
>
> Alan Stern
>
Hi, alan,
> -----Original Message-----
> From: Alan Stern [mailto:[email protected]]
> Sent: Thursday, February 16, 2012 11:14 PM
> To: Zhang, Rui
> Cc: Rafael J. Wysocki; Lin, Ming M; Jeff Garzik; Tejun Heo; Len Brown;
> [email protected]; [email protected]; linux-
> [email protected]; [email protected]
> Subject: Re: [RFC PATCH 2/6] ACPI: Reference devices in ACPI Power
> Resource
> Importance: High
>
> On Thu, 16 Feb 2012, Zhang Rui wrote:
>
> > > I'm not exactly sure what you're trying to achieve and what you
> mean by
> > > "resume a device directly"? Do you want to run the device's resume
> > > callback at the time when another device is being resumed?
> > >
> > I mean, wakeup event is sent to ATA port, but our goal is to resume
> > ZPODD after receiving this wakeup event.
> > Ideally, it is ACPI that resumes ATA port. And then, the ATA port
> > runtime resumes ZPODD. But this does not look good to runtime resume
> a
> > child device in the parent's .runtime_resume callback.
> > So I introduced these two APIs so that an runtime_resume request can
> be
> > sent to ZPODD directly and the runtime PM core can resume all the
> > parents of ZPODD automatically.
>
> It's not clear what you're trying to achieve.
> Do you basically want
> the ZPODD always to be suspended and resumed along with the ATA port,
No. ZPODD suspends itself, which put ZPODD to a SCSI low power state (NOT power off/D3_COLD).
And then it is the "Runtime PM core" that suspends ATA port after ZPODD being suspended.
And the .runtime_suspend callback for ATA port actually turns off the ZPODD power.
During resume, ATA port is resumed first because of the ACPI wakeup event.
But in fact, this wakeup event should be read as "ZPODD remote wakeup signal", thus runtime resume request is sent to ZPODD, done by Patch 3/6.
> or should it be possible to suspend the ZPODD while the port remains
> running?
>
Sure, but the power is still on at this time.
Thanks,
rui
On Fri, 17 Feb 2012, Zhang, Rui wrote:
> > Do you basically want
> > the ZPODD always to be suspended and resumed along with the ATA port,
>
> No. ZPODD suspends itself, which put ZPODD to a SCSI low power state (NOT power off/D3_COLD).
> And then it is the "Runtime PM core" that suspends ATA port after ZPODD being suspended.
> And the .runtime_suspend callback for ATA port actually turns off the ZPODD power.
>
> During resume, ATA port is resumed first because of the ACPI wakeup event.
> But in fact, this wakeup event should be read as "ZPODD remote wakeup signal", thus runtime resume request is sent to ZPODD, done by Patch 3/6.
>
> > or should it be possible to suspend the ZPODD while the port remains
> > running?
> >
> Sure, but the power is still on at this time.
Then maybe you can use pm_runtime_no_callbacks() for the ZPODD device.
It's explained in Documentation/power/runtime_pm.txt, and I use it for
USB interfaces.
The idea is that the ZPODD will never receive any runtime PM callbacks
from the PM core. Instead the ATA port callback routines will be
responsible for power management of the ZPODD device.
Alan Stern
On Thursday, February 16, 2012, Zhang Rui wrote:
> On 二, 2012-02-14 at 23:29 +0100, Rafael J. Wysocki wrote:
> > On Tuesday, February 14, 2012, Zhang Rui wrote:
> > > Hi, Rafael,
> > >
> > > On 一, 2012-02-13 at 21:25 +0100, Rafael J. Wysocki wrote:
> > > > On Monday, February 13, 2012, Lin Ming wrote:
> > > > > From: Zhang Rui <[email protected]>
> > > > >
> > > > > If a device has _PR3._ON, it means the device supports D3_HOT.
> > > > > If a device has _PR3._OFF, it means the device supports D3_COLD.
> > > > > Add the ability to validate and enter D3_COLD state in ACPI.
> > > > >
> > > > > Signed-off-by: Zhang Rui <[email protected]>
> > > >
> > > > This is supposed to be ACPI 5.0 support, right?
> > > >
> > > No, D3_HOT is introduced in ACPI spec 4.0.
> > > According to the spec, _PR3 is used for devices that support both
> > > D3(D3_COLD) and D3HOT.
> >
> > Yes, it does.
> >
> > > The confusion here is that Linux D3 equals ACPICA D3HOT and Linux
> > > D3_COLD equals ACPICA D3.
> > > For example, when enter Linux ACPI D3, the reference count of ACPI Power
> > > Resources in _PR3 is increased by one.
> >
> > That's correct.
> >
> > > > So can anyone please tell me what part of the ACPI 5.0 spec is the
> > > > basis of this patch, because I can't see that immediately?
> > > >
> > > > The only places where D3Cold is _mentioned_ are Section 7.2.12 (_PRE, which
> > > > appears to be new in 5.0), Section 7.2.20 (_S0W), Section 7.2.21 (_S1W),
> > > > Section 7.2.22 (_S2W), Section 7.2.23 (_S3W) and Section 7.2.24 (_S4W).
> > > > None of them mentions those _PR3._ON and _PR3._OFF things above.
> > > >
> > > > Moreover, my understanding of the spec is that D3Cold means all of the
> > > > power resources returned by _PR3 are "off" (whereas some of them will be
> > > > "on" in D3hot).
> > > >
> > > Agreed.
> > >
> > > > > ---
> > > > > drivers/acpi/power.c | 4 ++--
> > > > > drivers/acpi/scan.c | 10 +++++++++-
> > > > > 2 files changed, 11 insertions(+), 3 deletions(-)
> > > > >
> > > > > diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
> > > > > index 9ac2a9f..0d681fb 100644
> > > > > --- a/drivers/acpi/power.c
> > > > > +++ b/drivers/acpi/power.c
> > > > > @@ -500,14 +500,14 @@ int acpi_power_transition(struct acpi_device *device, int state)
> > > > > {
> > > > > int result;
> > > > >
> > > > > - if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3))
> > > > > + if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD))
> > > > > return -EINVAL;
> > > > >
> > > > > if (device->power.state == state)
> > > > > return 0;
> > > > >
> > > > > if ((device->power.state < ACPI_STATE_D0)
> > > > > - || (device->power.state > ACPI_STATE_D3))
> > > > > + || (device->power.state > ACPI_STATE_D3_COLD))
> > > > > return -ENODEV;
> > > > >
> > > > > /* TBD: Resources must be ordered. */
> > > > > diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
> > > > > index 8ab80ba..a9d4391 100644
> > > > > --- a/drivers/acpi/scan.c
> > > > > +++ b/drivers/acpi/scan.c
> > > > > @@ -881,8 +881,16 @@ static int acpi_bus_get_power_flags(struct acpi_device *device)
> > > > >
> > > > > device->power.flags.power_resources = 1;
> > > > > ps->flags.valid = 1;
> > > > > - for (j = 0; j < ps->resources.count; j++)
> > > > > + for (j = 0; j < ps->resources.count; j++) {
> > > > > acpi_bus_add_power_resource(ps->resources.handles[j]);
> > > > > + /* Check for D3_COLD support. _PR3._OFF equals D3_COLD ? */
> > > > > + if (i == ACPI_STATE_D3) {
> > > > > + if (j == 0)
> > > > > + device->power.states[ACPI_STATE_D3_COLD].flags.valid = 1;
> > > > > + status = acpi_get_handle(ps->resources.handles[j], "_OFF", &handle);
> > > > > + device->power.states[ACPI_STATE_D3_COLD].flags.valid &= ACPI_SUCCESS(status);
> > > > > + }
> > > > > + }
> > > >
> > > > Sorry, but this doesn't make sense to me. Power resources always have
> > > > the _OFF method, right?
> > > >
> > > I'm not sure.
> >
> > That would be explicitly against the spec that says that power resources
> > are *required* to have _ON, _OFF and _STA.
> >
> > > I thought I had seen ACPI Power Resources without _OFF
> > > control method somewhere in bugzilla, but I can not find it out now.
> >
> > That, clearly, is a firmware bug.
> >
> Okay, agreed.
> so how about this? _PR3 equals D3_HOT support.
Yes, I can agree with that. :-)
> > > Hmm, how about set D3_COLD support if _PR3 exists, but leave a warning
> > > message if _OFF doesn't exist, for now?
> >
> > I don't think we need to set D3_COLD support at all. In fact, it is always
> > supported (as I said, if all power resources used by a device are off, the
> > device is in D3_COLD pretty much by definition).
> >
> Yeah, but it seems that Linux uses ACPI_D3 for both ACPICA D3_HOT and D3
> (off). I'm generating a patch to remove ACPI_D3_COLD and introduce
> D3_HOT support in Linux kernel.
That's a good idea in my opinion.
Thanks,
Rafael
On Thursday, February 16, 2012, Zhang Rui wrote:
> On 二, 2012-02-14 at 23:36 +0100, Rafael J. Wysocki wrote:
> > > > > + */
> > > > > +struct acpi_powered_device {
> > > > > + struct list_head node;
> > > > > + struct device *dev;
> > > > > +};
> > > > > +
> > > > > /* --------------------------------------------------------------------------
> > > > > Power Resource Management
> > > > > -------------------------------------------------------------------------- */
> > > > > @@ -455,6 +470,118 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev)
> > > > > Device Power Management
> > > > > -------------------------------------------------------------------------- */
> > > > >
> > > > > +int acpi_power_resource_register_device(struct device *dev, acpi_handle handle)
> > > > > +{
> > > > > + struct acpi_power_resource *resource;
> > > > > + struct acpi_device *acpi_dev, *res_dev;
> > > > > + acpi_handle res_handle = NULL;
> > > > > + struct acpi_powered_device *apd;
> > > > > + int i;
> > > > > + int ret;
> > > > > +
> > > > > + if (!handle || !dev)
> > > > > + return -EINVAL;
> > > > > +
> > > > > + ret = acpi_bus_get_device(handle, &acpi_dev);
> > > > > + if (ret)
> > > > > + goto no_power_resource;
> > > > > +
> > > > > + if (!acpi_dev->power.flags.power_resources)
> > > > > + goto no_power_resource;
> > > > > +
> > > > > + for (i = 0; i <= ACPI_STATE_D3; i++) {
> > > > > + struct acpi_device_power_state *ps = &acpi_dev->power.states[i];
> > > > > +
> > > > > + if (!ps->flags.valid)
> > > > > + continue;
> > > > > +
> > > > > + if (ps->resources.count > 1)
> > > > > + return 0;
> > > > > +
> > > > > + if (!res_handle)
> > > > > + res_handle = ps->resources.handles[0];
> > > > > + else if (res_handle != ps->resources.handles[0])
> > > > > + return 0;
> > > > > + }
> > > >
> > > > I'm not sure what the above checks are needed for. It seems that this function
> > > > will only be called from acpi_bus_add_power_resource() (which needs to be
> > > > modified for this purpose, BTW), so it doesn't need to check all those things
> > > > (those checks have been made already).
> > > >
> > > No. These two APIs are introduced to support the runtime D3_COLD remote
> > > wakeup. And they should be invoked by drivers, either in driver code or
> > > via bus layer code.
> > >
> > > Say, ATA port, who has _PR3 in its ACPI node, knows that it can enter
> > > D3_COLD at run time, and it supports remote wakeup in D3_COLD because it
> > > has _S0W (return value 4).
> > > When remote wakeup event is triggered, there is an ACPI event sent to
> > > the ATA controller/port, which sets the ATA controller/port back to D0
> > > state.
> > > At this time, what we actually need is to resume the ZPODD, rather than
> > > the ATA controller/port. To follow the runtime PM model (runtime resume
> > > starts from leaf devices),
> >
> > Well, this isn't the case. Parents are always resumed first.
> >
> Well, I mean runtime resume request issued by leaf devices first.
That still causes runtime PM callbacks for parents to be run before runtime PM
callbacks for the children.
It is impossible to runtime-resume a child device (using the runtime PM
framework's functions at least) if the parent of it is not "active".
> > > ATA code can use these two APIs to tell ACPI
> > > to runtime resume ZPODD device directly, because ZPODD is powered by
> > > this Power Resource as well.
> >
> > I'm not exactly sure what you're trying to achieve and what you mean by
> > "resume a device directly"? Do you want to run the device's resume
> > callback at the time when another device is being resumed?
> >
> I mean, wakeup event is sent to ATA port, but our goal is to resume
> ZPODD after receiving this wakeup event.
> Ideally, it is ACPI that resumes ATA port. And then, the ATA port
> runtime resumes ZPODD. But this does not look good to runtime resume a
> child device in the parent's .runtime_resume callback.
OK, I think I see what the problem is.
> So I introduced these two APIs so that an runtime_resume request can be
> sent to ZPODD directly and the runtime PM core can resume all the
> parents of ZPODD automatically.
It seems that you're confusing things. In the Linux driver model a device
cannot have more than a single parent.
Thanks,
Rafael
On Friday, February 17, 2012, Lin Ming wrote:
> On Thu, 2012-02-16 at 10:13 -0500, Alan Stern wrote:
> > On Thu, 16 Feb 2012, Zhang Rui wrote:
> >
> > > > I'm not exactly sure what you're trying to achieve and what you mean by
> > > > "resume a device directly"? Do you want to run the device's resume
> > > > callback at the time when another device is being resumed?
> > > >
> > > I mean, wakeup event is sent to ATA port, but our goal is to resume
> > > ZPODD after receiving this wakeup event.
> > > Ideally, it is ACPI that resumes ATA port. And then, the ATA port
> > > runtime resumes ZPODD. But this does not look good to runtime resume a
> > > child device in the parent's .runtime_resume callback.
> > > So I introduced these two APIs so that an runtime_resume request can be
> > > sent to ZPODD directly and the runtime PM core can resume all the
> > > parents of ZPODD automatically.
> >
> > It's not clear what you're trying to achieve. Do you basically want
> > the ZPODD always to be suspended and resumed along with the ATA port,
> > or should it be possible to suspend the ZPODD while the port remains
> > running?
>
> We want to ZPODD always to be suspended and resumed along with the ATA
> port.
>
> Below is part of the GPE handler for ZPODD device attention event.
>
> Scope (\_GPE)
> {
> Method (_L13, 0, NotSerialized)
> {
> ADBG ("ZPODD DA Event")
> ....
>
> Notify (\_SB.PCI0.SAT0.PRT2, 0x02)
> ....
> }
> }
>
> It maybe a bit confused, but actually, \_SB.PCI0.SAT0.PRT2 is bind to
> the attached device, not the ata port itself.
>
> See below commit in linux-next tree.
> 75d22c(libata: Bind the Linux device tree to the ACPI device tree)
>
> And below notify handler(PATCH 6) will resume the attached device(CDROM
> in ZPODD case).
>
> +static void ata_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
> +{
> + struct ata_device *ata_dev = context;
> +
> + if (event == ACPI_NOTIFY_DEVICE_WAKE && ata_dev)
> + scsi_autopm_get_device(ata_dev->sdev);
> +}
> +
>
> But the code to power on/power off the device is in ata_acpi_set_state,
> which is called when ata port is resumed/suspended.
>
> ata_eh_handle_port_resume/suspend
> ata_acpi_set_state
> ata_for_each_dev {
> acpi_bus_set_power(<the acpi handle of the device>, acpi_state)
> }
>
> Could you take a look at PATCH 6?
> It's more clear over there.
It seems that you can use pm_runtime_no_callbacks() to work around this
as suggested by Alan.
Thanks,
Rafael
On Thursday, February 16, 2012, Zhang Rui wrote:
> On 二, 2012-02-14 at 23:39 +0100, Rafael J. Wysocki wrote:
> > On Tuesday, February 14, 2012, Zhang Rui wrote:
> > > On 一, 2012-02-13 at 20:38 +0100, Rafael J. Wysocki wrote:
> > > > On Monday, February 13, 2012, Alan Stern wrote:
> > > > > On Mon, 13 Feb 2012, Lin Ming wrote:
> > > > >
> > > > > > From: Zhang Rui <[email protected]>
> > > > > >
> > > > > > Introduce flag can_power_off in device structure to support runtime
> > > > > > power off/on.
> > > > > >
> > > > > > Note that, for a specific device driver,
> > > > > > "support runtime power off/on" means that the driver .runtime_suspend
> > > > > > callback needs to
> > > > > > 1) save all the context so that it can restore the device back to the previous
> > > > > > working state after powered on.
> > > > > > 2) set can_power_off flag to tell the driver model that it's ready for power off.
> > > > > >
> > > > > > The following example shows how this works.
> > > > > >
> > > > > > device A
> > > > > > |---------|
> > > > > > v v
> > > > > > device B device C
> > > > > >
> > > > > > A is the parent of device B and device C, and device A/B/C shares the
> > > > > > same power logic
> > > > > > (Only device A knows how to turn on/off the power).
> > > > > >
> > > > > > In order to power off A, B, C at runtime,
> > > > > > 1) device B and device C should support runtime power off
> > > > > > (runtime suspended with can_power_off flag set)
> > > > > > 2) pm idle request for device A is fired by runtime PM core.
> > > > > > 3) in device A .runtime_suspend callback, it tries to set can_power_off flag.
> > > > > > 4) if succeed, it means all its children have been ready for power off
> > > > > > and it can turn off the power at any time.
> > > > > > 5) if failed, it means at least one of its children does not support runtime
> > > > > > power off, thus the power can not be turned off.
> > > > >
> > > > > I'm not sure if this is really the right approach. What you're trying
> > > > > to do is implement two different low-power states, basically D3hot and
> > > > > D3cold. Currently the runtime PM core doesn't support such things; all
> > > > > it knows about is low power and full power.
> > > >
> > > > I'd rather say all it knows about is "suspended" and "active", which mean
> > > > "the device is not processing I/O" and "the device may be processing I/O",
> > > > respectively. A "suspended" device may or may not be in a low-power state,
> > > > but the runtime PM core doesn't care about that.
> > > >
> > > yes, I know that.
> > >
> > > > > Before doing an ad-hoc implementation, it would be best to step back
> > > > > and think about other subsystems. Other sorts of devices may well have
> > > > > multiple low-power states. What's the best way for this to be
> > > > > supported by the PM core?
> > > >
> > > > Well, I honestly don't think there's any way they all can be covered at the
> > > > same time and that's why we chose to support only "suspended" and "active"
> > > > as defined above.
> > >
> > > > The handling of multiple low-power states must be
> > > > implemented outside of the runtime PM core (like in the PCI core, for example).
> > >
> > > Surely I'd prefer to implement it in the bus code, :), but the problem
> > > is that several buses maybe involved at the same time.
> > > Let's take ZPODD for example,
> > > ZPODD is attached to a SATA port. Only SATA port knows that it can be
> > > runtime powered off, because its ACPI node has _PR3._OFF.
> > > But when ATA layer code tries to put SATA port to D3_COLD at runtime,it
> > > must make sure all the devices/drivers in the same power domain are
> > > ready for power off, and in this case, we need to get this info from
> > > SCSI layer.
> >
> > Then you need to get it from there. I know that this is a difficult problem,
>
> Yeah, I have thought about this for quite a while before, there ARE
> several ways to do this, but these need a lot of changes in bus code, at
> least for the buses that support device runtime D3 (off) by ACPI.
>
> Lets also take SATA port and ZPODD for example,
> proposal one,
> 1) introduce scsi_can_power_off and ata_can_power_off.
> 2) sr driver set scsi_can_power_off bit and scsi layer is aware of this,
> thus the scsi host can set this bit as well.
> 3) in the .runtime_suspend callback of ata port, it knows that its scsi
> host interface can be powered off, thus it invokes ata_can_power_off to
> tell the ata layer.
Hmm. I'm not sure why you want to introduce this special "power off"
condition. In fact, it's nothing special, it only means that the device
in question shouldn't be accessed by software, which pretty much is equivalent
to the "suspended" condition (as defined in the runtime PM docs).
> proposal two,
> introduce a platform callback for each bus.
> And it is invoked immediately after the scsi_driver->runtime_suspend
> being invoked in scsi_bus->runtime_suspend.
> The platform callback checks the scsi lower power state of the
> scsi_device and choose a compatible ACPI D-state for the device.
> The decision of whether to use ACPI D3 (off) or not is made in the
> platform callback.
>
> what do you think?
I think you need to consider that at a more abstract level.
> > have been working on a similar one for several months now. :-)
>
> That's why generic power domain is introduced?
> Can you tell me what's your idea please?
> It would be GREAT if you can share your experience on this.
Well, a power domain (which seems to be what you have in the ZPODD case)
is analogous to a package with multiple CPU cores. In that case you
can put individual cores into per-core low-power ("idle") states (that
roughly corresponds to the D1-D3hot device states) or you can put the
whole package into a low-power state ("package idle") resulting in the
removal of power from all the cores (more-or-less). Now, it has to be
decided which approach to use and if the "package idle" is used, it may
be necessary to restore the cores' "state" when they are "resumed".
Analogously, for devices in a power domain you usually can use some
programmable mechanism to put each of them into some sort of a low-power
state (e.g. D3hot or "stop clock" etc.) such that the device may be programmed
to go out of it. Alternatively, you can use a different mechanism to
remove power from the entire domain, in which case devices, when power is
restored, may need to be re-initialized. Of course, you need to know when
this happens, so that you know when to carry out the re-initialization.
Our approach in the generic PM domains framework is, essentially, to provide
a special set of PM callbacks ("domain callbacks") that are run (by the PM
core) instead of bus-type PM callbacks. Those domain callbacks are added to
every device in the domain through its pm_domain pointer. Of course, this
means that devices have to be added to the domains explicitly and we have some
helpers for that. We also use some additional data structures allowing the
domain callbacks to track devices in the domain.
Now, when a device in a domain is "suspended" (meaning its runtime PM status
changes from "active" to "suspended"), the domain callbacks check if this is
the last device in the domain whose status is "active" at that point. If
that is not the case, they simply call a special .stop() callback to put the
device into a "normal" per-device low-power state (the .stop() callback may be
defined per device and in principle it may be designed to call the bus-type
or driver .runtime_suspend() callback for the device). Otherwise (i.e. if
this is the last device in the domain whose status was "active" before) and if
the PM QoS constraints allow that to happen, power is removed from the domain
as a whole. Then, all devices in the domain are marked as "need re-init upon
resume" and the resume domain callbacks take care of re-initializing them as
appropriate when their status changes from "suspended" back to "active". [The
domain callbacks use the subsys_data pointer in dev_pm_info to attach their own
data to device objects.]
The actual code is more complicated than that, but that's the idea.
Thanks,
Rafael
On Sat, Feb 18, 2012 at 7:54 AM, Rafael J. Wysocki <[email protected]> wrote:
> On Thursday, February 16, 2012, Zhang Rui wrote:
>> On 二, 2012-02-14 at 23:39 +0100, Rafael J. Wysocki wrote:
>> > On Tuesday, February 14, 2012, Zhang Rui wrote:
>> > > On 一, 2012-02-13 at 20:38 +0100, Rafael J. Wysocki wrote:
>> > > > On Monday, February 13, 2012, Alan Stern wrote:
>> > > > > On Mon, 13 Feb 2012, Lin Ming wrote:
[snip]
>> Yeah, I have thought about this for quite a while before, there ARE
>> several ways to do this, but these need a lot of changes in bus code, at
>> least for the buses that support device runtime D3 (off) by ACPI.
>>
>> Lets also take SATA port and ZPODD for example,
>> proposal one,
>> 1) introduce scsi_can_power_off and ata_can_power_off.
>> 2) sr driver set scsi_can_power_off bit and scsi layer is aware of this,
>> thus the scsi host can set this bit as well.
>> 3) in the .runtime_suspend callback of ata port, it knows that its scsi
>> host interface can be powered off, thus it invokes ata_can_power_off to
>> tell the ata layer.
>
> Hmm. I'm not sure why you want to introduce this special "power off"
> condition. In fact, it's nothing special, it only means that the device
> in question shouldn't be accessed by software, which pretty much is equivalent
> to the "suspended" condition (as defined in the runtime PM docs).
I think some reasons to introduce can_poweroff can be:
1) To indicate the implementation of .runtime_suspend/.runtime_resume
is compatible with power off. That is, .runtime_suspend will save all
needed information and .runtime_resume can work on the uninitialized
device.
If this is already the requirement of
.runtime_suspend/.runtime_resume. Then this is not needed. Maybe we
can make that explicitly for these callbacks via some kind of
documentation.
2) To support something like pm-qos. power off device may have more
exit.latency than normal low power state (such as D3Hot). Some device
may disable can_power_off based on that.
3) Whether to go to power off should be determined by leaf device
(such as SATA disk), but that may be done by its parent device (such
as SATA port). It's a way for leaf device to tell its parent device
whether it want to go to power off.
[snip]
Best Regards,
Huang Ying
On Saturday, February 18, 2012, huang ying wrote:
> On Sat, Feb 18, 2012 at 7:54 AM, Rafael J. Wysocki <[email protected]> wrote:
> > On Thursday, February 16, 2012, Zhang Rui wrote:
> >> On 二, 2012-02-14 at 23:39 +0100, Rafael J. Wysocki wrote:
> >> > On Tuesday, February 14, 2012, Zhang Rui wrote:
> >> > > On 一, 2012-02-13 at 20:38 +0100, Rafael J. Wysocki wrote:
> >> > > > On Monday, February 13, 2012, Alan Stern wrote:
> >> > > > > On Mon, 13 Feb 2012, Lin Ming wrote:
> [snip]
> >> Yeah, I have thought about this for quite a while before, there ARE
> >> several ways to do this, but these need a lot of changes in bus code, at
> >> least for the buses that support device runtime D3 (off) by ACPI.
> >>
> >> Lets also take SATA port and ZPODD for example,
> >> proposal one,
> >> 1) introduce scsi_can_power_off and ata_can_power_off.
> >> 2) sr driver set scsi_can_power_off bit and scsi layer is aware of this,
> >> thus the scsi host can set this bit as well.
> >> 3) in the .runtime_suspend callback of ata port, it knows that its scsi
> >> host interface can be powered off, thus it invokes ata_can_power_off to
> >> tell the ata layer.
> >
> > Hmm. I'm not sure why you want to introduce this special "power off"
> > condition. In fact, it's nothing special, it only means that the device
> > in question shouldn't be accessed by software, which pretty much is equivalent
> > to the "suspended" condition (as defined in the runtime PM docs).
>
> I think some reasons to introduce can_poweroff can be:
>
> 1) To indicate the implementation of .runtime_suspend/.runtime_resume
> is compatible with power off. That is, .runtime_suspend will save all
> needed information and .runtime_resume can work on the uninitialized
> device.
>
> If this is already the requirement of
> .runtime_suspend/.runtime_resume.
Yes, it is.
> Then this is not needed. Maybe we
> can make that explicitly for these callbacks via some kind of
> documentation.
I thought it was documented.
> 2) To support something like pm-qos. power off device may have more
> exit.latency than normal low power state (such as D3Hot). Some device
> may disable can_power_off based on that.
No, please. There would be totally _no_ _meaning_ of that flag at the core
level. Please use subsys_data in struct dev_pm_info for subsystem-specific
data (which is this one).
> 3) Whether to go to power off should be determined by leaf device
> (such as SATA disk), but that may be done by its parent device (such
> as SATA port). It's a way for leaf device to tell its parent device
> whether it want to go to power off.
Well, please see above.
Thanks,
Rafael
On 六, 2012-02-18 at 00:54 +0100, Rafael J. Wysocki wrote:
>
> > > have been working on a similar one for several months now. :-)
> >
> > That's why generic power domain is introduced?
> > Can you tell me what's your idea please?
> > It would be GREAT if you can share your experience on this.
>
> Well, a power domain (which seems to be what you have in the ZPODD case)
> is analogous to a package with multiple CPU cores. In that case you
> can put individual cores into per-core low-power ("idle") states (that
> roughly corresponds to the D1-D3hot device states) or you can put the
> whole package into a low-power state ("package idle") resulting in the
> removal of power from all the cores (more-or-less). Now, it has to be
> decided which approach to use and if the "package idle" is used, it may
> be necessary to restore the cores' "state" when they are "resumed".
>
> Analogously, for devices in a power domain you usually can use some
> programmable mechanism to put each of them into some sort of a low-power
> state (e.g. D3hot or "stop clock" etc.) such that the device may be programmed
> to go out of it. Alternatively, you can use a different mechanism to
> remove power from the entire domain, in which case devices, when power is
> restored, may need to be re-initialized. Of course, you need to know when
> this happens, so that you know when to carry out the re-initialization.
>
> Our approach in the generic PM domains framework is, essentially, to provide
> a special set of PM callbacks ("domain callbacks") that are run (by the PM
> core) instead of bus-type PM callbacks. Those domain callbacks are added to
> every device in the domain through its pm_domain pointer. Of course, this
> means that devices have to be added to the domains explicitly and we have some
> helpers for that. We also use some additional data structures allowing the
> domain callbacks to track devices in the domain.
>
> Now, when a device in a domain is "suspended" (meaning its runtime PM status
> changes from "active" to "suspended"), the domain callbacks check if this is
> the last device in the domain whose status is "active" at that point. If
> that is not the case, they simply call a special .stop() callback to put the
> device into a "normal" per-device low-power state (the .stop() callback may be
> defined per device and in principle it may be designed to call the bus-type
> or driver .runtime_suspend() callback for the device). Otherwise (i.e. if
> this is the last device in the domain whose status was "active" before) and if
> the PM QoS constraints allow that to happen, power is removed from the domain
> as a whole. Then, all devices in the domain are marked as "need re-init upon
> resume" and the resume domain callbacks take care of re-initializing them as
> appropriate when their status changes from "suspended" back to "active". [The
> domain callbacks use the subsys_data pointer in dev_pm_info to attach their own
> data to device objects.]
>
> The actual code is more complicated than that, but that's the idea.
>
Yeah, I have read the generic PM domain code before. and I have a
question about the generic PM domain code.
genpd->pow_off is invoked if all devices in a generic PM domain are
pm_runtime_suspended(). This suggests that the device driver can set
RPM_SUSPENDED flag only if it is able to bring the device from a cold
power off, right?
So how to handle this case, say, for a device in the generic PM domain
that supports 2 different low power state, D1 and D2.
D2 is deeper than D1, and it is kind of cold power off with remote
wakeup disabled. If the driver needs to runtime suspend the device with
remote wakeup enabled, it should set the device to D1, but it can not
set the RPM_SUSPEND?
thanks,
rui
On 五, 2012-02-17 at 23:23 +0100, Rafael J. Wysocki wrote:
> > Okay, agreed.
> > so how about this? _PR3 equals D3_HOT support.
>
> Yes, I can agree with that. :-)
>
> > > > Hmm, how about set D3_COLD support if _PR3 exists, but leave a warning
> > > > message if _OFF doesn't exist, for now?
> > >
> > > I don't think we need to set D3_COLD support at all. In fact, it is always
> > > supported (as I said, if all power resources used by a device are off, the
> > > device is in D3_COLD pretty much by definition).
> > >
> > Yeah, but it seems that Linux uses ACPI_D3 for both ACPICA D3_HOT and D3
> > (off). I'm generating a patch to remove ACPI_D3_COLD and introduce
> > D3_HOT support in Linux kernel.
>
> That's a good idea in my opinion.
Great.
Patch will be sent out later.
thanks,
rui
On 五, 2012-02-17 at 23:34 +0100, Rafael J. Wysocki wrote:
>
> > So I introduced these two APIs so that an runtime_resume request can be
> > sent to ZPODD directly and the runtime PM core can resume all the
> > parents of ZPODD automatically.
>
> It seems that you're confusing things. In the Linux driver model a device
> cannot have more than a single parent.
>
sorry, I mean the ancients here, e.g. scsi-host, etc. :)
thanks,
rui
On Monday, February 20, 2012, Zhang Rui wrote:
> On 六, 2012-02-18 at 00:54 +0100, Rafael J. Wysocki wrote:
> >
> > > > have been working on a similar one for several months now. :-)
> > >
> > > That's why generic power domain is introduced?
> > > Can you tell me what's your idea please?
> > > It would be GREAT if you can share your experience on this.
> >
> > Well, a power domain (which seems to be what you have in the ZPODD case)
> > is analogous to a package with multiple CPU cores. In that case you
> > can put individual cores into per-core low-power ("idle") states (that
> > roughly corresponds to the D1-D3hot device states) or you can put the
> > whole package into a low-power state ("package idle") resulting in the
> > removal of power from all the cores (more-or-less). Now, it has to be
> > decided which approach to use and if the "package idle" is used, it may
> > be necessary to restore the cores' "state" when they are "resumed".
> >
> > Analogously, for devices in a power domain you usually can use some
> > programmable mechanism to put each of them into some sort of a low-power
> > state (e.g. D3hot or "stop clock" etc.) such that the device may be programmed
> > to go out of it. Alternatively, you can use a different mechanism to
> > remove power from the entire domain, in which case devices, when power is
> > restored, may need to be re-initialized. Of course, you need to know when
> > this happens, so that you know when to carry out the re-initialization.
> >
> > Our approach in the generic PM domains framework is, essentially, to provide
> > a special set of PM callbacks ("domain callbacks") that are run (by the PM
> > core) instead of bus-type PM callbacks. Those domain callbacks are added to
> > every device in the domain through its pm_domain pointer. Of course, this
> > means that devices have to be added to the domains explicitly and we have some
> > helpers for that. We also use some additional data structures allowing the
> > domain callbacks to track devices in the domain.
> >
> > Now, when a device in a domain is "suspended" (meaning its runtime PM status
> > changes from "active" to "suspended"), the domain callbacks check if this is
> > the last device in the domain whose status is "active" at that point. If
> > that is not the case, they simply call a special .stop() callback to put the
> > device into a "normal" per-device low-power state (the .stop() callback may be
> > defined per device and in principle it may be designed to call the bus-type
> > or driver .runtime_suspend() callback for the device). Otherwise (i.e. if
> > this is the last device in the domain whose status was "active" before) and if
> > the PM QoS constraints allow that to happen, power is removed from the domain
> > as a whole. Then, all devices in the domain are marked as "need re-init upon
> > resume" and the resume domain callbacks take care of re-initializing them as
> > appropriate when their status changes from "suspended" back to "active". [The
> > domain callbacks use the subsys_data pointer in dev_pm_info to attach their own
> > data to device objects.]
> >
> > The actual code is more complicated than that, but that's the idea.
> >
> Yeah, I have read the generic PM domain code before. and I have a
> question about the generic PM domain code.
>
> genpd->pow_off is invoked if all devices in a generic PM domain are
> pm_runtime_suspended(). This suggests that the device driver can set
> RPM_SUSPENDED flag only if it is able to bring the device from a cold
> power off, right?
A device driver can _never_ set the RPM_SUSPENDED, the core does that.
> So how to handle this case, say, for a device in the generic PM domain
> that supports 2 different low power state, D1 and D2.
> D2 is deeper than D1, and it is kind of cold power off with remote
> wakeup disabled. If the driver needs to runtime suspend the device with
> remote wakeup enabled, it should set the device to D1, but it can not
> set the RPM_SUSPEND?
The device is regarded as "suspended" if its bus type's (or PM domain's)
.runtime_suspend() callback has been executed and has returned 0 (success).
What the callback has actually done is not of any interest to the core.
Now, the D1 and D2 case has to be handled by the bus (PM domain) and
driver. In both cases the device will be regarded as "suspended" and the
core doesn't track the actual device state.
I think the problem here is that the PCI bus type's runtime PM callbacks
aren't very sophisticated (they just choose the lowest possible low-power
state and attempt to put the device into it) and I can see two possible
ways to address that.
First, you can modify pci_pm_runtime_suspend/_resume() to handle multiple
states (for example, to choose the target low-power state more intelligently
than they do right now). Second, you can add a PM domain that will do what
you want from pci_pm_runtime_suspend/_resume() for a specific set of devices.
Thanks,
Rafael
On 二, 2012-02-21 at 00:13 +0100, Rafael J. Wysocki wrote:
> > So how to handle this case, say, for a device in the generic PM domain
> > that supports 2 different low power state, D1 and D2.
> > D2 is deeper than D1, and it is kind of cold power off with remote
> > wakeup disabled. If the driver needs to runtime suspend the device with
> > remote wakeup enabled, it should set the device to D1, but it can not
> > set the RPM_SUSPEND?
>
> The device is regarded as "suspended" if its bus type's (or PM domain's)
> .runtime_suspend() callback has been executed and has returned 0 (success).
> What the callback has actually done is not of any interest to the core.
>
right.
> Now, the D1 and D2 case has to be handled by the bus (PM domain) and
> driver. In both cases the device will be regarded as "suspended" and the
> core doesn't track the actual device state.
>
> I think the problem here is that the PCI bus type's runtime PM callbacks
> aren't very sophisticated (they just choose the lowest possible low-power
> state and attempt to put the device into it) and I can see two possible
> ways to address that.
>
> First, you can modify pci_pm_runtime_suspend/_resume() to handle multiple
> states (for example, to choose the target low-power state more intelligently
> than they do right now). Second, you can add a PM domain that will do what
> you want from pci_pm_runtime_suspend/_resume() for a specific set of devices.
>
But RPM_SUSPENDED is set by PM core after .runtime_suspend() being
invoked, even if device is in D1 instead of D2, right?
So the problem is that, if a device in a generic power domain supports
two low power state, one is compatible with generic power domain power
off and another is not, how can the device driver pass this information
to the generic power domain, i.e. how to runtime suspend a device while
keep the generic power domain always on?
thanks,
rui
On Fri, Feb 17, 2012 at 11:07 PM, Alan Stern <[email protected]> wrote:
> On Fri, 17 Feb 2012, Zhang, Rui wrote:
>
>> > Do you basically want
>> > the ZPODD always to be suspended and resumed along with the ATA port,
>>
>> No. ZPODD suspends itself, which put ZPODD to a SCSI low power state (NOT power off/D3_COLD).
>> And then it is the "Runtime PM core" that suspends ATA port after ZPODD being suspended.
>> And the .runtime_suspend callback for ATA port actually turns off the ZPODD power.
>>
>> During resume, ATA port is resumed first because of the ACPI wakeup event.
>> But in fact, this wakeup event should be read as "ZPODD remote wakeup signal", thus runtime resume request is sent to ZPODD, done by Patch 3/6.
>>
>> > or should it be possible to suspend the ZPODD while the port remains
>> > running?
>> >
>> Sure, but the power is still on at this time.
>
> Then maybe you can use pm_runtime_no_callbacks() for the ZPODD device.
> It's explained in Documentation/power/runtime_pm.txt, and I use it for
> USB interfaces.
If pm_runtime_no_callbacks() is used, runtime PM sysfs attributes
won't be created.
Then how to disable ZPODD feature in userspace?
Currently, I use "control" file of scsi device to enable/disable
ZPODD, for example
echo auto > /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/power/control
echo on > /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/power/control
>
> The idea is that the ZPODD will never receive any runtime PM callbacks
> from the PM core. ?Instead the ATA port callback routines will be
> responsible for power management of the ZPODD device.
Does the ATA port callback also responsible to resume its child?
For example,
/sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/
ata0 is resumed.
Then who will be responsible to resume host1, target1:0:0 and 1:0:0:0?
Or do you mean that we don't need to resume these devices at all?
host1 and target1:0:0 are logical devices, but I think 1:0:0:0 is not.
Thanks,
Lin Ming
On Tue, 21 Feb 2012, Lin Ming wrote:
> > Then maybe you can use pm_runtime_no_callbacks() for the ZPODD device.
> > It's explained in Documentation/power/runtime_pm.txt, and I use it for
> > USB interfaces.
>
> If pm_runtime_no_callbacks() is used, runtime PM sysfs attributes
> won't be created.
> Then how to disable ZPODD feature in userspace?
>
> Currently, I use "control" file of scsi device to enable/disable
> ZPODD, for example
> echo auto > /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/power/control
> echo on > /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/power/control
You could get the same result by using the "control" file for the ATA
port.
> > The idea is that the ZPODD will never receive any runtime PM callbacks
> > from the PM core. ?Instead the ATA port callback routines will be
> > responsible for power management of the ZPODD device.
>
> Does the ATA port callback also responsible to resume its child?
>
> For example,
> /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/
>
> ata0 is resumed.
>
> Then who will be responsible to resume host1, target1:0:0 and 1:0:0:0?
>
> Or do you mean that we don't need to resume these devices at all?
> host1 and target1:0:0 are logical devices, but I think 1:0:0:0 is not.
That's right. It makes no difference whether the host and target
are resumed or suspended. In fact, you could also call
pm_runtime_no_callbacks() for them.
Alan Stern
On Tuesday, February 21, 2012, Zhang Rui wrote:
> On 二, 2012-02-21 at 00:13 +0100, Rafael J. Wysocki wrote:
> > > So how to handle this case, say, for a device in the generic PM domain
> > > that supports 2 different low power state, D1 and D2.
> > > D2 is deeper than D1, and it is kind of cold power off with remote
> > > wakeup disabled. If the driver needs to runtime suspend the device with
> > > remote wakeup enabled, it should set the device to D1, but it can not
> > > set the RPM_SUSPEND?
> >
> > The device is regarded as "suspended" if its bus type's (or PM domain's)
> > .runtime_suspend() callback has been executed and has returned 0 (success).
> > What the callback has actually done is not of any interest to the core.
> >
> right.
>
> > Now, the D1 and D2 case has to be handled by the bus (PM domain) and
> > driver. In both cases the device will be regarded as "suspended" and the
> > core doesn't track the actual device state.
> >
>
>
> > I think the problem here is that the PCI bus type's runtime PM callbacks
> > aren't very sophisticated (they just choose the lowest possible low-power
> > state and attempt to put the device into it) and I can see two possible
> > ways to address that.
> >
> > First, you can modify pci_pm_runtime_suspend/_resume() to handle multiple
> > states (for example, to choose the target low-power state more intelligently
> > than they do right now). Second, you can add a PM domain that will do what
> > you want from pci_pm_runtime_suspend/_resume() for a specific set of devices.
> >
> But RPM_SUSPENDED is set by PM core after .runtime_suspend() being
> invoked, even if device is in D1 instead of D2, right?
>
> So the problem is that, if a device in a generic power domain supports
> two low power state, one is compatible with generic power domain power
> off and another is not, how can the device driver pass this information
> to the generic power domain, i.e. how to runtime suspend a device while
> keep the generic power domain always on?
There are two "low-power" levels in the generic PM domains framework. The
first one is the per-device low-power in which devices are put into their
individual (programmable) low-power states by the domain .dev_ops->stop()
callback. The second one is when .stop() has been called for all devices,
so presumably all of them are in programmable low-power states and it's
possible to switch the entire domain off. This is done by the domain
.power_off() callback.
It seems that the trick might be to make .dev_ops->stop() avoid turning off
power resources for the last suspending device in the domain and leave that
to domain .power_off().
Thanks,
Rafael
On 二, 2012-02-21 at 22:43 +0100, Rafael J. Wysocki wrote:
> On Tuesday, February 21, 2012, Zhang Rui wrote:
> > On 二, 2012-02-21 at 00:13 +0100, Rafael J. Wysocki wrote:
> > > > So how to handle this case, say, for a device in the generic PM domain
> > > > that supports 2 different low power state, D1 and D2.
> > > > D2 is deeper than D1, and it is kind of cold power off with remote
> > > > wakeup disabled. If the driver needs to runtime suspend the device with
> > > > remote wakeup enabled, it should set the device to D1, but it can not
> > > > set the RPM_SUSPEND?
> > >
> > > The device is regarded as "suspended" if its bus type's (or PM domain's)
> > > .runtime_suspend() callback has been executed and has returned 0 (success).
> > > What the callback has actually done is not of any interest to the core.
> > >
> > right.
> >
> > > Now, the D1 and D2 case has to be handled by the bus (PM domain) and
> > > driver. In both cases the device will be regarded as "suspended" and the
> > > core doesn't track the actual device state.
> > >
> >
> >
> > > I think the problem here is that the PCI bus type's runtime PM callbacks
> > > aren't very sophisticated (they just choose the lowest possible low-power
> > > state and attempt to put the device into it) and I can see two possible
> > > ways to address that.
> > >
> > > First, you can modify pci_pm_runtime_suspend/_resume() to handle multiple
> > > states (for example, to choose the target low-power state more intelligently
> > > than they do right now). Second, you can add a PM domain that will do what
> > > you want from pci_pm_runtime_suspend/_resume() for a specific set of devices.
> > >
> > But RPM_SUSPENDED is set by PM core after .runtime_suspend() being
> > invoked, even if device is in D1 instead of D2, right?
> >
> > So the problem is that, if a device in a generic power domain supports
> > two low power state, one is compatible with generic power domain power
> > off and another is not, how can the device driver pass this information
> > to the generic power domain, i.e. how to runtime suspend a device while
> > keep the generic power domain always on?
>
> There are two "low-power" levels in the generic PM domains framework. The
> first one is the per-device low-power in which devices are put into their
> individual (programmable) low-power states by the domain .dev_ops->stop()
> callback. The second one is when .stop() has been called for all devices,
> so presumably all of them are in programmable low-power states and it's
> possible to switch the entire domain off. This is done by the domain
> .power_off() callback.
>
> It seems that the trick might be to make .dev_ops->stop() avoid turning off
> power resources for the last suspending device in the domain and leave that
> to domain .power_off().
>
Yeah, that's a good idea.
so how about this proposal for ZPODD.
1. create an ACPI generic power domain for every device that has an _PR3
method, the SATA port in this case.
2. add sr device to the generic power domain via genpd APIs.
this can be done either in ATA port driver or in sr driver.
3. the .power_off callback of the generic power domain
a) checks if all the devices allows power off (for the trick above)
b) turns off the power resources in _PR3.
thanks,
rui
On Tue, 2012-02-21 at 11:06 -0500, Alan Stern wrote:
> On Tue, 21 Feb 2012, Lin Ming wrote:
>
> > > Then maybe you can use pm_runtime_no_callbacks() for the ZPODD device.
> > > It's explained in Documentation/power/runtime_pm.txt, and I use it for
> > > USB interfaces.
> >
> > If pm_runtime_no_callbacks() is used, runtime PM sysfs attributes
> > won't be created.
> > Then how to disable ZPODD feature in userspace?
> >
> > Currently, I use "control" file of scsi device to enable/disable
> > ZPODD, for example
> > echo auto > /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/power/control
> > echo on > /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/power/control
>
> You could get the same result by using the "control" file for the ATA
> port.
>
> > > The idea is that the ZPODD will never receive any runtime PM callbacks
> > > from the PM core. �Instead the ATA port callback routines will be
> > > responsible for power management of the ZPODD device.
> >
> > Does the ATA port callback also responsible to resume its child?
> >
> > For example,
> > /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/
> >
> > ata0 is resumed.
> >
> > Then who will be responsible to resume host1, target1:0:0 and 1:0:0:0?
> >
> > Or do you mean that we don't need to resume these devices at all?
> > host1 and target1:0:0 are logical devices, but I think 1:0:0:0 is not.
>
> That's right. It makes no difference whether the host and target
> are resumed or suspended. In fact, you could also call
> pm_runtime_no_callbacks() for them.
But host and target are added in scsi layer. How do we know if they are
"logical device"?
Or do you mean that we can call pm_runtime_no_callbacks() for them
because scsi layer(scsi_pm.c) only implements runtime pm for scsi
device.
Thanks,
Lin Ming
>
> Alan Stern
>
On Thu, 23 Feb 2012, Lin Ming wrote:
> > > For example,
> > > /sys/devices/pci0000:00/0000:00:1f.2/ata0/host1/target1:0:0/1:0:0:0/
> > >
> > > ata0 is resumed.
> > >
> > > Then who will be responsible to resume host1, target1:0:0 and 1:0:0:0?
> > >
> > > Or do you mean that we don't need to resume these devices at all?
> > > host1 and target1:0:0 are logical devices, but I think 1:0:0:0 is not.
> >
> > That's right. It makes no difference whether the host and target
> > are resumed or suspended. In fact, you could also call
> > pm_runtime_no_callbacks() for them.
>
> But host and target are added in scsi layer. How do we know if they are
> "logical device"?
Those data structures may be created by the SCSI layer, but you know
that the hardware is all controlled by the ATA layer. Therefore you
know there is no physical hardware corresponding to the host or target
device.
> Or do you mean that we can call pm_runtime_no_callbacks() for them
> because scsi layer(scsi_pm.c) only implements runtime pm for scsi
> device.
That's right. And you know why? It's because the SCSI host and target
do not refer to power-manageable hardware.
Alan Stern