From: "Rafael J. Wysocki" <[email protected]>
Revert commit 44cc89f76464 ("PM: runtime: Update device status
before letting suppliers suspend") that introduced a race condition
into __rpm_callback() which allowed a concurrent rpm_resume() to
run and resume the device prematurely after its status had been
changed to RPM_SUSPENDED by __rpm_callback().
Fixes: 44cc89f76464 ("PM: runtime: Update device status before letting suppliers suspend")
Link: https://lore.kernel.org/linux-pm/[email protected]/
Reported by: Adrian Hunter <[email protected]>
Cc: 4.10+ <[email protected]> # 4.10+
Signed-off-by: Rafael J. Wysocki <[email protected]>
---
drivers/base/power/runtime.c | 62 +++++++++++++++---------------------
1 file changed, 25 insertions(+), 37 deletions(-)
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 18b82427d0cb..a46a7e30881b 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev)
static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
- bool use_links = dev->power.links_count > 0;
- bool get = false;
int retval, idx;
- bool put;
+ bool use_links = dev->power.links_count > 0;
if (dev->power.irq_safe) {
spin_unlock(&dev->power.lock);
- } else if (!use_links) {
- spin_unlock_irq(&dev->power.lock);
} else {
- get = dev->power.runtime_status == RPM_RESUMING;
-
spin_unlock_irq(&dev->power.lock);
- /* Resume suppliers if necessary. */
- if (get) {
+ /*
+ * Resume suppliers if necessary.
+ *
+ * The device's runtime PM status cannot change until this
+ * routine returns, so it is safe to read the status outside of
+ * the lock.
+ */
+ if (use_links && dev->power.runtime_status == RPM_RESUMING) {
idx = device_links_read_lock();
retval = rpm_get_suppliers(dev);
@@ -355,36 +355,24 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
if (dev->power.irq_safe) {
spin_lock(&dev->power.lock);
- return retval;
- }
-
- spin_lock_irq(&dev->power.lock);
-
- if (!use_links)
- return retval;
-
- /*
- * If the device is suspending and the callback has returned success,
- * drop the usage counters of the suppliers that have been reference
- * counted on its resume.
- *
- * Do that if the resume fails too.
- */
- put = dev->power.runtime_status == RPM_SUSPENDING && !retval;
- if (put)
- __update_runtime_status(dev, RPM_SUSPENDED);
- else
- put = get && retval;
-
- if (put) {
- spin_unlock_irq(&dev->power.lock);
-
- idx = device_links_read_lock();
+ } else {
+ /*
+ * If the device is suspending and the callback has returned
+ * success, drop the usage counters of the suppliers that have
+ * been reference counted on its resume.
+ *
+ * Do that if resume fails too.
+ */
+ if (use_links
+ && ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
+ || (dev->power.runtime_status == RPM_RESUMING && retval))) {
+ idx = device_links_read_lock();
-fail:
- rpm_put_suppliers(dev);
+ fail:
+ rpm_put_suppliers(dev);
- device_links_read_unlock(idx);
+ device_links_read_unlock(idx);
+ }
spin_lock_irq(&dev->power.lock);
}
--
2.26.2
On Thu, 18 Mar 2021 at 19:15, Rafael J. Wysocki <[email protected]> wrote:
>
> From: "Rafael J. Wysocki" <[email protected]>
>
> Revert commit 44cc89f76464 ("PM: runtime: Update device status
> before letting suppliers suspend") that introduced a race condition
> into __rpm_callback() which allowed a concurrent rpm_resume() to
> run and resume the device prematurely after its status had been
> changed to RPM_SUSPENDED by __rpm_callback().
Huh, the code path is not entirely easy to follow. :-)
Did you find this by code inspection or did you get an error report?
>
> Fixes: 44cc89f76464 ("PM: runtime: Update device status before letting suppliers suspend")
> Link: https://lore.kernel.org/linux-pm/[email protected]/
> Reported by: Adrian Hunter <[email protected]>
> Cc: 4.10+ <[email protected]> # 4.10+
> Signed-off-by: Rafael J. Wysocki <[email protected]>
Reviewed-by: Ulf Hansson <[email protected]>
Kind regards
Uffe
> ---
> drivers/base/power/runtime.c | 62 +++++++++++++++---------------------
> 1 file changed, 25 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 18b82427d0cb..a46a7e30881b 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev)
> static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
> __releases(&dev->power.lock) __acquires(&dev->power.lock)
> {
> - bool use_links = dev->power.links_count > 0;
> - bool get = false;
> int retval, idx;
> - bool put;
> + bool use_links = dev->power.links_count > 0;
>
> if (dev->power.irq_safe) {
> spin_unlock(&dev->power.lock);
> - } else if (!use_links) {
> - spin_unlock_irq(&dev->power.lock);
> } else {
> - get = dev->power.runtime_status == RPM_RESUMING;
> -
> spin_unlock_irq(&dev->power.lock);
>
> - /* Resume suppliers if necessary. */
> - if (get) {
> + /*
> + * Resume suppliers if necessary.
> + *
> + * The device's runtime PM status cannot change until this
> + * routine returns, so it is safe to read the status outside of
> + * the lock.
> + */
> + if (use_links && dev->power.runtime_status == RPM_RESUMING) {
> idx = device_links_read_lock();
>
> retval = rpm_get_suppliers(dev);
> @@ -355,36 +355,24 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
>
> if (dev->power.irq_safe) {
> spin_lock(&dev->power.lock);
> - return retval;
> - }
> -
> - spin_lock_irq(&dev->power.lock);
> -
> - if (!use_links)
> - return retval;
> -
> - /*
> - * If the device is suspending and the callback has returned success,
> - * drop the usage counters of the suppliers that have been reference
> - * counted on its resume.
> - *
> - * Do that if the resume fails too.
> - */
> - put = dev->power.runtime_status == RPM_SUSPENDING && !retval;
> - if (put)
> - __update_runtime_status(dev, RPM_SUSPENDED);
> - else
> - put = get && retval;
> -
> - if (put) {
> - spin_unlock_irq(&dev->power.lock);
> -
> - idx = device_links_read_lock();
> + } else {
> + /*
> + * If the device is suspending and the callback has returned
> + * success, drop the usage counters of the suppliers that have
> + * been reference counted on its resume.
> + *
> + * Do that if resume fails too.
> + */
> + if (use_links
> + && ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
> + || (dev->power.runtime_status == RPM_RESUMING && retval))) {
> + idx = device_links_read_lock();
>
> -fail:
> - rpm_put_suppliers(dev);
> + fail:
> + rpm_put_suppliers(dev);
>
> - device_links_read_unlock(idx);
> + device_links_read_unlock(idx);
> + }
>
> spin_lock_irq(&dev->power.lock);
> }
> --
> 2.26.2
>
>
>
>
On Fri, Mar 19, 2021 at 2:31 PM Ulf Hansson <[email protected]> wrote:
>
> On Thu, 18 Mar 2021 at 19:15, Rafael J. Wysocki <[email protected]> wrote:
> >
> > From: "Rafael J. Wysocki" <[email protected]>
> >
> > Revert commit 44cc89f76464 ("PM: runtime: Update device status
> > before letting suppliers suspend") that introduced a race condition
> > into __rpm_callback() which allowed a concurrent rpm_resume() to
> > run and resume the device prematurely after its status had been
> > changed to RPM_SUSPENDED by __rpm_callback().
>
> Huh, the code path is not entirely easy to follow. :-)
>
> Did you find this by code inspection or did you get an error report?
There was a bug report that caused me to look at the code once again.
> > Fixes: 44cc89f76464 ("PM: runtime: Update device status before letting suppliers suspend")
> > Link: https://lore.kernel.org/linux-pm/[email protected]/
> > Reported by: Adrian Hunter <[email protected]>
> > Cc: 4.10+ <[email protected]> # 4.10+
> > Signed-off-by: Rafael J. Wysocki <[email protected]>
>
> Reviewed-by: Ulf Hansson <[email protected]>
Thanks!