Currently some code flow of runtime usage count changes is not covered by
the rpm_runtime_usage tracepoints. Adjust corresponding tracepoints to monitor
all the runtime usage count changes.
Chen Yu (2):
PM-runtime: Move all runtime usage related function to runtime.c
PM-runtime: change the tracepoints to cover all usage_count
drivers/base/power/runtime.c | 50 ++++++++++++++++++++++++++----------
include/linux/pm_runtime.h | 12 ++-------
2 files changed, 38 insertions(+), 24 deletions(-)
--
2.17.1
In order to track all the runtime usage count change, move the code
related to runtime usage count change from pm_runtime.h to runtime.c,
so that in runtime.c we can leverage trace event to do the tracking.
Meanwhile export pm_runtime_get_noresume() and pm_runtime_put_noidle()
so the module can use them.
No functional changes intended.
Signed-off-by: Chen Yu <[email protected]>
---
drivers/base/power/runtime.c | 12 ++++++++++++
include/linux/pm_runtime.h | 12 ++----------
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 9f62790f644c..85a248e196ca 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1738,6 +1738,18 @@ void pm_runtime_drop_link(struct device *dev)
spin_unlock_irq(&dev->power.lock);
}
+void pm_runtime_get_noresume(struct device *dev)
+{
+ atomic_inc(&dev->power.usage_count);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_get_noresume);
+
+void pm_runtime_put_noidle(struct device *dev)
+{
+ atomic_add_unless(&dev->power.usage_count, -1, 0);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_put_noidle);
+
static bool pm_runtime_need_not_resume(struct device *dev)
{
return atomic_read(&dev->power.usage_count) <= 1 &&
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 3dbc207bff53..26510fef2acd 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -59,6 +59,8 @@ extern void pm_runtime_get_suppliers(struct device *dev);
extern void pm_runtime_put_suppliers(struct device *dev);
extern void pm_runtime_new_link(struct device *dev);
extern void pm_runtime_drop_link(struct device *dev);
+extern void pm_runtime_get_noresume(struct device *dev);
+extern void pm_runtime_put_noidle(struct device *dev);
static inline int pm_runtime_get_if_in_use(struct device *dev)
{
@@ -70,16 +72,6 @@ static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
dev->power.ignore_children = enable;
}
-static inline void pm_runtime_get_noresume(struct device *dev)
-{
- atomic_inc(&dev->power.usage_count);
-}
-
-static inline void pm_runtime_put_noidle(struct device *dev)
-{
- atomic_add_unless(&dev->power.usage_count, -1, 0);
-}
-
static inline bool pm_runtime_suspended(struct device *dev)
{
return dev->power.runtime_status == RPM_SUSPENDED
--
2.17.1
Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
has added some tracepoints to monitor the change of runtime usage, and
there is something to improve:
1. There are some places that adjust the usage count not
been traced yet. For example, pm_runtime_get_noresume() and
pm_runtime_put_noidle()
2. The change of the usage count will not be tracked if decreased
from 1 to 0.
This patch intends to adjust the logic to be consistent with the
change of usage_counter, that is to say, only after the counter has
been possibly modified, we record it. Besides, all usage changes will
be shown using rpm_usage even if included by other trace points.
And these changes has helped track down the e1000e runtime issue.
Reviewed-by: Michał Mirosław <[email protected]>
Signed-off-by: Chen Yu <[email protected]>
---
drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
1 file changed, 24 insertions(+), 14 deletions(-)
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 85a248e196ca..5789d2624513 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
int retval;
if (rpmflags & RPM_GET_PUT) {
- if (!atomic_dec_and_test(&dev->power.usage_count)) {
- trace_rpm_usage_rcuidle(dev, rpmflags);
+ bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
+
+ trace_rpm_usage_rcuidle(dev, rpmflags);
+ if (non_zero)
return 0;
- }
}
might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
@@ -1038,10 +1039,12 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
int retval;
if (rpmflags & RPM_GET_PUT) {
- if (!atomic_dec_and_test(&dev->power.usage_count)) {
- trace_rpm_usage_rcuidle(dev, rpmflags);
+ bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
+
+ trace_rpm_usage_rcuidle(dev, rpmflags);
+ if (non_zero)
return 0;
- }
+
}
might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
@@ -1073,8 +1076,10 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe &&
dev->power.runtime_status != RPM_ACTIVE);
- if (rpmflags & RPM_GET_PUT)
+ if (rpmflags & RPM_GET_PUT) {
atomic_inc(&dev->power.usage_count);
+ trace_rpm_usage_rcuidle(dev, rpmflags);
+ }
spin_lock_irqsave(&dev->power.lock, flags);
retval = rpm_resume(dev, rpmflags);
@@ -1433,6 +1438,7 @@ void pm_runtime_forbid(struct device *dev)
dev->power.runtime_auto = false;
atomic_inc(&dev->power.usage_count);
+ trace_rpm_usage_rcuidle(dev, 0);
rpm_resume(dev, 0);
out:
@@ -1448,16 +1454,17 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid);
*/
void pm_runtime_allow(struct device *dev)
{
+ bool is_zero;
+
spin_lock_irq(&dev->power.lock);
if (dev->power.runtime_auto)
goto out;
dev->power.runtime_auto = true;
- if (atomic_dec_and_test(&dev->power.usage_count))
+ is_zero = atomic_dec_and_test(&dev->power.usage_count);
+ trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
+ if (is_zero)
rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
- else
- trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
-
out:
spin_unlock_irq(&dev->power.lock);
}
@@ -1523,9 +1530,8 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
/* If it used to be allowed then prevent it. */
if (!old_use || old_delay >= 0) {
atomic_inc(&dev->power.usage_count);
- rpm_resume(dev, 0);
- } else {
trace_rpm_usage_rcuidle(dev, 0);
+ rpm_resume(dev, 0);
}
}
@@ -1533,8 +1539,10 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
else {
/* If it used to be prevented then allow it. */
- if (old_use && old_delay < 0)
+ if (old_use && old_delay < 0) {
atomic_dec(&dev->power.usage_count);
+ trace_rpm_usage_rcuidle(dev, 0);
+ }
/* Maybe we can autosuspend now. */
rpm_idle(dev, RPM_AUTO);
@@ -1741,12 +1749,14 @@ void pm_runtime_drop_link(struct device *dev)
void pm_runtime_get_noresume(struct device *dev)
{
atomic_inc(&dev->power.usage_count);
+ trace_rpm_usage_rcuidle(dev, 0);
}
EXPORT_SYMBOL_GPL(pm_runtime_get_noresume);
void pm_runtime_put_noidle(struct device *dev)
{
atomic_add_unless(&dev->power.usage_count, -1, 0);
+ trace_rpm_usage_rcuidle(dev, 0);
}
EXPORT_SYMBOL_GPL(pm_runtime_put_noidle);
--
2.17.1
On Wed, Jul 15, 2020 at 02:28:03PM +0800, Chen Yu wrote:
> Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
> has added some tracepoints to monitor the change of runtime usage, and
> there is something to improve:
> 1. There are some places that adjust the usage count not
> been traced yet. For example, pm_runtime_get_noresume() and
> pm_runtime_put_noidle()
> 2. The change of the usage count will not be tracked if decreased
> from 1 to 0.
>
> This patch intends to adjust the logic to be consistent with the
> change of usage_counter, that is to say, only after the counter has
> been possibly modified, we record it. Besides, all usage changes will
> be shown using rpm_usage even if included by other trace points.
> And these changes has helped track down the e1000e runtime issue.
>
> Reviewed-by: Michał Mirosław <[email protected]>
> Signed-off-by: Chen Yu <[email protected]>
> ---
> drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
> 1 file changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 85a248e196ca..5789d2624513 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
> int retval;
>
> if (rpmflags & RPM_GET_PUT) {
> - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> - trace_rpm_usage_rcuidle(dev, rpmflags);
> + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> +
> + trace_rpm_usage_rcuidle(dev, rpmflags);
Why not just call trace everywhere before you do the atomic operations?
Why does the trace need to be called after the operation everywhere?
thanks,
greg k-h
On Wed, Jul 15, 2020 at 09:06:14AM +0200, Greg Kroah-Hartman wrote:
> On Wed, Jul 15, 2020 at 02:28:03PM +0800, Chen Yu wrote:
> > Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
> > has added some tracepoints to monitor the change of runtime usage, and
> > there is something to improve:
> > 1. There are some places that adjust the usage count not
> > been traced yet. For example, pm_runtime_get_noresume() and
> > pm_runtime_put_noidle()
> > 2. The change of the usage count will not be tracked if decreased
> > from 1 to 0.
> >
> > This patch intends to adjust the logic to be consistent with the
> > change of usage_counter, that is to say, only after the counter has
> > been possibly modified, we record it. Besides, all usage changes will
> > be shown using rpm_usage even if included by other trace points.
> > And these changes has helped track down the e1000e runtime issue.
> >
> > Reviewed-by: Micha? Miros?aw <[email protected]>
> > Signed-off-by: Chen Yu <[email protected]>
> > ---
> > drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
> > 1 file changed, 24 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> > index 85a248e196ca..5789d2624513 100644
> > --- a/drivers/base/power/runtime.c
> > +++ b/drivers/base/power/runtime.c
> > @@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
> > int retval;
> >
> > if (rpmflags & RPM_GET_PUT) {
> > - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> > - trace_rpm_usage_rcuidle(dev, rpmflags);
> > + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> > +
> > + trace_rpm_usage_rcuidle(dev, rpmflags);
>
> Why not just call trace everywhere before you do the atomic operations?
> Why does the trace need to be called after the operation everywhere?
I would argue that this is easier mentally: We trace what state the
device is in from now on (a "current state" for the time being) instead
of tracing what it was before (an information that has just expired).
Best Regards,
Micha? Miros?aw
Hi Greg,
thanks very much for taking a look,
On Wed, Jul 15, 2020 at 09:06:14AM +0200, Greg Kroah-Hartman wrote:
> On Wed, Jul 15, 2020 at 02:28:03PM +0800, Chen Yu wrote:
> > Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
> > has added some tracepoints to monitor the change of runtime usage, and
> > there is something to improve:
> > 1. There are some places that adjust the usage count not
> > been traced yet. For example, pm_runtime_get_noresume() and
> > pm_runtime_put_noidle()
> > 2. The change of the usage count will not be tracked if decreased
> > from 1 to 0.
> >
> > This patch intends to adjust the logic to be consistent with the
> > change of usage_counter, that is to say, only after the counter has
> > been possibly modified, we record it. Besides, all usage changes will
> > be shown using rpm_usage even if included by other trace points.
> > And these changes has helped track down the e1000e runtime issue.
> >
> > Reviewed-by: Michał Mirosław <[email protected]>
> > Signed-off-by: Chen Yu <[email protected]>
> > ---
> > drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
> > 1 file changed, 24 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> > index 85a248e196ca..5789d2624513 100644
> > --- a/drivers/base/power/runtime.c
> > +++ b/drivers/base/power/runtime.c
> > @@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
> > int retval;
> >
> > if (rpmflags & RPM_GET_PUT) {
> > - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> > - trace_rpm_usage_rcuidle(dev, rpmflags);
> > + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> > +
> > + trace_rpm_usage_rcuidle(dev, rpmflags);
>
> Why not just call trace everywhere before you do the atomic operations?
> Why does the trace need to be called after the operation everywhere?
>
If I understand correctly, besides Michal's comments, if we put the trace
before the atomic operation, we might be unable to judge whether the counter
is going to increase or decrease from rpmflags: it is RPM_GET_PUT which combine
the get() and put() together, then it is a little inconvenient for tracking IMO.
thanks,
Chenyu
> thanks,
>
> greg k-h
On Wed, Jul 15, 2020 at 09:27:28AM +0200, Michal Miroslaw wrote:
> On Wed, Jul 15, 2020 at 09:06:14AM +0200, Greg Kroah-Hartman wrote:
> > On Wed, Jul 15, 2020 at 02:28:03PM +0800, Chen Yu wrote:
> > > Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
> > > has added some tracepoints to monitor the change of runtime usage, and
> > > there is something to improve:
> > > 1. There are some places that adjust the usage count not
> > > been traced yet. For example, pm_runtime_get_noresume() and
> > > pm_runtime_put_noidle()
> > > 2. The change of the usage count will not be tracked if decreased
> > > from 1 to 0.
> > >
> > > This patch intends to adjust the logic to be consistent with the
> > > change of usage_counter, that is to say, only after the counter has
> > > been possibly modified, we record it. Besides, all usage changes will
> > > be shown using rpm_usage even if included by other trace points.
> > > And these changes has helped track down the e1000e runtime issue.
> > >
> > > Reviewed-by: Michał Mirosław <[email protected]>
> > > Signed-off-by: Chen Yu <[email protected]>
> > > ---
> > > drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
> > > 1 file changed, 24 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> > > index 85a248e196ca..5789d2624513 100644
> > > --- a/drivers/base/power/runtime.c
> > > +++ b/drivers/base/power/runtime.c
> > > @@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
> > > int retval;
> > >
> > > if (rpmflags & RPM_GET_PUT) {
> > > - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> > > - trace_rpm_usage_rcuidle(dev, rpmflags);
> > > + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> > > +
> > > + trace_rpm_usage_rcuidle(dev, rpmflags);
> >
> > Why not just call trace everywhere before you do the atomic operations?
> > Why does the trace need to be called after the operation everywhere?
>
> I would argue that this is easier mentally: We trace what state the
> device is in from now on (a "current state" for the time being) instead
> of tracing what it was before (an information that has just expired).
Is that really the case here and you look at that atomic value somehow
in the trace and need it?
thanks,
greg k-h
On Wed, Jul 15, 2020 at 04:18:38PM +0800, Chen Yu wrote:
> Hi Greg,
> thanks very much for taking a look,
> On Wed, Jul 15, 2020 at 09:06:14AM +0200, Greg Kroah-Hartman wrote:
> > On Wed, Jul 15, 2020 at 02:28:03PM +0800, Chen Yu wrote:
> > > Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
> > > has added some tracepoints to monitor the change of runtime usage, and
> > > there is something to improve:
> > > 1. There are some places that adjust the usage count not
> > > been traced yet. For example, pm_runtime_get_noresume() and
> > > pm_runtime_put_noidle()
> > > 2. The change of the usage count will not be tracked if decreased
> > > from 1 to 0.
> > >
> > > This patch intends to adjust the logic to be consistent with the
> > > change of usage_counter, that is to say, only after the counter has
> > > been possibly modified, we record it. Besides, all usage changes will
> > > be shown using rpm_usage even if included by other trace points.
> > > And these changes has helped track down the e1000e runtime issue.
> > >
> > > Reviewed-by: Michał Mirosław <[email protected]>
> > > Signed-off-by: Chen Yu <[email protected]>
> > > ---
> > > drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
> > > 1 file changed, 24 insertions(+), 14 deletions(-)
> > >
> > > diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> > > index 85a248e196ca..5789d2624513 100644
> > > --- a/drivers/base/power/runtime.c
> > > +++ b/drivers/base/power/runtime.c
> > > @@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
> > > int retval;
> > >
> > > if (rpmflags & RPM_GET_PUT) {
> > > - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> > > - trace_rpm_usage_rcuidle(dev, rpmflags);
> > > + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> > > +
> > > + trace_rpm_usage_rcuidle(dev, rpmflags);
> >
> > Why not just call trace everywhere before you do the atomic operations?
> > Why does the trace need to be called after the operation everywhere?
> >
> If I understand correctly, besides Michal's comments, if we put the trace
> before the atomic operation, we might be unable to judge whether the counter
> is going to increase or decrease from rpmflags: it is RPM_GET_PUT which combine
> the get() and put() together, then it is a little inconvenient for tracking IMO.
A trace can never know the exact value of an atomic value as it could
change right before or after the trace function is called, right?
So why are you caring about that? Care about the functionality that is
happening, not a reference count that you do not control at all.
thanks,
greg k-h
On Wed, Jul 15, 2020 at 8:26 AM Chen Yu <[email protected]> wrote:
>
> In order to track all the runtime usage count change, move the code
> related to runtime usage count change from pm_runtime.h to runtime.c,
> so that in runtime.c we can leverage trace event to do the tracking.
> Meanwhile export pm_runtime_get_noresume() and pm_runtime_put_noidle()
> so the module can use them.
>
> No functional changes intended.
>
> Signed-off-by: Chen Yu <[email protected]>
> ---
> drivers/base/power/runtime.c | 12 ++++++++++++
> include/linux/pm_runtime.h | 12 ++----------
> 2 files changed, 14 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 9f62790f644c..85a248e196ca 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -1738,6 +1738,18 @@ void pm_runtime_drop_link(struct device *dev)
> spin_unlock_irq(&dev->power.lock);
> }
>
> +void pm_runtime_get_noresume(struct device *dev)
> +{
> + atomic_inc(&dev->power.usage_count);
> +}
> +EXPORT_SYMBOL_GPL(pm_runtime_get_noresume);
> +
> +void pm_runtime_put_noidle(struct device *dev)
> +{
> + atomic_add_unless(&dev->power.usage_count, -1, 0);
> +}
> +EXPORT_SYMBOL_GPL(pm_runtime_put_noidle);
I honestly don't think that this is going in the right direction.
> +
> static bool pm_runtime_need_not_resume(struct device *dev)
> {
> return atomic_read(&dev->power.usage_count) <= 1 &&
> diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
> index 3dbc207bff53..26510fef2acd 100644
> --- a/include/linux/pm_runtime.h
> +++ b/include/linux/pm_runtime.h
> @@ -59,6 +59,8 @@ extern void pm_runtime_get_suppliers(struct device *dev);
> extern void pm_runtime_put_suppliers(struct device *dev);
> extern void pm_runtime_new_link(struct device *dev);
> extern void pm_runtime_drop_link(struct device *dev);
> +extern void pm_runtime_get_noresume(struct device *dev);
> +extern void pm_runtime_put_noidle(struct device *dev);
>
> static inline int pm_runtime_get_if_in_use(struct device *dev)
> {
> @@ -70,16 +72,6 @@ static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
> dev->power.ignore_children = enable;
> }
>
> -static inline void pm_runtime_get_noresume(struct device *dev)
> -{
> - atomic_inc(&dev->power.usage_count);
> -}
> -
> -static inline void pm_runtime_put_noidle(struct device *dev)
> -{
> - atomic_add_unless(&dev->power.usage_count, -1, 0);
> -}
> -
> static inline bool pm_runtime_suspended(struct device *dev)
> {
> return dev->power.runtime_status == RPM_SUSPENDED
> --
> 2.17.1
>
On Wed, Jul 15, 2020 at 8:26 AM Chen Yu <[email protected]> wrote:
>
> Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
> has added some tracepoints to monitor the change of runtime usage, and
> there is something to improve:
> 1. There are some places that adjust the usage count not
> been traced yet. For example, pm_runtime_get_noresume() and
> pm_runtime_put_noidle()
> 2. The change of the usage count will not be tracked if decreased
> from 1 to 0.
>
> This patch intends to adjust the logic to be consistent with the
> change of usage_counter, that is to say, only after the counter has
> been possibly modified, we record it. Besides, all usage changes will
> be shown using rpm_usage even if included by other trace points.
> And these changes has helped track down the e1000e runtime issue.
>
> Reviewed-by: Michał Mirosław <[email protected]>
> Signed-off-by: Chen Yu <[email protected]>
> ---
> drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
> 1 file changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> index 85a248e196ca..5789d2624513 100644
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
> int retval;
>
> if (rpmflags & RPM_GET_PUT) {
> - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> - trace_rpm_usage_rcuidle(dev, rpmflags);
> + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> +
> + trace_rpm_usage_rcuidle(dev, rpmflags);
It looks like you could move the trace event before the atomic variable check.
The ordering between the two doesn't matter, because usage_count may
change between the check and the trace event anyway.
But then what is the trace event useful for in the first place?
> + if (non_zero)
> return 0;
> - }
> }
>
> might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
> @@ -1038,10 +1039,12 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
> int retval;
>
> if (rpmflags & RPM_GET_PUT) {
> - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> - trace_rpm_usage_rcuidle(dev, rpmflags);
> + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> +
> + trace_rpm_usage_rcuidle(dev, rpmflags);
And the same comments apply here.
> + if (non_zero)
> return 0;
> - }
> +
> }
>
> might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
> @@ -1073,8 +1076,10 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
> might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe &&
> dev->power.runtime_status != RPM_ACTIVE);
>
> - if (rpmflags & RPM_GET_PUT)
> + if (rpmflags & RPM_GET_PUT) {
> atomic_inc(&dev->power.usage_count);
So the reason why things like that don't work is because the atomic
variable can change again between the inc and the trace event.
> + trace_rpm_usage_rcuidle(dev, rpmflags);
> + }
>
> spin_lock_irqsave(&dev->power.lock, flags);
> retval = rpm_resume(dev, rpmflags);
> @@ -1433,6 +1438,7 @@ void pm_runtime_forbid(struct device *dev)
>
> dev->power.runtime_auto = false;
> atomic_inc(&dev->power.usage_count);
Analogously here.
> + trace_rpm_usage_rcuidle(dev, 0);
> rpm_resume(dev, 0);
>
> out:
> @@ -1448,16 +1454,17 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid);
> */
> void pm_runtime_allow(struct device *dev)
> {
> + bool is_zero;
> +
> spin_lock_irq(&dev->power.lock);
> if (dev->power.runtime_auto)
> goto out;
>
> dev->power.runtime_auto = true;
> - if (atomic_dec_and_test(&dev->power.usage_count))
> + is_zero = atomic_dec_and_test(&dev->power.usage_count);
> + trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
> + if (is_zero)
> rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
> - else
> - trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
The change of ordering is pointless for the reasons outlined above.
And so on.
> -
> out:
> spin_unlock_irq(&dev->power.lock);
> }
> @@ -1523,9 +1530,8 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
> /* If it used to be allowed then prevent it. */
> if (!old_use || old_delay >= 0) {
> atomic_inc(&dev->power.usage_count);
> - rpm_resume(dev, 0);
> - } else {
> trace_rpm_usage_rcuidle(dev, 0);
> + rpm_resume(dev, 0);
> }
> }
>
> @@ -1533,8 +1539,10 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
> else {
>
> /* If it used to be prevented then allow it. */
> - if (old_use && old_delay < 0)
> + if (old_use && old_delay < 0) {
> atomic_dec(&dev->power.usage_count);
> + trace_rpm_usage_rcuidle(dev, 0);
> + }
>
> /* Maybe we can autosuspend now. */
> rpm_idle(dev, RPM_AUTO);
> @@ -1741,12 +1749,14 @@ void pm_runtime_drop_link(struct device *dev)
> void pm_runtime_get_noresume(struct device *dev)
> {
> atomic_inc(&dev->power.usage_count);
> + trace_rpm_usage_rcuidle(dev, 0);
> }
This actually kind of makes sense, as a matter of tracing the
pm_runtime_get_noresume() usage, but not as a matter of tracing the
atomic variable value.
> EXPORT_SYMBOL_GPL(pm_runtime_get_noresume);
>
> void pm_runtime_put_noidle(struct device *dev)
> {
> atomic_add_unless(&dev->power.usage_count, -1, 0);
> + trace_rpm_usage_rcuidle(dev, 0);
> }
> EXPORT_SYMBOL_GPL(pm_runtime_put_noidle);
>
> --
On Wed, Jul 15, 2020 at 10:33:22AM +0200, Greg Kroah-Hartman wrote:
> On Wed, Jul 15, 2020 at 04:18:38PM +0800, Chen Yu wrote:
> > Hi Greg,
> > thanks very much for taking a look,
> > On Wed, Jul 15, 2020 at 09:06:14AM +0200, Greg Kroah-Hartman wrote:
> > > On Wed, Jul 15, 2020 at 02:28:03PM +0800, Chen Yu wrote:
> > > > Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
> > > > has added some tracepoints to monitor the change of runtime usage, and
> > > > there is something to improve:
> > > > 1. There are some places that adjust the usage count not
> > > > been traced yet. For example, pm_runtime_get_noresume() and
> > > > pm_runtime_put_noidle()
> > > > 2. The change of the usage count will not be tracked if decreased
> > > > from 1 to 0.
> > > >
> > > > This patch intends to adjust the logic to be consistent with the
> > > > change of usage_counter, that is to say, only after the counter has
> > > > been possibly modified, we record it. Besides, all usage changes will
> > > > be shown using rpm_usage even if included by other trace points.
> > > > And these changes has helped track down the e1000e runtime issue.
> > > >
> > > > Reviewed-by: Michał Mirosław <[email protected]>
> > > > Signed-off-by: Chen Yu <[email protected]>
> > > > ---
> > > > drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
> > > > 1 file changed, 24 insertions(+), 14 deletions(-)
> > > >
> > > > diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> > > > index 85a248e196ca..5789d2624513 100644
> > > > --- a/drivers/base/power/runtime.c
> > > > +++ b/drivers/base/power/runtime.c
> > > > @@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
> > > > int retval;
> > > >
> > > > if (rpmflags & RPM_GET_PUT) {
> > > > - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> > > > - trace_rpm_usage_rcuidle(dev, rpmflags);
> > > > + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> > > > +
> > > > + trace_rpm_usage_rcuidle(dev, rpmflags);
> > >
> > > Why not just call trace everywhere before you do the atomic operations?
> > > Why does the trace need to be called after the operation everywhere?
> > >
> > If I understand correctly, besides Michal's comments, if we put the trace
> > before the atomic operation, we might be unable to judge whether the counter
> > is going to increase or decrease from rpmflags: it is RPM_GET_PUT which combine
> > the get() and put() together, then it is a little inconvenient for tracking IMO.
>
> A trace can never know the exact value of an atomic value as it could
> change right before or after the trace function is called, right?
>
> So why are you caring about that? Care about the functionality that is
> happening, not a reference count that you do not control at all.
>
Ah I see, thanks for the explanation, I'll re-think about the scenaio.
Thanks,
Chenyu
> thanks,
>
> greg k-h
On Wed, Jul 15, 2020 at 05:47:36PM +0200, Rafael J. Wysocki wrote:
> On Wed, Jul 15, 2020 at 8:26 AM Chen Yu <[email protected]> wrote:
> >
> > Commit d229290689ae ("PM-runtime: add tracepoints for usage_count changes")
> > has added some tracepoints to monitor the change of runtime usage, and
> > there is something to improve:
> > 1. There are some places that adjust the usage count not
> > been traced yet. For example, pm_runtime_get_noresume() and
> > pm_runtime_put_noidle()
> > 2. The change of the usage count will not be tracked if decreased
> > from 1 to 0.
> >
> > This patch intends to adjust the logic to be consistent with the
> > change of usage_counter, that is to say, only after the counter has
> > been possibly modified, we record it. Besides, all usage changes will
> > be shown using rpm_usage even if included by other trace points.
> > And these changes has helped track down the e1000e runtime issue.
> >
> > Reviewed-by: Michał Mirosław <[email protected]>
> > Signed-off-by: Chen Yu <[email protected]>
> > ---
> > drivers/base/power/runtime.c | 38 +++++++++++++++++++++++-------------
> > 1 file changed, 24 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
> > index 85a248e196ca..5789d2624513 100644
> > --- a/drivers/base/power/runtime.c
> > +++ b/drivers/base/power/runtime.c
> > @@ -1004,10 +1004,11 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
> > int retval;
> >
> > if (rpmflags & RPM_GET_PUT) {
> > - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> > - trace_rpm_usage_rcuidle(dev, rpmflags);
> > + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> > +
> > + trace_rpm_usage_rcuidle(dev, rpmflags);
>
> It looks like you could move the trace event before the atomic variable check.
>
> The ordering between the two doesn't matter, because usage_count may
> change between the check and the trace event anyway.
>
> But then what is the trace event useful for in the first place?
>
Thanks for explanation, I've changed my mind, it seems that we should not
trace the counter because we don't know who the operator is due to race condition.
> > + if (non_zero)
> > return 0;
> > - }
> > }
> >
> > might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
> > @@ -1038,10 +1039,12 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
> > int retval;
> >
> > if (rpmflags & RPM_GET_PUT) {
> > - if (!atomic_dec_and_test(&dev->power.usage_count)) {
> > - trace_rpm_usage_rcuidle(dev, rpmflags);
> > + bool non_zero = !atomic_dec_and_test(&dev->power.usage_count);
> > +
> > + trace_rpm_usage_rcuidle(dev, rpmflags);
>
> And the same comments apply here.
>
> > + if (non_zero)
> > return 0;
> > - }
> > +
> > }
> >
> > might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
> > @@ -1073,8 +1076,10 @@ int __pm_runtime_resume(struct device *dev, int rpmflags)
> > might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe &&
> > dev->power.runtime_status != RPM_ACTIVE);
> >
> > - if (rpmflags & RPM_GET_PUT)
> > + if (rpmflags & RPM_GET_PUT) {
> > atomic_inc(&dev->power.usage_count);
>
> So the reason why things like that don't work is because the atomic
> variable can change again between the inc and the trace event.
>
Got it.
> > + trace_rpm_usage_rcuidle(dev, rpmflags);
> > + }
> >
> > spin_lock_irqsave(&dev->power.lock, flags);
> > retval = rpm_resume(dev, rpmflags);
> > @@ -1433,6 +1438,7 @@ void pm_runtime_forbid(struct device *dev)
> >
> > dev->power.runtime_auto = false;
> > atomic_inc(&dev->power.usage_count);
>
> Analogously here.
>
> > + trace_rpm_usage_rcuidle(dev, 0);
> > rpm_resume(dev, 0);
> >
> > out:
> > @@ -1448,16 +1454,17 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid);
> > */
> > void pm_runtime_allow(struct device *dev)
> > {
> > + bool is_zero;
> > +
> > spin_lock_irq(&dev->power.lock);
> > if (dev->power.runtime_auto)
> > goto out;
> >
> > dev->power.runtime_auto = true;
> > - if (atomic_dec_and_test(&dev->power.usage_count))
> > + is_zero = atomic_dec_and_test(&dev->power.usage_count);
> > + trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
> > + if (is_zero)
> > rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
> > - else
> > - trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
>
> The change of ordering is pointless for the reasons outlined above.
>
> And so on.
>
> > -
> > out:
> > spin_unlock_irq(&dev->power.lock);
> > }
> > @@ -1523,9 +1530,8 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
> > /* If it used to be allowed then prevent it. */
> > if (!old_use || old_delay >= 0) {
> > atomic_inc(&dev->power.usage_count);
> > - rpm_resume(dev, 0);
> > - } else {
> > trace_rpm_usage_rcuidle(dev, 0);
> > + rpm_resume(dev, 0);
> > }
> > }
> >
> > @@ -1533,8 +1539,10 @@ static void update_autosuspend(struct device *dev, int old_delay, int old_use)
> > else {
> >
> > /* If it used to be prevented then allow it. */
> > - if (old_use && old_delay < 0)
> > + if (old_use && old_delay < 0) {
> > atomic_dec(&dev->power.usage_count);
> > + trace_rpm_usage_rcuidle(dev, 0);
> > + }
> >
> > /* Maybe we can autosuspend now. */
> > rpm_idle(dev, RPM_AUTO);
> > @@ -1741,12 +1749,14 @@ void pm_runtime_drop_link(struct device *dev)
> > void pm_runtime_get_noresume(struct device *dev)
> > {
> > atomic_inc(&dev->power.usage_count);
> > + trace_rpm_usage_rcuidle(dev, 0);
> > }
>
> This actually kind of makes sense, as a matter of tracing the
> pm_runtime_get_noresume() usage, but not as a matter of tracing the
> atomic variable value.
>
Okay, I'll re-iterate the code and re-think about it on how to
track the runtime process more robustly.
Thanks,
Chenyu
> > EXPORT_SYMBOL_GPL(pm_runtime_get_noresume);
> >
> > void pm_runtime_put_noidle(struct device *dev)
> > {
> > atomic_add_unless(&dev->power.usage_count, -1, 0);
> > + trace_rpm_usage_rcuidle(dev, 0);
> > }
> > EXPORT_SYMBOL_GPL(pm_runtime_put_noidle);
> >
> > --