Hi Zhang, Eduardo
Cc: Dung, Hiep
rcar_thermal doesn't work after unbind/re-bind today.
These patches are cosmetic, and tidy it up.
Kuninori Morimoto (2):
thermal: rcar_thermal: remove redundant operation
thermal: rcar_thermal: use pm_runtime_put_sync()
drivers/thermal/rcar_thermal.c | 49 +++++++++++++++++++++----------------------------
1 file changed, 21 insertions(+), 28 deletions(-)
Best regards
---
Kuninori Morimoto
From: Kuninori Morimoto <[email protected]>
The probe error handling and the remove operation are the same.
Let's use the same function for both.
Signed-off-by: Kuninori Morimoto <[email protected]>
---
drivers/thermal/rcar_thermal.c | 49 ++++++++++++++++++------------------------
1 file changed, 21 insertions(+), 28 deletions(-)
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 5d4ae7d..13d01ed 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -361,6 +361,24 @@ static irqreturn_t rcar_thermal_irq(int irq, void *data)
/*
* platform functions
*/
+static int rcar_thermal_remove(struct platform_device *pdev)
+{
+ struct rcar_thermal_common *common = platform_get_drvdata(pdev);
+ struct device *dev = &pdev->dev;
+ struct rcar_thermal_priv *priv;
+
+ rcar_thermal_for_each_priv(priv, common) {
+ if (rcar_has_irq_support(priv))
+ rcar_thermal_irq_disable(priv);
+ thermal_zone_device_unregister(priv->zone);
+ }
+
+ pm_runtime_put(dev);
+ pm_runtime_disable(dev);
+
+ return 0;
+}
+
static int rcar_thermal_probe(struct platform_device *pdev)
{
struct rcar_thermal_common *common;
@@ -377,6 +395,8 @@ static int rcar_thermal_probe(struct platform_device *pdev)
if (!common)
return -ENOMEM;
+ platform_set_drvdata(pdev, common);
+
INIT_LIST_HEAD(&common->head);
spin_lock_init(&common->lock);
common->dev = dev;
@@ -454,43 +474,16 @@ static int rcar_thermal_probe(struct platform_device *pdev)
rcar_thermal_common_write(common, ENR, enr_bits);
}
- platform_set_drvdata(pdev, common);
-
dev_info(dev, "%d sensor probed\n", i);
return 0;
error_unregister:
- rcar_thermal_for_each_priv(priv, common) {
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_disable(priv);
- thermal_zone_device_unregister(priv->zone);
- }
-
- pm_runtime_put(dev);
- pm_runtime_disable(dev);
+ rcar_thermal_remove(pdev);
return ret;
}
-static int rcar_thermal_remove(struct platform_device *pdev)
-{
- struct rcar_thermal_common *common = platform_get_drvdata(pdev);
- struct device *dev = &pdev->dev;
- struct rcar_thermal_priv *priv;
-
- rcar_thermal_for_each_priv(priv, common) {
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_disable(priv);
- thermal_zone_device_unregister(priv->zone);
- }
-
- pm_runtime_put(dev);
- pm_runtime_disable(dev);
-
- return 0;
-}
-
static const struct of_device_id rcar_thermal_dt_ids[] = {
{ .compatible = "renesas,rcar-thermal", },
{},
--
1.9.1
From: Kuninori Morimoto <[email protected]>
The driver uses pm_runtime_get_sync() in probe(). Let's use
pm_runtime_put_sync() instead of pm_runtime_put(). Otherwise the thermal
sensor doesn't work after unbind/re-bind.
Signed-off-by: Kuninori Morimoto <[email protected]>
---
drivers/thermal/rcar_thermal.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 13d01ed..f7cf2d7 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -373,7 +373,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
thermal_zone_device_unregister(priv->zone);
}
- pm_runtime_put(dev);
+ pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
return 0;
--
1.9.1
Hi
Actually, these issues were reported by Dung and Hiep,
but I didn't add their names to these patches.
Thus, Dung, Hiep, can you please send a Tested-by for these?
> rcar_thermal doesn't work after unbind/re-bind today.
> These patches are cosmetic, and tidyup it
>
> Kuninori Morimoto (2):
> thermal: rcar_thermal: remove redundant operation
> thermal: rcar_thermal: use pm_runtime_put_sync()
>
> drivers/thermal/rcar_thermal.c | 49 +++++++++++++++++++++----------------------------
> 1 file changed, 21 insertions(+), 28 deletions(-)
>
>
> Best regards
> ---
> Kuninori Morimoto
Dear Mr Morimoto
We are going to test the rcar_thermal function with these patches.
If the result is good, I will send a Tested-by from nv-dung.
Best regards
Nguyen Viet Dung
On 2015年11月10日 11:18, Kuninori Morimoto wrote:
> Hi
>
> Actually, these issue are reported by Dung Hiep,
> but I didn't add their name on these patches.
> Thus, Dung, Hiep, can you please send Tested-by for these ?
>
>> rcar_thermal doesn't work after unbind/re-bind today.
>> These patches are cosmetic, and tidyup it
>>
>> Kuninori Morimoto (2):
>> thermal: rcar_thermal: remove redundant operation
>> thermal: rcar_thermal: use pm_runtime_put_sync()
>>
>> drivers/thermal/rcar_thermal.c | 49 +++++++++++++++++++++----------------------------
>> 1 file changed, 21 insertions(+), 28 deletions(-)
>>
>>
>> Best regards
>> ---
>> Kuninori Morimoto
Hi,
I have tested the rcar_thermal function on stable Linux v4.3.
It works well with these patches.
Tested-by: Nguyen Viet Dung <[email protected]>
Best regards
Nguyen Viet Dung
On 2015年11月10日 11:11, Kuninori Morimoto wrote:
> Hi Zhang, Eduardo
> Cc: Dung, Hiep
>
> rcar_thermal doesn't work after unbind/re-bind today.
> These patches are cosmetic, and tidyup it
>
> Kuninori Morimoto (2):
> thermal: rcar_thermal: remove redundant operation
> thermal: rcar_thermal: use pm_runtime_put_sync()
>
> drivers/thermal/rcar_thermal.c | 49 +++++++++++++++++++++----------------------------
> 1 file changed, 21 insertions(+), 28 deletions(-)
>
>
> Best regards
> ---
> Kuninori Morimoto
>
Hi Morimoto-san, Ulf,
On Tue, Nov 10, 2015 at 3:12 AM, Kuninori Morimoto
<[email protected]> wrote:
> From: Kuninori Morimoto <[email protected]>
>
> It is using pm_runtime_get_sync() on probe(). Let's use
> pm_runtime_put_sync() instead of pm_runtime_put(). Otherwise thermal
> sensor doesn't work after unbind/re-bind
>
> Signed-off-by: Kuninori Morimoto <[email protected]>
> ---
> drivers/thermal/rcar_thermal.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
> index 13d01ed..f7cf2d7 100644
> --- a/drivers/thermal/rcar_thermal.c
> +++ b/drivers/thermal/rcar_thermal.c
> @@ -373,7 +373,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
> thermal_zone_device_unregister(priv->zone);
> }
>
> - pm_runtime_put(dev);
> + pm_runtime_put_sync(dev);
> pm_runtime_disable(dev);
>
> return 0;
While I can confirm this fixes the issue, I think this is a bug in the PM
core, and thus your patch is merely a workaround.
Morimoto-san: I assume this is a recent regression. Have you tried to bisect?
With a bit more debugging info, this is the difference between the failing
and the "fixed" cases:
unbind:
+rcar_thermal e61f0000.thermal: pm_clk_suspend()
+renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal OFF
rcar_thermal e61f0000.thermal: removing from PM domain clock-controller
pm_genpd_remove_device: Remove e61f0000.thermal from clock-controller
-renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal OFF
bind:
rcar_thermal e61f0000.thermal: adding to PM domain clock-controller
__pm_genpd_add_device: Add e61f0000.thermal to clock-controller
rcar_thermal e61f0000.thermal: Clock thermal con_id (null) managed by
runtime PM.
-rcar_thermal e61f0000.thermal: thermal sensor was broken
+rcar_thermal e61f0000.thermal: pm_clk_resume()
+renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal ON
rcar_thermal e61f0000.thermal: 1 sensor probed
In the failing case, pm_clk_suspend() is not called, and turning off the
module clock is thus delayed until removal of the device from the clock
domain.
But as pm_clk_suspend() wasn't called, the device isn't correctly resumed on
rebind, and the module clock is never re-enabled, leading to a failure.
Ulf, what do you think?
Thanks!
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
On 10 November 2015 at 09:18, Geert Uytterhoeven <[email protected]> wrote:
> Hi Morimoto-san, Ulf,
>
> On Tue, Nov 10, 2015 at 3:12 AM, Kuninori Morimoto
> <[email protected]> wrote:
>> From: Kuninori Morimoto <[email protected]>
>>
>> It is using pm_runtime_get_sync() on probe(). Let's use
>> pm_runtime_put_sync() instead of pm_runtime_put(). Otherwise thermal
>> sensor doesn't work after unbind/re-bind
>>
>> Signed-off-by: Kuninori Morimoto <[email protected]>
>> ---
>> drivers/thermal/rcar_thermal.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
>> index 13d01ed..f7cf2d7 100644
>> --- a/drivers/thermal/rcar_thermal.c
>> +++ b/drivers/thermal/rcar_thermal.c
>> @@ -373,7 +373,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
>> thermal_zone_device_unregister(priv->zone);
>> }
>>
>> - pm_runtime_put(dev);
>> + pm_runtime_put_sync(dev);
>> pm_runtime_disable(dev);
For the reasons explained by Geert, this is to me also a "workaround".
I would replace pm_runtime_put() and pm_runtime_disable() with a call
to pm_runtime_force_suspend().
In that way, you will make sure your device gets runtime suspended
(clock domain will gate the clock). Additionally, the runtime PM
status will properly reflect the status of the device.
>>
>> return 0;
>
> While I can confirm this fixes the issue, I think this is a bug in the PM
> core, and thus your patch is merely a workaround.
>
> Morimoto-san: I assume this is a recent regression. Have you tried to bisect?
>
> With a bit more debugging info, this is the difference between the failing
> and the "fixed" cases:
>
> unbind:
>
> +rcar_thermal e61f0000.thermal: pm_clk_suspend()
> +renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal OFF
> rcar_thermal e61f0000.thermal: removing from PM domain clock-controller
> pm_genpd_remove_device: Remove e61f0000.thermal from clock-controller
> -renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal OFF
>
> bind:
>
> rcar_thermal e61f0000.thermal: adding to PM domain clock-controller
> __pm_genpd_add_device: Add e61f0000.thermal to clock-controller
> rcar_thermal e61f0000.thermal: Clock thermal con_id (null) managed by
> runtime PM.
> -rcar_thermal e61f0000.thermal: thermal sensor was broken
> +rcar_thermal e61f0000.thermal: pm_clk_resume()
> +renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal ON
> rcar_thermal e61f0000.thermal: 1 sensor probed
>
> In the failing case, pm_clk_suspend() is not called, and turning off the
> module clock is thus delayed until removal of the device from the clock
> domain.
> But as pm_clk_suspend() wasn't called, the device isn't correctly resumed on
> rebind, and the module clock is never re-enabled, leading to a failure.
>
> Ulf, what do you think?
I totally agree with your analysis.
The problem is that the runtime PM status of the device isn't
correctly updated at ->remove(). The effect is that the
pm_runtime_get_sync() in ->probe() at re-bind will *not* trigger the
->runtime_resume() callbacks to be invoked, as the runtime PM core
believes the device is already runtime resumed.
Kind regards
Uffe
Hi Ulf,
On Tue, Nov 10, 2015 at 10:57 AM, Ulf Hansson <[email protected]> wrote:
> On 10 November 2015 at 09:18, Geert Uytterhoeven <[email protected]> wrote:
>> On Tue, Nov 10, 2015 at 3:12 AM, Kuninori Morimoto
>> <[email protected]> wrote:
>>> From: Kuninori Morimoto <[email protected]>
>>>
>>> It is using pm_runtime_get_sync() on probe(). Let's use
>>> pm_runtime_put_sync() instead of pm_runtime_put(). Otherwise thermal
>>> sensor doesn't work after unbind/re-bind
>>>
>>> Signed-off-by: Kuninori Morimoto <[email protected]>
>>> ---
>>> drivers/thermal/rcar_thermal.c | 2 +-
>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
>>> index 13d01ed..f7cf2d7 100644
>>> --- a/drivers/thermal/rcar_thermal.c
>>> +++ b/drivers/thermal/rcar_thermal.c
>>> @@ -373,7 +373,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
>>> thermal_zone_device_unregister(priv->zone);
>>> }
>>>
>>> - pm_runtime_put(dev);
>>> + pm_runtime_put_sync(dev);
>>> pm_runtime_disable(dev);
>
> For the reasons explained by Geert, this is to me also a "workaround".
>
> I would replace pm_runtime_put() and pm_runtime_disable() with a call
> to pm_runtime_force_suspend().
>
> In that way, you will make sure you device get runtime suspended
> (clock domain will gate the clock). Additionally, the runtime PM
> status will properly reflect the status of the device.
That still sounds like a workaround to me, which we have to apply to all
drivers relying on Runtime PM?
>> With a bit more debugging info, this is the difference between the failing
>> and the "fixed" cases:
>>
>> unbind:
>>
>> +rcar_thermal e61f0000.thermal: pm_clk_suspend()
>> +renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal OFF
>> rcar_thermal e61f0000.thermal: removing from PM domain clock-controller
>> pm_genpd_remove_device: Remove e61f0000.thermal from clock-controller
>> -renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal OFF
>>
>> bind:
>>
>> rcar_thermal e61f0000.thermal: adding to PM domain clock-controller
>> __pm_genpd_add_device: Add e61f0000.thermal to clock-controller
>> rcar_thermal e61f0000.thermal: Clock thermal con_id (null) managed by
>> runtime PM.
>> -rcar_thermal e61f0000.thermal: thermal sensor was broken
>> +rcar_thermal e61f0000.thermal: pm_clk_resume()
>> +renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal ON
>> rcar_thermal e61f0000.thermal: 1 sensor probed
>>
>> In the failing case, pm_clk_suspend() is not called, and turning off the
>> module clock is thus delayed until removal of the device from the clock
>> domain.
>> But as pm_clk_suspend() wasn't called, the device isn't correctly resumed on
>> rebind, and the module clock is never re-enabled, leading to a failure.
>>
>> Ulf, what do you think?
>
> I totally agree on your analyse.
>
> The problem is that the runtime PM status of the device isn't
> correctly updated at ->remove(). The effect is that the the
> pm_runtime_get_sync() in ->probe() at re-bind will *not* trigger the
> ->runtime_resume() callbacks to be invoked, as the runtime PM core
> believes the device is already runtime resumed.
So that's where it should be fixed?
Gr{oetje,eeting}s,
Geert
--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [email protected]
In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds
+Rafael, Alan
On 10 November 2015 at 11:10, Geert Uytterhoeven <[email protected]> wrote:
> Hi Ulf,
>
> On Tue, Nov 10, 2015 at 10:57 AM, Ulf Hansson <[email protected]> wrote:
>> On 10 November 2015 at 09:18, Geert Uytterhoeven <[email protected]> wrote:
>>> On Tue, Nov 10, 2015 at 3:12 AM, Kuninori Morimoto
>>> <[email protected]> wrote:
>>>> From: Kuninori Morimoto <[email protected]>
>>>>
>>>> It is using pm_runtime_get_sync() on probe(). Let's use
>>>> pm_runtime_put_sync() instead of pm_runtime_put(). Otherwise thermal
>>>> sensor doesn't work after unbind/re-bind
>>>>
>>>> Signed-off-by: Kuninori Morimoto <[email protected]>
>>>> ---
>>>> drivers/thermal/rcar_thermal.c | 2 +-
>>>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
>>>> index 13d01ed..f7cf2d7 100644
>>>> --- a/drivers/thermal/rcar_thermal.c
>>>> +++ b/drivers/thermal/rcar_thermal.c
>>>> @@ -373,7 +373,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
>>>> thermal_zone_device_unregister(priv->zone);
>>>> }
>>>>
>>>> - pm_runtime_put(dev);
>>>> + pm_runtime_put_sync(dev);
>>>> pm_runtime_disable(dev);
>>
>> For the reasons explained by Geert, this is to me also a "workaround".
>>
>> I would replace pm_runtime_put() and pm_runtime_disable() with a call
>> to pm_runtime_force_suspend().
>>
>> In that way, you will make sure you device get runtime suspended
>> (clock domain will gate the clock). Additionally, the runtime PM
>> status will properly reflect the status of the device.
>
> That still sounds like a workaround to me, which we have to apply to all
> drivers relying on Runtime PM?
Definitely not all drivers, but those that run pm_runtime_get_sync()
during ->probe() and expect the ->runtime_resume() callback to always
be invoked because of that. I guess we need to check which
drivers may suffer from this.
I wouldn't be surprised if at least a subset of those cases we find,
are poorly designed from PM point of view and won't even probe
successfully unless CONFIG_PM is set. Whatever that means...
>
>>> With a bit more debugging info, this is the difference between the failing
>>> and the "fixed" cases:
>>>
>>> unbind:
>>>
>>> +rcar_thermal e61f0000.thermal: pm_clk_suspend()
>>> +renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal OFF
>>> rcar_thermal e61f0000.thermal: removing from PM domain clock-controller
>>> pm_genpd_remove_device: Remove e61f0000.thermal from clock-controller
>>> -renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal OFF
>>>
>>> bind:
>>>
>>> rcar_thermal e61f0000.thermal: adding to PM domain clock-controller
>>> __pm_genpd_add_device: Add e61f0000.thermal to clock-controller
>>> rcar_thermal e61f0000.thermal: Clock thermal con_id (null) managed by
>>> runtime PM.
>>> -rcar_thermal e61f0000.thermal: thermal sensor was broken
>>> +rcar_thermal e61f0000.thermal: pm_clk_resume()
>>> +renesas-cpg-mssr e6150000.clock-controller: MSTP 522/thermal ON
>>> rcar_thermal e61f0000.thermal: 1 sensor probed
>>>
>>> In the failing case, pm_clk_suspend() is not called, and turning off the
>>> module clock is thus delayed until removal of the device from the clock
>>> domain.
>>> But as pm_clk_suspend() wasn't called, the device isn't correctly resumed on
>>> rebind, and the module clock is never re-enabled, leading to a failure.
>>>
>>> Ulf, what do you think?
>>
>> I totally agree on your analyse.
>>
>> The problem is that the runtime PM status of the device isn't
>> correctly updated at ->remove(). The effect is that the the
>> pm_runtime_get_sync() in ->probe() at re-bind will *not* trigger the
>> ->runtime_resume() callbacks to be invoked, as the runtime PM core
>> believes the device is already runtime resumed.
>
> So that's where it should be fixed?
That would be a more generic approach, although I am not sure how the
driver/PM core should be able to take the correct decision in this
phase. Devices may be runtime PM managed also without a driver bound.
Perhaps when __device_release_driver() finds a bound driver for the
device, it could, after all actions have been performed to unbind the
driver, check if runtime PM is enabled. If it isn't, it could set the
runtime PM status to suspended!?
I have no idea if that would introduce other issues as it would kind
of force the runtime PM status of the device to suspend, without
actually knowing if it's the correct thing to do.
Kind regards
Uffe
Hi,
On Tue, Nov 10, 2015 at 02:00:38PM +0100, Ulf Hansson wrote:
> +Rafael, Alan
>
> On 10 November 2015 at 11:10, Geert Uytterhoeven <[email protected]> wrote:
> > Hi Ulf,
> >
> > On Tue, Nov 10, 2015 at 10:57 AM, Ulf Hansson <[email protected]> wrote:
> >> On 10 November 2015 at 09:18, Geert Uytterhoeven <[email protected]> wrote:
> >>> On Tue, Nov 10, 2015 at 3:12 AM, Kuninori Morimoto
> >>> <[email protected]> wrote:
> >>>> From: Kuninori Morimoto <[email protected]>
> >>>>
> >>>> It is using pm_runtime_get_sync() on probe(). Let's use
> >>>> pm_runtime_put_sync() instead of pm_runtime_put(). Otherwise thermal
> >>>> sensor doesn't work after unbind/re-bind
> >>>>
> >>>> Signed-off-by: Kuninori Morimoto <[email protected]>
> >>>> ---
> >>>> drivers/thermal/rcar_thermal.c | 2 +-
> >>>> 1 file changed, 1 insertion(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
> >>>> index 13d01ed..f7cf2d7 100644
> >>>> --- a/drivers/thermal/rcar_thermal.c
> >>>> +++ b/drivers/thermal/rcar_thermal.c
> >>>> @@ -373,7 +373,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
> >>>> thermal_zone_device_unregister(priv->zone);
> >>>> }
> >>>>
> >>>> - pm_runtime_put(dev);
> >>>> + pm_runtime_put_sync(dev);
> >>>> pm_runtime_disable(dev);
> >>
> >> For the reasons explained by Geert, this is to me also a "workaround".
> >>
> >> I would replace pm_runtime_put() and pm_runtime_disable() with a call
> >> to pm_runtime_force_suspend().
> >>
> >> In that way, you will make sure you device get runtime suspended
> >> (clock domain will gate the clock). Additionally, the runtime PM
> >> status will properly reflect the status of the device.
> >
> > That still sounds like a workaround to me, which we have to apply to all
> > drivers relying on Runtime PM?
>
> Definitely not all drivers, but those that runs pm_runtime_get_sync()
> during ->probe() and expects the ->runtime_resume() callback to always
> be invoked because of that. I guess we need to check upon which
> drivers that may suffer from this.
>
> I wouldn't be surprised if at least a subset of those cases we find,
> are poorly designed from PM point of view and won't even probe
> successfully unless CONFIG_PM is set. Whatever that means...
Yeah, if it is the case that this is a bug in the runtime PM core, I would
prefer this to be properly fixed, so that not only this driver benefits from it.
Rafael? Any thoughts?
BR,
Eduardo Valentin
On Tuesday, November 10, 2015 02:00:38 PM Ulf Hansson wrote:
> +Rafael, Alan
>
> On 10 November 2015 at 11:10, Geert Uytterhoeven <[email protected]> wrote:
> > Hi Ulf,
> >
[cut]
> >>
> >> The problem is that the runtime PM status of the device isn't
> >> correctly updated at ->remove(). The effect is that the the
> >> pm_runtime_get_sync() in ->probe() at re-bind will *not* trigger the
> >> ->runtime_resume() callbacks to be invoked, as the runtime PM core
> >> believes the device is already runtime resumed.
> >
> > So that's where it should be fixed?
>
> That would be a more generic approach, although I am not sure how the
> driver/PM core should be able to take the correct decision in this
> phase. Devices may be runtime PM managed also without a driver bound.
>
> Perhaps when __device_release_driver() finds a bounded driver for the
> device, it could after all actions been performed to unbind the
> driver, check if runtime PM is enabled. If it isn't, it could set the
> runtime PM status to suspended!?
>
> I have no idea if that would introduce other issues as it would kind
> of force the runtime PM status of the device to suspend, without
> actually knowing if it's the correct thing to do.
IMO, that needs to depend on the bus type. If the bus type has a way
to manage PM for devices without drivers, it should be allowed to do so.
Of course, the platform bus type is somewhat special in that respect,
but it looks like we simply need some sort of a convention in there too
(the expectations should be the same for everybody).
Thanks,
Rafael
On Tuesday, November 10, 2015 10:30:51 AM Eduardo Valentin wrote:
> Hi,
>
> On Tue, Nov 10, 2015 at 02:00:38PM +0100, Ulf Hansson wrote:
> > +Rafael, Alan
> >
> > On 10 November 2015 at 11:10, Geert Uytterhoeven <[email protected]> wrote:
> > > Hi Ulf,
> > >
> > > On Tue, Nov 10, 2015 at 10:57 AM, Ulf Hansson <[email protected]> wrote:
> > >> On 10 November 2015 at 09:18, Geert Uytterhoeven <[email protected]> wrote:
> > >>> On Tue, Nov 10, 2015 at 3:12 AM, Kuninori Morimoto
> > >>> <[email protected]> wrote:
> > >>>> From: Kuninori Morimoto <[email protected]>
> > >>>>
> > >>>> It is using pm_runtime_get_sync() on probe(). Let's use
> > >>>> pm_runtime_put_sync() instead of pm_runtime_put(). Otherwise thermal
> > >>>> sensor doesn't work after unbind/re-bind
> > >>>>
> > >>>> Signed-off-by: Kuninori Morimoto <[email protected]>
> > >>>> ---
> > >>>> drivers/thermal/rcar_thermal.c | 2 +-
> > >>>> 1 file changed, 1 insertion(+), 1 deletion(-)
> > >>>>
> > >>>> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
> > >>>> index 13d01ed..f7cf2d7 100644
> > >>>> --- a/drivers/thermal/rcar_thermal.c
> > >>>> +++ b/drivers/thermal/rcar_thermal.c
> > >>>> @@ -373,7 +373,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
> > >>>> thermal_zone_device_unregister(priv->zone);
> > >>>> }
> > >>>>
> > >>>> - pm_runtime_put(dev);
> > >>>> + pm_runtime_put_sync(dev);
> > >>>> pm_runtime_disable(dev);
> > >>
> > >> For the reasons explained by Geert, this is to me also a "workaround".
> > >>
> > >> I would replace pm_runtime_put() and pm_runtime_disable() with a call
> > >> to pm_runtime_force_suspend().
> > >>
> > >> In that way, you will make sure you device get runtime suspended
> > >> (clock domain will gate the clock). Additionally, the runtime PM
> > >> status will properly reflect the status of the device.
> > >
> > > That still sounds like a workaround to me, which we have to apply to all
> > > drivers relying on Runtime PM?
> >
> > Definitely not all drivers, but those that runs pm_runtime_get_sync()
> > during ->probe() and expects the ->runtime_resume() callback to always
> > be invoked because of that. I guess we need to check upon which
> > drivers that may suffer from this.
Generally, calling pm_runtime_get_sync() in ->probe() and expecting the
driver's ->runtime_resume() to always be invoked is a mistake. I know
nothing about any guarantees that this will always happen.
If you want your ->runtime_resume() to be invoked no matter what, you really
need to figure out what the current state of things is, change it to your
expectations with runtime PM disabled and enable runtime PM after that.
Still, that also needs to be done with care as the bus type/PM domain may be
affected by it.
> >
> > I wouldn't be surprised if at least a subset of those cases we find,
> > are poorly designed from PM point of view and won't even probe
> > successfully unless CONFIG_PM is set. Whatever that means...
>
>
> Yeah, if it is the case this is a bug in runtime pm core, I would prefer
> this to be properly fixed, and not only this driver benefits of it.
>
> Rafael? Any thoughts?
First off, it's not a bug in the runtime PM core, as that code is agnostic
to what should or should not happen to devices during ->probe, ->remove etc.
Second, as I said above (and elsewhere), the driver is just a piece of the
puzzle in many cases.
Thanks,
Rafael
Hi Geert
> > - pm_runtime_put(dev);
> > + pm_runtime_put_sync(dev);
> > pm_runtime_disable(dev);
> >
> > return 0;
>
> While I can confirm this fixes the issue, I think this is a bug in the PM
> core, and thus your patch is merely a workaround.
>
> Morimoto-san: I assume this is a recent regression. Have you tried to bisect?
I thought that this was a driver-side issue, but I noticed that it was working before.
I tried to bisect, and found that this patch breaks bind/unbind:
cbc41d0a761bffb3166a413a3c77100a737c0cd7
("drivers: sh: Disable PM runtime for multi-platform ARM with genpd")
Best regards
---
Kuninori Morimoto
On 11 November 2015 at 00:57, Rafael J. Wysocki <[email protected]> wrote:
> On Tuesday, November 10, 2015 02:00:38 PM Ulf Hansson wrote:
>> +Rafael, Alan
>>
>> On 10 November 2015 at 11:10, Geert Uytterhoeven <[email protected]> wrote:
>> > Hi Ulf,
>> >
>
> [cut]
>
>> >>
>> >> The problem is that the runtime PM status of the device isn't
>> >> correctly updated at ->remove(). The effect is that the the
>> >> pm_runtime_get_sync() in ->probe() at re-bind will *not* trigger the
>> >> ->runtime_resume() callbacks to be invoked, as the runtime PM core
>> >> believes the device is already runtime resumed.
>> >
>> > So that's where it should be fixed?
>>
>> That would be a more generic approach, although I am not sure how the
>> driver/PM core should be able to take the correct decision in this
>> phase. Devices may be runtime PM managed also without a driver bound.
>>
>> Perhaps when __device_release_driver() finds a bounded driver for the
>> device, it could after all actions been performed to unbind the
>> driver, check if runtime PM is enabled. If it isn't, it could set the
>> runtime PM status to suspended!?
>>
>> I have no idea if that would introduce other issues as it would kind
>> of force the runtime PM status of the device to suspend, without
>> actually knowing if it's the correct thing to do.
>
> IMO, that needs to depend on the bus type. If the bus type has a way
> to manage PM for devices without drivers, it should be allowed to do so.
By following my suggestion above, we would allow the bus/driver's
->remove() to manage whether runtime PM should be enabled/disabled for
the device, before __device_release_driver() checks that.
Don't you think that the driver core could rely on that information?
I realize that it would be a kind of policy decision for runtime PM,
but it's quite similar to how, when registering/unregistering devices, we set
the runtime PM status to suspended.
If you don't think this is a good idea, I guess we need to deal with
this from subsystem level code somehow instead.
>
> Of course, the platform bus type is somewhat special in that respect,
> but it looks like we simply need some sort of a convention in there too
> (the expectations should be the same for everybody).
>
> Thanks,
> Rafael
>
Kind regards
Uffe
On Wednesday, November 11, 2015 12:03:52 PM Ulf Hansson wrote:
> On 11 November 2015 at 00:57, Rafael J. Wysocki <[email protected]> wrote:
> > On Tuesday, November 10, 2015 02:00:38 PM Ulf Hansson wrote:
> >> +Rafael, Alan
> >>
> >> On 10 November 2015 at 11:10, Geert Uytterhoeven <[email protected]> wrote:
> >> > Hi Ulf,
> >> >
> >
> > [cut]
> >
> >> >>
> >> >> The problem is that the runtime PM status of the device isn't
> >> >> correctly updated at ->remove(). The effect is that the the
> >> >> pm_runtime_get_sync() in ->probe() at re-bind will *not* trigger the
> >> >> ->runtime_resume() callbacks to be invoked, as the runtime PM core
> >> >> believes the device is already runtime resumed.
> >> >
> >> > So that's where it should be fixed?
> >>
> >> That would be a more generic approach, although I am not sure how the
> >> driver/PM core should be able to take the correct decision in this
> >> phase. Devices may be runtime PM managed also without a driver bound.
> >>
> >> Perhaps when __device_release_driver() finds a bounded driver for the
> >> device, it could after all actions been performed to unbind the
> >> driver, check if runtime PM is enabled. If it isn't, it could set the
> >> runtime PM status to suspended!?
> >>
> >> I have no idea if that would introduce other issues as it would kind
> >> of force the runtime PM status of the device to suspend, without
> >> actually knowing if it's the correct thing to do.
> >
> > IMO, that needs to depend on the bus type. If the bus type has a way
> > to manage PM for devices without drivers, it should be allowed to do so.
>
> By following my suggestion above, we would allow the bus/driver's
> ->remove() to manage whether runtime PM should be enabled/disabled for
> the device, before __device_release_driver() checks that.
> Don't you think that the driver core could rely on that information?
>
> I realize that it would be a kind of policy decision for runtime PM,
> but it's quite similar as when register/unregister devices when we set
> the runtime PM status to suspended.
OK
If we did that, all devices that had just been unbound from their drivers
and had runtime PM disabled after that would be set to "suspended" by the
core, right?
If that helps, I don't really have objections.
Thanks,
Rafael
On 12 November 2015 at 02:06, Rafael J. Wysocki <[email protected]> wrote:
> On Wednesday, November 11, 2015 12:03:52 PM Ulf Hansson wrote:
>> On 11 November 2015 at 00:57, Rafael J. Wysocki <[email protected]> wrote:
>> > On Tuesday, November 10, 2015 02:00:38 PM Ulf Hansson wrote:
>> >> +Rafael, Alan
>> >>
>> >> On 10 November 2015 at 11:10, Geert Uytterhoeven <[email protected]> wrote:
>> >> > Hi Ulf,
>> >> >
>> >
>> > [cut]
>> >
>> >> >>
>> >> >> The problem is that the runtime PM status of the device isn't
>> >> >> correctly updated at ->remove(). The effect is that the
>> >> >> pm_runtime_get_sync() in ->probe() at re-bind will *not* trigger the
>> >> >> ->runtime_resume() callbacks to be invoked, as the runtime PM core
>> >> >> believes the device is already runtime resumed.
>> >> >
>> >> > So that's where it should be fixed?
>> >>
>> >> That would be a more generic approach, although I am not sure how the
>> >> driver/PM core should be able to take the correct decision in this
>> >> phase. Devices may be runtime PM managed also without a driver bound.
>> >>
>> >> Perhaps when __device_release_driver() finds a bound driver for the
>> >> device, it could, after all actions to unbind the driver have been
>> >> performed, check if runtime PM is enabled. If it isn't, it could set
>> >> the runtime PM status to suspended!?
>> >>
>> >> I have no idea if that would introduce other issues as it would kind
>> >> of force the runtime PM status of the device to suspend, without
>> >> actually knowing if it's the correct thing to do.
>> >
>> > IMO, that needs to depend on the bus type. If the bus type has a way
>> > to manage PM for devices without drivers, it should be allowed to do so.
>>
>> By following my suggestion above, we would allow the bus/driver's
>> ->remove() to manage whether runtime PM should be enabled/disabled for
>> the device, before __device_release_driver() checks that.
>> Don't you think that the driver core could rely on that information?
>>
>> I realize that it would be a kind of policy decision for runtime PM,
>> but it's quite similar to what happens when devices are registered or
>> unregistered, where we set the runtime PM status to suspended.
>
> OK
>
> If we did that, all devices that had just been unbound from their drivers
> and had runtime PM disabled after that would be set to "suspended" by the
> core, right?
Yes, that's the idea. I will send a patch we can test.
>
> If that helps, I don't really have objections.
>
Thanks!
Kind regards
Uffe
Hello,
On Thu, Nov 12, 2015 at 09:04:09AM +0100, Ulf Hansson wrote:
> >
> > OK
> >
> > If we did that, all devices that had just been unbound from their drivers
> > and had runtime PM disabled after that would be set to "suspended" by the
> > core, right?
>
> Yes, that's the idea. I will send a patch we can test.
>
> >
> > If that helps, I don't really have objections.
Given this discussion,
Is this series of two patches on this thermal driver still applicable?
BR,
Eduardo
> >
>
> Thanks!
>
> Kind regards
> Uffe
On 10 November 2015 at 03:12, Kuninori Morimoto
<[email protected]> wrote:
> From: Kuninori Morimoto <[email protected]>
>
> Probe error operation and remove operation are same.
Currently they are, but that's because the error handling in ->probe() is wrong.
In some error cases the pm_runtime_put|disable() should have been
called but they aren't. Perhaps it's better to fix that first.
Kind regards
Uffe
> Let's use same function.
>
> Signed-off-by: Kuninori Morimoto <[email protected]>
> ---
> drivers/thermal/rcar_thermal.c | 49 ++++++++++++++++++------------------------
> 1 file changed, 21 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
> index 5d4ae7d..13d01ed 100644
> --- a/drivers/thermal/rcar_thermal.c
> +++ b/drivers/thermal/rcar_thermal.c
> @@ -361,6 +361,24 @@ static irqreturn_t rcar_thermal_irq(int irq, void *data)
> /*
> * platform functions
> */
> +static int rcar_thermal_remove(struct platform_device *pdev)
> +{
> + struct rcar_thermal_common *common = platform_get_drvdata(pdev);
> + struct device *dev = &pdev->dev;
> + struct rcar_thermal_priv *priv;
> +
> + rcar_thermal_for_each_priv(priv, common) {
> + if (rcar_has_irq_support(priv))
> + rcar_thermal_irq_disable(priv);
> + thermal_zone_device_unregister(priv->zone);
> + }
> +
> + pm_runtime_put(dev);
> + pm_runtime_disable(dev);
> +
> + return 0;
> +}
> +
> static int rcar_thermal_probe(struct platform_device *pdev)
> {
> struct rcar_thermal_common *common;
> @@ -377,6 +395,8 @@ static int rcar_thermal_probe(struct platform_device *pdev)
> if (!common)
> return -ENOMEM;
>
> + platform_set_drvdata(pdev, common);
> +
> INIT_LIST_HEAD(&common->head);
> spin_lock_init(&common->lock);
> common->dev = dev;
> @@ -454,43 +474,16 @@ static int rcar_thermal_probe(struct platform_device *pdev)
> rcar_thermal_common_write(common, ENR, enr_bits);
> }
>
> - platform_set_drvdata(pdev, common);
> -
> dev_info(dev, "%d sensor probed\n", i);
>
> return 0;
>
> error_unregister:
> - rcar_thermal_for_each_priv(priv, common) {
> - if (rcar_has_irq_support(priv))
> - rcar_thermal_irq_disable(priv);
> - thermal_zone_device_unregister(priv->zone);
> - }
> -
> - pm_runtime_put(dev);
> - pm_runtime_disable(dev);
> + rcar_thermal_remove(pdev);
>
> return ret;
> }
>
> -static int rcar_thermal_remove(struct platform_device *pdev)
> -{
> - struct rcar_thermal_common *common = platform_get_drvdata(pdev);
> - struct device *dev = &pdev->dev;
> - struct rcar_thermal_priv *priv;
> -
> - rcar_thermal_for_each_priv(priv, common) {
> - if (rcar_has_irq_support(priv))
> - rcar_thermal_irq_disable(priv);
> - thermal_zone_device_unregister(priv->zone);
> - }
> -
> - pm_runtime_put(dev);
> - pm_runtime_disable(dev);
> -
> - return 0;
> -}
> -
> static const struct of_device_id rcar_thermal_dt_ids[] = {
> { .compatible = "renesas,rcar-thermal", },
> {},
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
On 12 November 2015 at 19:43, Eduardo Valentin <[email protected]> wrote:
> Hello,
>
> On Thu, Nov 12, 2015 at 09:04:09AM +0100, Ulf Hansson wrote:
>> >
>> > OK
>> >
>> > If we did that, all devices that had just been unbound from their drivers
>> > and had runtime PM disabled after that would be set to "suspended" by the
>> > core, right?
>>
>> Yes, that's the idea. I will send a patch we can test.
>>
>> >
>> > If that helps, I don't really have objections.
>
> Given this discussion,
>
> Is this series of two patches on this thermal driver still applicable?
I think patch1 is different, it's a cleanup patch (I just replied to
it separately).
As for the subject patch, I think we agreed that it's a workaround,
but I don't have a strong opinion if you want to pick it up anyway.
On the other hand, the change won't be needed *if* we solve the problem
via the driver core. I intend to send a patch for this on Monday and
will keep you on cc.
Kind regards
Uffe
On Tue, Nov 10, 2015 at 02:12:06AM +0000, Kuninori Morimoto wrote:
> From: Kuninori Morimoto <[email protected]>
>
> Probe error operation and remove operation are same.
> Let's use same function.
I am picking this one and sending it for the next rc cycle. The second
patch is still under discussion and the issue may be solved differently,
so for now I won't take that one.
BR,
>
> Signed-off-by: Kuninori Morimoto <[email protected]>
> ---
> drivers/thermal/rcar_thermal.c | 49 ++++++++++++++++++------------------------
> 1 file changed, 21 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
> index 5d4ae7d..13d01ed 100644
> --- a/drivers/thermal/rcar_thermal.c
> +++ b/drivers/thermal/rcar_thermal.c
> @@ -361,6 +361,24 @@ static irqreturn_t rcar_thermal_irq(int irq, void *data)
> /*
> * platform functions
> */
> +static int rcar_thermal_remove(struct platform_device *pdev)
> +{
> + struct rcar_thermal_common *common = platform_get_drvdata(pdev);
> + struct device *dev = &pdev->dev;
> + struct rcar_thermal_priv *priv;
> +
> + rcar_thermal_for_each_priv(priv, common) {
> + if (rcar_has_irq_support(priv))
> + rcar_thermal_irq_disable(priv);
> + thermal_zone_device_unregister(priv->zone);
> + }
> +
> + pm_runtime_put(dev);
> + pm_runtime_disable(dev);
> +
> + return 0;
> +}
> +
> static int rcar_thermal_probe(struct platform_device *pdev)
> {
> struct rcar_thermal_common *common;
> @@ -377,6 +395,8 @@ static int rcar_thermal_probe(struct platform_device *pdev)
> if (!common)
> return -ENOMEM;
>
> + platform_set_drvdata(pdev, common);
> +
> INIT_LIST_HEAD(&common->head);
> spin_lock_init(&common->lock);
> common->dev = dev;
> @@ -454,43 +474,16 @@ static int rcar_thermal_probe(struct platform_device *pdev)
> rcar_thermal_common_write(common, ENR, enr_bits);
> }
>
> - platform_set_drvdata(pdev, common);
> -
> dev_info(dev, "%d sensor probed\n", i);
>
> return 0;
>
> error_unregister:
> - rcar_thermal_for_each_priv(priv, common) {
> - if (rcar_has_irq_support(priv))
> - rcar_thermal_irq_disable(priv);
> - thermal_zone_device_unregister(priv->zone);
> - }
> -
> - pm_runtime_put(dev);
> - pm_runtime_disable(dev);
> + rcar_thermal_remove(pdev);
>
> return ret;
> }
>
> -static int rcar_thermal_remove(struct platform_device *pdev)
> -{
> - struct rcar_thermal_common *common = platform_get_drvdata(pdev);
> - struct device *dev = &pdev->dev;
> - struct rcar_thermal_priv *priv;
> -
> - rcar_thermal_for_each_priv(priv, common) {
> - if (rcar_has_irq_support(priv))
> - rcar_thermal_irq_disable(priv);
> - thermal_zone_device_unregister(priv->zone);
> - }
> -
> - pm_runtime_put(dev);
> - pm_runtime_disable(dev);
> -
> - return 0;
> -}
> -
> static const struct of_device_id rcar_thermal_dt_ids[] = {
> { .compatible = "renesas,rcar-thermal", },
> {},
> --
> 1.9.1
>