Fix access illegal address problem in following condition:
There are muti devfreq cooling devices in system, some of them has
em model but other does not, energy model ops such as state2power will
append to global devfreq_cooling_ops when the cooling device with
em model register. It makes the cooling device without em model
also use devfreq_cooling_ops after appending when register later by
of_devfreq_cooling_register_power() or of_devfreq_cooling_register().
IPA governor regards the cooling devices without em model as a power actor
because they also have energy model ops, and will access illegal address
at dfc->em_pd when execute cdev->ops->get_requested_power,
cdev->ops->state2power or cdev->ops->power2state.
Signed-off-by: Kant Fan <[email protected]>
---
drivers/thermal/devfreq_cooling.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 4310cb342a9f..d38a80adec73 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -358,21 +358,28 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
struct thermal_cooling_device *cdev;
struct device *dev = df->dev.parent;
struct devfreq_cooling_device *dfc;
+ struct thermal_cooling_device_ops *ops;
char *name;
int err, num_opps;
- dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
- if (!dfc)
+ ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
return ERR_PTR(-ENOMEM);
+ dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
+ if (!dfc) {
+ err = -ENOMEM;
+ goto free_ops;
+ }
+
dfc->devfreq = df;
dfc->em_pd = em_pd_get(dev);
if (dfc->em_pd) {
- devfreq_cooling_ops.get_requested_power =
+ ops->get_requested_power =
devfreq_cooling_get_requested_power;
- devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
- devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
+ ops->state2power = devfreq_cooling_state2power;
+ ops->power2state = devfreq_cooling_power2state;
dfc->power_ops = dfc_power;
@@ -407,8 +414,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
if (!name)
goto remove_qos_req;
- cdev = thermal_of_cooling_device_register(np, name, dfc,
- &devfreq_cooling_ops);
+ cdev = thermal_of_cooling_device_register(np, name, dfc, ops);
kfree(name);
if (IS_ERR(cdev)) {
@@ -429,6 +435,8 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
kfree(dfc->freq_table);
free_dfc:
kfree(dfc);
+free_ops:
+ kfree(ops);
return ERR_PTR(err);
}
@@ -510,11 +518,13 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
{
struct devfreq_cooling_device *dfc;
+ const struct thermal_cooling_device_ops *ops;
struct device *dev;
if (IS_ERR_OR_NULL(cdev))
return;
+ ops = cdev->ops;
dfc = cdev->devdata;
dev = dfc->devfreq->dev.parent;
@@ -525,5 +535,6 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
kfree(dfc->freq_table);
kfree(dfc);
+ kfree(ops);
}
EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
--
2.29.0
Hi Kant,
On 3/12/22 04:59, Kant Fan wrote:
> Fix access illegal address problem in following condition:
> There are muti devfreq cooling devices in system, some of them has
> em model but other does not, energy model ops such as state2power will
> append to global devfreq_cooling_ops when the cooling device with
> em model register. It makes the cooling device without em model
> also use devfreq_cooling_ops after appending when register later by
> of_devfreq_cooling_register_power() or of_devfreq_cooling_register().
>
> IPA governor regards the cooling devices without em model as a power actor
> because they also have energy model ops, and will access illegal address
> at dfc->em_pd when execute cdev->ops->get_requested_power,
> cdev->ops->state2power or cdev->ops->power2state.
>
> Signed-off-by: Kant Fan <[email protected]>
Thank you for finding this issue. This was also an issue since the
beginning of that code. The modified global ops after first registration
which went through, was also previously there. Thus, we would need two
different patches for stable kernels.
For this one, please add the tag:
Fixes: 615510fe13bd2 ("thermal: devfreq_cooling: remove old power model
and use EM")
This patch would also go via stable tree for kernels v5.11+
Please read the process how to send a patch which will be merged to the
stable tree.
There will be a need to create another patch(es) for stable kernels with
Fixes: a76caf55e5b35 ("thermal: Add devfreq cooling")
In those kernels also the global ops is modified and might not support
properly many cooling devices. It's present in other stable kernels:
v5.10 and older
> ---
> drivers/thermal/devfreq_cooling.c | 25 ++++++++++++++++++-------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> index 4310cb342a9f..d38a80adec73 100644
> --- a/drivers/thermal/devfreq_cooling.c
> +++ b/drivers/thermal/devfreq_cooling.c
> @@ -358,21 +358,28 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> struct thermal_cooling_device *cdev;
> struct device *dev = df->dev.parent;
> struct devfreq_cooling_device *dfc;
> + struct thermal_cooling_device_ops *ops;
> char *name;
> int err, num_opps;
>
> - dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
> - if (!dfc)
> + ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL);
> + if (!ops)
> return ERR_PTR(-ENOMEM);
>
> + dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
> + if (!dfc) {
> + err = -ENOMEM;
> + goto free_ops;
> + }
> +
> dfc->devfreq = df;
>
> dfc->em_pd = em_pd_get(dev);
> if (dfc->em_pd) {
> - devfreq_cooling_ops.get_requested_power =
> + ops->get_requested_power =
> devfreq_cooling_get_requested_power;
> - devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
> - devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
> + ops->state2power = devfreq_cooling_state2power;
> + ops->power2state = devfreq_cooling_power2state;
>
> dfc->power_ops = dfc_power;
>
> @@ -407,8 +414,7 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> if (!name)
> goto remove_qos_req;
>
> - cdev = thermal_of_cooling_device_register(np, name, dfc,
> - &devfreq_cooling_ops);
> + cdev = thermal_of_cooling_device_register(np, name, dfc, ops);
> kfree(name);
>
> if (IS_ERR(cdev)) {
> @@ -429,6 +435,8 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> kfree(dfc->freq_table);
> free_dfc:
> kfree(dfc);
> +free_ops:
> + kfree(ops);
>
> return ERR_PTR(err);
> }
> @@ -510,11 +518,13 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
> void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
> {
> struct devfreq_cooling_device *dfc;
> + const struct thermal_cooling_device_ops *ops;
> struct device *dev;
>
> if (IS_ERR_OR_NULL(cdev))
> return;
>
> + ops = cdev->ops;
> dfc = cdev->devdata;
> dev = dfc->devfreq->dev.parent;
>
> @@ -525,5 +535,6 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>
> kfree(dfc->freq_table);
> kfree(dfc);
> + kfree(ops);
> }
> EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
The fix looks good.
On 14/03/2022 21:41, Lukasz Luba wrote:
> Hi Kant,
>
> On 3/12/22 04:59, Kant Fan wrote:
>> Fix access illegal address problem in following condition:
>> There are muti devfreq cooling devices in system, some of them has
>> em model but other does not, energy model ops such as state2power will
>> append to global devfreq_cooling_ops when the cooling device with
>> em model register. It makes the cooling device without em model
>> also use devfreq_cooling_ops after appending when register later by
>> of_devfreq_cooling_register_power() or of_devfreq_cooling_register().
>>
>> IPA governor regards the cooling devices without em model as a power
>> actor
>> because they also have energy model ops, and will access illegal address
>> at dfc->em_pd when execute cdev->ops->get_requested_power,
>> cdev->ops->state2power or cdev->ops->power2state.
>>
>> Signed-off-by: Kant Fan <[email protected]>
>
> Thank you for finding this issue. This was also an issue since the
> beginning of that code. The modified global ops after first registration
> which went through, was also previously there. Thus, we would need two
> different patches for stable kernels.
>
> For this one, please add the tag:
> Fixes: 615510fe13bd2 ("thermal: devfreq_cooling: remove old power model
> and use EM")
>
> This patch would also go via stable tree for kernels v5.11+
> Please read the process how to send a patch which will be merged to the
> stable tree.
>
> There will be a need to create another patch(es) for stable kernels with
> Fixes: a76caf55e5b35 ("thermal: Add devfreq cooling")
> In those kernels also the global ops is modified and might not support
> properly many cooling devices. It's present in other stable kernels:
> v5.10 and older
>
>> ---
>> drivers/thermal/devfreq_cooling.c | 25 ++++++++++++++++++-------
>> 1 file changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/thermal/devfreq_cooling.c
>> b/drivers/thermal/devfreq_cooling.c
>> index 4310cb342a9f..d38a80adec73 100644
>> --- a/drivers/thermal/devfreq_cooling.c
>> +++ b/drivers/thermal/devfreq_cooling.c
>> @@ -358,21 +358,28 @@ of_devfreq_cooling_register_power(struct
>> device_node *np, struct devfreq *df,
>> struct thermal_cooling_device *cdev;
>> struct device *dev = df->dev.parent;
>> struct devfreq_cooling_device *dfc;
>> + struct thermal_cooling_device_ops *ops;
>> char *name;
>> int err, num_opps;
>> - dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
>> - if (!dfc)
>> + ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL);
>> + if (!ops)
>> return ERR_PTR(-ENOMEM);
>> + dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
>> + if (!dfc) {
>> + err = -ENOMEM;
>> + goto free_ops;
>> + }
>> +
>> dfc->devfreq = df;
>> dfc->em_pd = em_pd_get(dev);
>> if (dfc->em_pd) {
>> - devfreq_cooling_ops.get_requested_power =
>> + ops->get_requested_power =
>> devfreq_cooling_get_requested_power;
>> - devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
>> - devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
>> + ops->state2power = devfreq_cooling_state2power;
>> + ops->power2state = devfreq_cooling_power2state;
>> dfc->power_ops = dfc_power;
>> @@ -407,8 +414,7 @@ of_devfreq_cooling_register_power(struct
>> device_node *np, struct devfreq *df,
>> if (!name)
>> goto remove_qos_req;
>> - cdev = thermal_of_cooling_device_register(np, name, dfc,
>> - &devfreq_cooling_ops);
>> + cdev = thermal_of_cooling_device_register(np, name, dfc, ops);
>> kfree(name);
>> if (IS_ERR(cdev)) {
>> @@ -429,6 +435,8 @@ of_devfreq_cooling_register_power(struct
>> device_node *np, struct devfreq *df,
>> kfree(dfc->freq_table);
>> free_dfc:
>> kfree(dfc);
>> +free_ops:
>> + kfree(ops);
>> return ERR_PTR(err);
>> }
>> @@ -510,11 +518,13 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
>> void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>> {
>> struct devfreq_cooling_device *dfc;
>> + const struct thermal_cooling_device_ops *ops;
>> struct device *dev;
>> if (IS_ERR_OR_NULL(cdev))
>> return;
>> + ops = cdev->ops;
>> dfc = cdev->devdata;
>> dev = dfc->devfreq->dev.parent;
>> @@ -525,5 +535,6 @@ void devfreq_cooling_unregister(struct
>> thermal_cooling_device *cdev)
>> kfree(dfc->freq_table);
>> kfree(dfc);
>> + kfree(ops);
>> }
>> EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
>
> The fix looks good.
Hi Lukasz,
Thanks for your advice. According to that, I made two separate patches
for mainline and the stable trees:
The first patch (patchwork.kernel.org: Message ID:
[email protected]) is for mainline. I added
the 'fix' tag and 'Cc: [email protected] # 5.13+' to remind which
stable trees should be back-ported.
The second patch (patchwork.kernel.org: Message ID:
[email protected]) is for stable tree v5.10
and older. I added an upstream commit ID to indicate where the patch
comes from. I also added 'Cc: [email protected] # 4.4+' to remind
which stable trees should be back-ported.
Please check if they are correct. Thank you.
Kant Fan
On 3/25/22 10:43, Kant Fan wrote:
> On 14/03/2022 21:41, Lukasz Luba wrote:
>> Hi Kant,
>>
>> On 3/12/22 04:59, Kant Fan wrote:
>>> Fix access illegal address problem in following condition:
>>> There are muti devfreq cooling devices in system, some of them has
>>> em model but other does not, energy model ops such as state2power will
>>> append to global devfreq_cooling_ops when the cooling device with
>>> em model register. It makes the cooling device without em model
>>> also use devfreq_cooling_ops after appending when register later by
>>> of_devfreq_cooling_register_power() or of_devfreq_cooling_register().
>>>
>>> IPA governor regards the cooling devices without em model as a power
>>> actor
>>> because they also have energy model ops, and will access illegal address
>>> at dfc->em_pd when execute cdev->ops->get_requested_power,
>>> cdev->ops->state2power or cdev->ops->power2state.
>>>
>>> Signed-off-by: Kant Fan <[email protected]>
>>
>> Thank you for finding this issue. This was also an issue since the
>> beginning of that code. The modified global ops after first registration
>> which went through, was also previously there. Thus, we would need two
>> different patches for stable kernels.
>>
>> For this one, please add the tag:
>> Fixes: 615510fe13bd2 ("thermal: devfreq_cooling: remove old power
>> model and use EM")
>>
>> This patch would also go via stable tree for kernels v5.11+
>> Please read the process how to send a patch which will be merged to the
>> stable tree.
>>
>> There will be a need to create another patch(es) for stable kernels with
>> Fixes: a76caf55e5b35 ("thermal: Add devfreq cooling")
>> In those kernels also the global ops is modified and might not support
>> properly many cooling devices. It's present in other stable kernels:
>> v5.10 and older
>>
>>> ---
>>> drivers/thermal/devfreq_cooling.c | 25 ++++++++++++++++++-------
>>> 1 file changed, 18 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/thermal/devfreq_cooling.c
>>> b/drivers/thermal/devfreq_cooling.c
>>> index 4310cb342a9f..d38a80adec73 100644
>>> --- a/drivers/thermal/devfreq_cooling.c
>>> +++ b/drivers/thermal/devfreq_cooling.c
>>> @@ -358,21 +358,28 @@ of_devfreq_cooling_register_power(struct
>>> device_node *np, struct devfreq *df,
>>> struct thermal_cooling_device *cdev;
>>> struct device *dev = df->dev.parent;
>>> struct devfreq_cooling_device *dfc;
>>> + struct thermal_cooling_device_ops *ops;
>>> char *name;
>>> int err, num_opps;
>>> - dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
>>> - if (!dfc)
>>> + ops = kmemdup(&devfreq_cooling_ops, sizeof(*ops), GFP_KERNEL);
>>> + if (!ops)
>>> return ERR_PTR(-ENOMEM);
>>> + dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
>>> + if (!dfc) {
>>> + err = -ENOMEM;
>>> + goto free_ops;
>>> + }
>>> +
>>> dfc->devfreq = df;
>>> dfc->em_pd = em_pd_get(dev);
>>> if (dfc->em_pd) {
>>> - devfreq_cooling_ops.get_requested_power =
>>> + ops->get_requested_power =
>>> devfreq_cooling_get_requested_power;
>>> - devfreq_cooling_ops.state2power = devfreq_cooling_state2power;
>>> - devfreq_cooling_ops.power2state = devfreq_cooling_power2state;
>>> + ops->state2power = devfreq_cooling_state2power;
>>> + ops->power2state = devfreq_cooling_power2state;
>>> dfc->power_ops = dfc_power;
>>> @@ -407,8 +414,7 @@ of_devfreq_cooling_register_power(struct
>>> device_node *np, struct devfreq *df,
>>> if (!name)
>>> goto remove_qos_req;
>>> - cdev = thermal_of_cooling_device_register(np, name, dfc,
>>> - &devfreq_cooling_ops);
>>> + cdev = thermal_of_cooling_device_register(np, name, dfc, ops);
>>> kfree(name);
>>> if (IS_ERR(cdev)) {
>>> @@ -429,6 +435,8 @@ of_devfreq_cooling_register_power(struct
>>> device_node *np, struct devfreq *df,
>>> kfree(dfc->freq_table);
>>> free_dfc:
>>> kfree(dfc);
>>> +free_ops:
>>> + kfree(ops);
>>> return ERR_PTR(err);
>>> }
>>> @@ -510,11 +518,13 @@ EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
>>> void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>>> {
>>> struct devfreq_cooling_device *dfc;
>>> + const struct thermal_cooling_device_ops *ops;
>>> struct device *dev;
>>> if (IS_ERR_OR_NULL(cdev))
>>> return;
>>> + ops = cdev->ops;
>>> dfc = cdev->devdata;
>>> dev = dfc->devfreq->dev.parent;
>>> @@ -525,5 +535,6 @@ void devfreq_cooling_unregister(struct
>>> thermal_cooling_device *cdev)
>>> kfree(dfc->freq_table);
>>> kfree(dfc);
>>> + kfree(ops);
>>> }
>>> EXPORT_SYMBOL_GPL(devfreq_cooling_unregister);
>>
>> The fix looks good.
>
> Hi Lukasz,
> Thanks for your advice. According to that, I made two separate patches
> for mainline and the stable trees:
> The first patch (patchwork.kernel.org: Message ID:
> [email protected]) is for mainline. I added
> the 'fix' tag and 'Cc: [email protected] # 5.13+' to remind which
> stable trees should be back-ported.
> The second patch (patchwork.kernel.org: Message ID:
> [email protected]) is for stable tree v5.10
> and older. I added an upstream commit ID to indicate where the patch
> comes from. I also added 'Cc: [email protected] # 4.4+' to remind
> which stable trees should be back-ported.
> Please check if they are correct. Thank you.
Thank you for sending them. Let me have a look.