2020-01-10 17:50:29

by Matthias Kaehlcke

[permalink] [raw]
Subject: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits

Now that devfreq supports limiting the frequency range of a device
through PM QoS make use of it instead of disabling OPPs that should
not be used.

Signed-off-by: Matthias Kaehlcke <[email protected]>
---

drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
1 file changed, 20 insertions(+), 46 deletions(-)

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index ef59256887ff..3a63603afcf2 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -24,11 +24,13 @@
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/pm_opp.h>
+#include <linux/pm_qos.h>
#include <linux/thermal.h>

#include <trace/events/thermal.h>

-#define SCALE_ERROR_MITIGATION 100
+#define HZ_PER_KHZ 1000
+#define SCALE_ERROR_MITIGATION 100

static DEFINE_IDA(devfreq_ida);

@@ -65,49 +67,9 @@ struct devfreq_cooling_device {
struct devfreq_cooling_power *power_ops;
u32 res_util;
int capped_state;
+ struct dev_pm_qos_request req_max_freq;
};

-/**
- * partition_enable_opps() - disable all opps above a given state
- * @dfc: Pointer to devfreq we are operating on
- * @cdev_state: cooling device state we're setting
- *
- * Go through the OPPs of the device, enabling all OPPs until
- * @cdev_state and disabling those frequencies above it.
- */
-static int partition_enable_opps(struct devfreq_cooling_device *dfc,
- unsigned long cdev_state)
-{
- int i;
- struct device *dev = dfc->devfreq->dev.parent;
-
- for (i = 0; i < dfc->freq_table_size; i++) {
- struct dev_pm_opp *opp;
- int ret = 0;
- unsigned int freq = dfc->freq_table[i];
- bool want_enable = i >= cdev_state ? true : false;
-
- opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
-
- if (PTR_ERR(opp) == -ERANGE)
- continue;
- else if (IS_ERR(opp))
- return PTR_ERR(opp);
-
- dev_pm_opp_put(opp);
-
- if (want_enable)
- ret = dev_pm_opp_enable(dev, freq);
- else
- ret = dev_pm_opp_disable(dev, freq);
-
- if (ret)
- return ret;
- }
-
- return 0;
-}
-
static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
@@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
struct devfreq_cooling_device *dfc = cdev->devdata;
struct devfreq *df = dfc->devfreq;
struct device *dev = df->dev.parent;
- int ret;
+ unsigned long freq;

if (state == dfc->cooling_state)
return 0;
@@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
if (state >= dfc->freq_table_size)
return -EINVAL;

- ret = partition_enable_opps(dfc, state);
- if (ret)
- return ret;
+ freq = dfc->freq_table[state];
+
+ dev_pm_qos_update_request(&dfc->req_max_freq,
+ DIV_ROUND_UP(freq, HZ_PER_KHZ));

dfc->cooling_state = state;

@@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
if (err)
goto free_dfc;

+ err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
+ DEV_PM_QOS_MAX_FREQUENCY,
+ PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+ if (err < 0)
+ goto remove_qos_req;
+
err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
if (err < 0)
goto free_tables;
@@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,

release_ida:
ida_simple_remove(&devfreq_ida, dfc->id);
+
+remove_qos_req:
+ dev_pm_qos_remove_request(&dfc->req_max_freq);
+
free_tables:
kfree(dfc->power_table);
kfree(dfc->freq_table);
@@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)

thermal_cooling_device_unregister(dfc->cdev);
ida_simple_remove(&devfreq_ida, dfc->id);
+ dev_pm_qos_remove_request(&dfc->req_max_freq);
kfree(dfc->power_table);
kfree(dfc->freq_table);

--
2.25.0.rc1.283.g88dfdc4193-goog


2020-01-10 17:50:39

by Matthias Kaehlcke

[permalink] [raw]
Subject: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits

Traditionally devfreq cooling devices dynamically disabled OPPs
that shouldn't be used because of thermal pressure. Devfreq cooling
devices now use PM QoS to set frequency limits, hence the devfreq
code dealing that deals with disabled OPPs can be removed.

Signed-off-by: Matthias Kaehlcke <[email protected]>
---

drivers/devfreq/devfreq.c | 75 +++++----------------------------------
include/linux/devfreq.h | 4 ---
2 files changed, 8 insertions(+), 71 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 57f6944d65a6..ec66e2c27cc4 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -73,34 +73,6 @@ static struct devfreq *find_device_devfreq(struct device *dev)
return ERR_PTR(-ENODEV);
}

-static unsigned long find_available_min_freq(struct devfreq *devfreq)
-{
- struct dev_pm_opp *opp;
- unsigned long min_freq = 0;
-
- opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &min_freq);
- if (IS_ERR(opp))
- min_freq = 0;
- else
- dev_pm_opp_put(opp);
-
- return min_freq;
-}
-
-static unsigned long find_available_max_freq(struct devfreq *devfreq)
-{
- struct dev_pm_opp *opp;
- unsigned long max_freq = ULONG_MAX;
-
- opp = dev_pm_opp_find_freq_floor(devfreq->dev.parent, &max_freq);
- if (IS_ERR(opp))
- max_freq = 0;
- else
- dev_pm_opp_put(opp);
-
- return max_freq;
-}
-
/**
* get_freq_range() - Get the current freq range
* @devfreq: the devfreq instance
@@ -141,10 +113,6 @@ static void get_freq_range(struct devfreq *devfreq,
*max_freq = min(*max_freq,
(unsigned long)HZ_PER_KHZ * qos_max_freq);

- /* Apply constraints from OPP interface */
- *min_freq = max(*min_freq, devfreq->scaling_min_freq);
- *max_freq = min(*max_freq, devfreq->scaling_max_freq);
-
if (*min_freq > *max_freq)
*min_freq = *max_freq;
}
@@ -610,23 +578,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
void *devp)
{
struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
- int err = -EINVAL;
+ int err;

mutex_lock(&devfreq->lock);
-
- devfreq->scaling_min_freq = find_available_min_freq(devfreq);
- if (!devfreq->scaling_min_freq)
- goto out;
-
- devfreq->scaling_max_freq = find_available_max_freq(devfreq);
- if (!devfreq->scaling_max_freq) {
- devfreq->scaling_max_freq = ULONG_MAX;
- goto out;
- }
-
err = update_devfreq(devfreq);
-
-out:
mutex_unlock(&devfreq->lock);
if (err)
dev_err(devfreq->dev.parent,
@@ -781,19 +736,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
mutex_lock(&devfreq->lock);
}

- devfreq->scaling_min_freq = find_available_min_freq(devfreq);
- if (!devfreq->scaling_min_freq) {
- mutex_unlock(&devfreq->lock);
- err = -EINVAL;
+ err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
+ DEV_PM_QOS_MIN_FREQUENCY, 0);
+ if (err < 0)
goto err_dev;
- }
-
- devfreq->scaling_max_freq = find_available_max_freq(devfreq);
- if (!devfreq->scaling_max_freq) {
- mutex_unlock(&devfreq->lock);
- err = -EINVAL;
+ err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
+ DEV_PM_QOS_MAX_FREQUENCY,
+ PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+ if (err < 0)
goto err_dev;
- }

devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
atomic_set(&devfreq->suspend_count, 0);
@@ -834,16 +785,6 @@ struct devfreq *devfreq_add_device(struct device *dev,

mutex_unlock(&devfreq->lock);

- err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
- DEV_PM_QOS_MIN_FREQUENCY, 0);
- if (err < 0)
- goto err_devfreq;
- err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
- DEV_PM_QOS_MAX_FREQUENCY,
- PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
- if (err < 0)
- goto err_devfreq;
-
devfreq->nb_min.notifier_call = qos_min_notifier_call;
err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
DEV_PM_QOS_MIN_FREQUENCY);
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index fb376b5b7281..cb75f23ad2f4 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -126,8 +126,6 @@ struct devfreq_dev_profile {
* touch this.
* @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs)
* @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs)
- * @scaling_min_freq: Limit minimum frequency requested by OPP interface
- * @scaling_max_freq: Limit maximum frequency requested by OPP interface
* @stop_polling: devfreq polling status of a device.
* @suspend_freq: frequency of a device set during suspend phase.
* @resume_freq: frequency of a device set in resume phase.
@@ -166,8 +164,6 @@ struct devfreq {

struct dev_pm_qos_request user_min_freq_req;
struct dev_pm_qos_request user_max_freq_req;
- unsigned long scaling_min_freq;
- unsigned long scaling_max_freq;
bool stop_polling;

unsigned long suspend_freq;
--
2.25.0.rc1.283.g88dfdc4193-goog

2020-01-13 07:20:40

by Chanwoo Choi

[permalink] [raw]
Subject: Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits

Hi,

On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
> Now that devfreq supports limiting the frequency range of a device
> through PM QoS make use of it instead of disabling OPPs that should
> not be used.
>
> Signed-off-by: Matthias Kaehlcke <[email protected]>
> ---
>
> drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
> 1 file changed, 20 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> index ef59256887ff..3a63603afcf2 100644
> --- a/drivers/thermal/devfreq_cooling.c
> +++ b/drivers/thermal/devfreq_cooling.c
> @@ -24,11 +24,13 @@
> #include <linux/idr.h>
> #include <linux/slab.h>
> #include <linux/pm_opp.h>
> +#include <linux/pm_qos.h>
> #include <linux/thermal.h>
>
> #include <trace/events/thermal.h>
>
> -#define SCALE_ERROR_MITIGATION 100
> +#define HZ_PER_KHZ 1000
> +#define SCALE_ERROR_MITIGATION 100
>
> static DEFINE_IDA(devfreq_ida);
>
> @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
> struct devfreq_cooling_power *power_ops;
> u32 res_util;
> int capped_state;
> + struct dev_pm_qos_request req_max_freq;

Need to add the description of 'req_max_freq'.

> };
>
> -/**
> - * partition_enable_opps() - disable all opps above a given state
> - * @dfc: Pointer to devfreq we are operating on
> - * @cdev_state: cooling device state we're setting
> - *
> - * Go through the OPPs of the device, enabling all OPPs until
> - * @cdev_state and disabling those frequencies above it.
> - */
> -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> - unsigned long cdev_state)
> -{
> - int i;
> - struct device *dev = dfc->devfreq->dev.parent;
> -
> - for (i = 0; i < dfc->freq_table_size; i++) {
> - struct dev_pm_opp *opp;
> - int ret = 0;
> - unsigned int freq = dfc->freq_table[i];
> - bool want_enable = i >= cdev_state ? true : false;
> -
> - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> -
> - if (PTR_ERR(opp) == -ERANGE)
> - continue;
> - else if (IS_ERR(opp))
> - return PTR_ERR(opp);
> -
> - dev_pm_opp_put(opp);
> -
> - if (want_enable)
> - ret = dev_pm_opp_enable(dev, freq);
> - else
> - ret = dev_pm_opp_disable(dev, freq);
> -
> - if (ret)
> - return ret;
> - }
> -
> - return 0;
> -}
> -
> static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> unsigned long *state)
> {
> @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> struct devfreq_cooling_device *dfc = cdev->devdata;
> struct devfreq *df = dfc->devfreq;
> struct device *dev = df->dev.parent;
> - int ret;
> + unsigned long freq;
>
> if (state == dfc->cooling_state)
> return 0;
> @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> if (state >= dfc->freq_table_size)
> return -EINVAL;
>
> - ret = partition_enable_opps(dfc, state);
> - if (ret)
> - return ret;
> + freq = dfc->freq_table[state];
> +
> + dev_pm_qos_update_request(&dfc->req_max_freq,
> + DIV_ROUND_UP(freq, HZ_PER_KHZ));
>
> dfc->cooling_state = state;
>
> @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> if (err)
> goto free_dfc;
>
> + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> + DEV_PM_QOS_MAX_FREQUENCY,
> + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> + if (err < 0)
> + goto remove_qos_req;

Jump 'free_table' instead of 'remove_qos_req'.

> +
> err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> if (err < 0)
> goto free_tables;

Jump remove_qos_req.

> @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
>
> release_ida:
> ida_simple_remove(&devfreq_ida, dfc->id);
> +
> +remove_qos_req:
> + dev_pm_qos_remove_request(&dfc->req_max_freq);
> +
> free_tables:
> kfree(dfc->power_table);
> kfree(dfc->freq_table);
> @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>
> thermal_cooling_device_unregister(dfc->cdev);
> ida_simple_remove(&devfreq_ida, dfc->id);
> + dev_pm_qos_remove_request(&dfc->req_max_freq);
> kfree(dfc->power_table);
> kfree(dfc->freq_table);
>
>


--
Best Regards,
Chanwoo Choi
Samsung Electronics

2020-01-13 07:25:37

by Chanwoo Choi

[permalink] [raw]
Subject: Re: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits

Hi,


Any device driver except for devfreq_cooling.c might
use dev_pm_opp_enable/disable interface.
So, don't need to remove the devfreq->scaling_max_freq
and devfreq->scaling_min_freq for supporting OPP interface.

Regards,
Chanwoo Choi

On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
> Traditionally devfreq cooling devices dynamically disabled OPPs
> that shouldn't be used because of thermal pressure. Devfreq cooling
> devices now use PM QoS to set frequency limits, hence the devfreq
> code dealing that deals with disabled OPPs can be removed.
>
> Signed-off-by: Matthias Kaehlcke <[email protected]>
> ---
>
> drivers/devfreq/devfreq.c | 75 +++++----------------------------------
> include/linux/devfreq.h | 4 ---
> 2 files changed, 8 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 57f6944d65a6..ec66e2c27cc4 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -73,34 +73,6 @@ static struct devfreq *find_device_devfreq(struct device *dev)
> return ERR_PTR(-ENODEV);
> }
>
> -static unsigned long find_available_min_freq(struct devfreq *devfreq)
> -{
> - struct dev_pm_opp *opp;
> - unsigned long min_freq = 0;
> -
> - opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &min_freq);
> - if (IS_ERR(opp))
> - min_freq = 0;
> - else
> - dev_pm_opp_put(opp);
> -
> - return min_freq;
> -}
> -
> -static unsigned long find_available_max_freq(struct devfreq *devfreq)
> -{
> - struct dev_pm_opp *opp;
> - unsigned long max_freq = ULONG_MAX;
> -
> - opp = dev_pm_opp_find_freq_floor(devfreq->dev.parent, &max_freq);
> - if (IS_ERR(opp))
> - max_freq = 0;
> - else
> - dev_pm_opp_put(opp);
> -
> - return max_freq;
> -}
> -
> /**
> * get_freq_range() - Get the current freq range
> * @devfreq: the devfreq instance
> @@ -141,10 +113,6 @@ static void get_freq_range(struct devfreq *devfreq,
> *max_freq = min(*max_freq,
> (unsigned long)HZ_PER_KHZ * qos_max_freq);
>
> - /* Apply constraints from OPP interface */
> - *min_freq = max(*min_freq, devfreq->scaling_min_freq);
> - *max_freq = min(*max_freq, devfreq->scaling_max_freq);
> -
> if (*min_freq > *max_freq)
> *min_freq = *max_freq;
> }
> @@ -610,23 +578,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
> void *devp)
> {
> struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
> - int err = -EINVAL;
> + int err;
>
> mutex_lock(&devfreq->lock);
> -
> - devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> - if (!devfreq->scaling_min_freq)
> - goto out;
> -
> - devfreq->scaling_max_freq = find_available_max_freq(devfreq);
> - if (!devfreq->scaling_max_freq) {
> - devfreq->scaling_max_freq = ULONG_MAX;
> - goto out;
> - }
> -
> err = update_devfreq(devfreq);
> -
> -out:
> mutex_unlock(&devfreq->lock);
> if (err)
> dev_err(devfreq->dev.parent,
> @@ -781,19 +736,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
> mutex_lock(&devfreq->lock);
> }
>
> - devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> - if (!devfreq->scaling_min_freq) {
> - mutex_unlock(&devfreq->lock);
> - err = -EINVAL;
> + err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
> + DEV_PM_QOS_MIN_FREQUENCY, 0);
> + if (err < 0)
> goto err_dev;
> - }
> -
> - devfreq->scaling_max_freq = find_available_max_freq(devfreq);
> - if (!devfreq->scaling_max_freq) {
> - mutex_unlock(&devfreq->lock);
> - err = -EINVAL;
> + err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
> + DEV_PM_QOS_MAX_FREQUENCY,
> + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> + if (err < 0)
> goto err_dev;
> - }
>
> devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
> atomic_set(&devfreq->suspend_count, 0);
> @@ -834,16 +785,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
>
> mutex_unlock(&devfreq->lock);
>
> - err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
> - DEV_PM_QOS_MIN_FREQUENCY, 0);
> - if (err < 0)
> - goto err_devfreq;
> - err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
> - DEV_PM_QOS_MAX_FREQUENCY,
> - PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> - if (err < 0)
> - goto err_devfreq;
> -
> devfreq->nb_min.notifier_call = qos_min_notifier_call;
> err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
> DEV_PM_QOS_MIN_FREQUENCY);
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index fb376b5b7281..cb75f23ad2f4 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -126,8 +126,6 @@ struct devfreq_dev_profile {
> * touch this.
> * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs)
> * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs)
> - * @scaling_min_freq: Limit minimum frequency requested by OPP interface
> - * @scaling_max_freq: Limit maximum frequency requested by OPP interface
> * @stop_polling: devfreq polling status of a device.
> * @suspend_freq: frequency of a device set during suspend phase.
> * @resume_freq: frequency of a device set in resume phase.
> @@ -166,8 +164,6 @@ struct devfreq {
>
> struct dev_pm_qos_request user_min_freq_req;
> struct dev_pm_qos_request user_max_freq_req;
> - unsigned long scaling_min_freq;
> - unsigned long scaling_max_freq;
> bool stop_polling;
>
> unsigned long suspend_freq;
>

2020-01-14 16:09:34

by Leonard Crestez

[permalink] [raw]
Subject: Re: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits

On 13.01.2020 09:24, Chanwoo Choi wrote:
> Hi,
>
> Any device driver except for devfreq_cooling.c might
> use dev_pm_opp_enable/disable interface.
> So, don't need to remove the devfreq->scaling_max_freq
> and devfreq->scaling_min_freq for supporting OPP interface.

It seems that devfreq_cooling was the only upstream user of
dev_pm_opp_enable and the remaining callers of dev_pm_opp_disable are
probe-time checks.

> Regards,
> Chanwoo Choi
>
> On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
>> Traditionally devfreq cooling devices dynamically disabled OPPs
>> that shouldn't be used because of thermal pressure. Devfreq cooling
>> devices now use PM QoS to set frequency limits, hence the devfreq
>> code dealing that deals with disabled OPPs can be removed.
>>
>> Signed-off-by: Matthias Kaehlcke <[email protected]>
>> ---
>>
>> drivers/devfreq/devfreq.c | 75 +++++----------------------------------
>> include/linux/devfreq.h | 4 ---
>> 2 files changed, 8 insertions(+), 71 deletions(-)
>>
>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>> index 57f6944d65a6..ec66e2c27cc4 100644
>> --- a/drivers/devfreq/devfreq.c
>> +++ b/drivers/devfreq/devfreq.c
>> @@ -73,34 +73,6 @@ static struct devfreq *find_device_devfreq(struct device *dev)
>> return ERR_PTR(-ENODEV);
>> }
>>
>> -static unsigned long find_available_min_freq(struct devfreq *devfreq)
>> -{
>> - struct dev_pm_opp *opp;
>> - unsigned long min_freq = 0;
>> -
>> - opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &min_freq);
>> - if (IS_ERR(opp))
>> - min_freq = 0;
>> - else
>> - dev_pm_opp_put(opp);
>> -
>> - return min_freq;
>> -}
>> -
>> -static unsigned long find_available_max_freq(struct devfreq *devfreq)
>> -{
>> - struct dev_pm_opp *opp;
>> - unsigned long max_freq = ULONG_MAX;
>> -
>> - opp = dev_pm_opp_find_freq_floor(devfreq->dev.parent, &max_freq);
>> - if (IS_ERR(opp))
>> - max_freq = 0;
>> - else
>> - dev_pm_opp_put(opp);
>> -
>> - return max_freq;
>> -}
>> -
>> /**
>> * get_freq_range() - Get the current freq range
>> * @devfreq: the devfreq instance
>> @@ -141,10 +113,6 @@ static void get_freq_range(struct devfreq *devfreq,
>> *max_freq = min(*max_freq,
>> (unsigned long)HZ_PER_KHZ * qos_max_freq);
>>
>> - /* Apply constraints from OPP interface */
>> - *min_freq = max(*min_freq, devfreq->scaling_min_freq);
>> - *max_freq = min(*max_freq, devfreq->scaling_max_freq);
>> -
>> if (*min_freq > *max_freq)
>> *min_freq = *max_freq;
>> }
>> @@ -610,23 +578,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
>> void *devp)
>> {
>> struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
>> - int err = -EINVAL;
>> + int err;
>>
>> mutex_lock(&devfreq->lock);
>> -
>> - devfreq->scaling_min_freq = find_available_min_freq(devfreq);
>> - if (!devfreq->scaling_min_freq)
>> - goto out;
>> -
>> - devfreq->scaling_max_freq = find_available_max_freq(devfreq);
>> - if (!devfreq->scaling_max_freq) {
>> - devfreq->scaling_max_freq = ULONG_MAX;
>> - goto out;
>> - }
>> -
>> err = update_devfreq(devfreq);
>> -
>> -out:
>> mutex_unlock(&devfreq->lock);
>> if (err)
>> dev_err(devfreq->dev.parent,
>> @@ -781,19 +736,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
>> mutex_lock(&devfreq->lock);
>> }
>>
>> - devfreq->scaling_min_freq = find_available_min_freq(devfreq);
>> - if (!devfreq->scaling_min_freq) {
>> - mutex_unlock(&devfreq->lock);
>> - err = -EINVAL;
>> + err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
>> + DEV_PM_QOS_MIN_FREQUENCY, 0);
>> + if (err < 0)
>> goto err_dev;
>> - }
>> -
>> - devfreq->scaling_max_freq = find_available_max_freq(devfreq);
>> - if (!devfreq->scaling_max_freq) {
>> - mutex_unlock(&devfreq->lock);
>> - err = -EINVAL;
>> + err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
>> + DEV_PM_QOS_MAX_FREQUENCY,
>> + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
>> + if (err < 0)
>> goto err_dev;
>> - }
>>
>> devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
>> atomic_set(&devfreq->suspend_count, 0);
>> @@ -834,16 +785,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>
>> mutex_unlock(&devfreq->lock);
>>
>> - err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
>> - DEV_PM_QOS_MIN_FREQUENCY, 0);
>> - if (err < 0)
>> - goto err_devfreq;
>> - err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
>> - DEV_PM_QOS_MAX_FREQUENCY,
>> - PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
>> - if (err < 0)
>> - goto err_devfreq;
>> -

Performing PM QoS initialization under devfreq->lock triggers lockdep
warnings for me. The warnings seem to be legitimate:

1) At init time &dev_pm_qos_mtx is taken under &devfreq->lock;
2) At update time &devfreq->lock is called under &dev_pm_qos_mtx (it's
held while notifiers are called).

It's not clear why you moved dev_pm_qos_add_request higher?

>> devfreq->nb_min.notifier_call = qos_min_notifier_call;
>> err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
>> DEV_PM_QOS_MIN_FREQUENCY);
>> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
>> index fb376b5b7281..cb75f23ad2f4 100644
>> --- a/include/linux/devfreq.h
>> +++ b/include/linux/devfreq.h
>> @@ -126,8 +126,6 @@ struct devfreq_dev_profile {
>> * touch this.
>> * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs)
>> * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs)
>> - * @scaling_min_freq: Limit minimum frequency requested by OPP interface
>> - * @scaling_max_freq: Limit maximum frequency requested by OPP interface
>> * @stop_polling: devfreq polling status of a device.
>> * @suspend_freq: frequency of a device set during suspend phase.
>> * @resume_freq: frequency of a device set in resume phase.
>> @@ -166,8 +164,6 @@ struct devfreq {
>>
>> struct dev_pm_qos_request user_min_freq_req;
>> struct dev_pm_qos_request user_max_freq_req;
>> - unsigned long scaling_min_freq;
>> - unsigned long scaling_max_freq;
>> bool stop_polling;
>>
>> unsigned long suspend_freq;
>>
>

2020-01-14 16:10:23

by Leonard Crestez

[permalink] [raw]
Subject: Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits

On 10.01.2020 19:49, Matthias Kaehlcke wrote:
> Now that devfreq supports limiting the frequency range of a device
> through PM QoS make use of it instead of disabling OPPs that should
> not be used.
>
> Signed-off-by: Matthias Kaehlcke <[email protected]>
> ---

It is not obvious but this changes behavior when min max requests
conflict (min > max): with PM QoS a MIN_FREQUENCY request takes
precedence but if higher OPPs are disabled then this will override
MIN_FREQUENCY.

There are very few users of this functionality so I don't think there
are any systems that depend on this behaving one way or the other but
perhaps it should be mentioned in commit message?

As far as I can tell the only user of devfreq_cooling in upstream is
drivers/gpu/drm/panfrost?

> drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
> 1 file changed, 20 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> index ef59256887ff..3a63603afcf2 100644
> --- a/drivers/thermal/devfreq_cooling.c
> +++ b/drivers/thermal/devfreq_cooling.c
> @@ -24,11 +24,13 @@
> #include <linux/idr.h>
> #include <linux/slab.h>
> #include <linux/pm_opp.h>
> +#include <linux/pm_qos.h>
> #include <linux/thermal.h>
>
> #include <trace/events/thermal.h>
>
> -#define SCALE_ERROR_MITIGATION 100
> +#define HZ_PER_KHZ 1000
> +#define SCALE_ERROR_MITIGATION 100
>
> static DEFINE_IDA(devfreq_ida);
>
> @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
> struct devfreq_cooling_power *power_ops;
> u32 res_util;
> int capped_state;
> + struct dev_pm_qos_request req_max_freq;
> };
>
> -/**
> - * partition_enable_opps() - disable all opps above a given state
> - * @dfc: Pointer to devfreq we are operating on
> - * @cdev_state: cooling device state we're setting
> - *
> - * Go through the OPPs of the device, enabling all OPPs until
> - * @cdev_state and disabling those frequencies above it.
> - */
> -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> - unsigned long cdev_state)
> -{
> - int i;
> - struct device *dev = dfc->devfreq->dev.parent;
> -
> - for (i = 0; i < dfc->freq_table_size; i++) {
> - struct dev_pm_opp *opp;
> - int ret = 0;
> - unsigned int freq = dfc->freq_table[i];
> - bool want_enable = i >= cdev_state ? true : false;
> -
> - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> -
> - if (PTR_ERR(opp) == -ERANGE)
> - continue;
> - else if (IS_ERR(opp))
> - return PTR_ERR(opp);
> -
> - dev_pm_opp_put(opp);
> -
> - if (want_enable)
> - ret = dev_pm_opp_enable(dev, freq);
> - else
> - ret = dev_pm_opp_disable(dev, freq);
> -
> - if (ret)
> - return ret;
> - }
> -
> - return 0;
> -}
> -
> static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> unsigned long *state)
> {
> @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> struct devfreq_cooling_device *dfc = cdev->devdata;
> struct devfreq *df = dfc->devfreq;
> struct device *dev = df->dev.parent;
> - int ret;
> + unsigned long freq;
>
> if (state == dfc->cooling_state)
> return 0;
> @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> if (state >= dfc->freq_table_size)
> return -EINVAL;
>
> - ret = partition_enable_opps(dfc, state);
> - if (ret)
> - return ret;
> + freq = dfc->freq_table[state];
> +
> + dev_pm_qos_update_request(&dfc->req_max_freq,
> + DIV_ROUND_UP(freq, HZ_PER_KHZ));
>
> dfc->cooling_state = state;
>
> @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> if (err)
> goto free_dfc;
>
> + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> + DEV_PM_QOS_MAX_FREQUENCY,
> + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> + if (err < 0)
> + goto remove_qos_req;
> +
> err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> if (err < 0)
> goto free_tables;
> @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
>
> release_ida:
> ida_simple_remove(&devfreq_ida, dfc->id);
> +
> +remove_qos_req:
> + dev_pm_qos_remove_request(&dfc->req_max_freq); > +

A quirk of the dev_pm_qos API is that dev_pm_qos_remove_request prints a
WARN splat if !dev_pm_qos_request_active and this can true on
dev_pm_qos_add_request error.

I dealt with this by checking dev_pm_qos_request_active explicitly but
perhaps dev_pm_qos API could be changed? In general "free/release"
functions shouldn't complain if there's nothing to do.

> free_tables:
> kfree(dfc->power_table);
> kfree(dfc->freq_table);
> @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>
> thermal_cooling_device_unregister(dfc->cdev);
> ida_simple_remove(&devfreq_ida, dfc->id);
> + dev_pm_qos_remove_request(&dfc->req_max_freq);
> kfree(dfc->power_table);
> kfree(dfc->freq_table);

2020-01-14 17:37:33

by Chanwoo Choi

[permalink] [raw]
Subject: Re: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits

On Wed, Jan 15, 2020 at 1:08 AM Leonard Crestez <[email protected]> wrote:
>
> On 13.01.2020 09:24, Chanwoo Choi wrote:
> > Hi,
> >
> > Any device driver except for devfreq_cooling.c might
> > use dev_pm_opp_enable/disable interface.
> > So, don't need to remove the devfreq->scaling_max_freq
> > and devfreq->scaling_min_freq for supporting OPP interface.
>
> It seems that devfreq_cooling was the only upstream user of
> dev_pm_opp_enable and the remaining callers of dev_pm_opp_disable are
> probe-time checks.

OPP interface has still dev_pm_opp_enable and dev_pm_opp_disable
function. As long as remains them, any device driver related to devfreq
could call them at some time. The devfreq supports the OPP interface,
not just for only devfreq_cooling.

>
> > Regards,
> > Chanwoo Choi
> >
> > On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
> >> Traditionally devfreq cooling devices dynamically disabled OPPs
> >> that shouldn't be used because of thermal pressure. Devfreq cooling
> >> devices now use PM QoS to set frequency limits, hence the devfreq
> >> code dealing that deals with disabled OPPs can be removed.
> >>
> >> Signed-off-by: Matthias Kaehlcke <[email protected]>
> >> ---
> >>
> >> drivers/devfreq/devfreq.c | 75 +++++----------------------------------
> >> include/linux/devfreq.h | 4 ---
> >> 2 files changed, 8 insertions(+), 71 deletions(-)
> >>
> >> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> >> index 57f6944d65a6..ec66e2c27cc4 100644
> >> --- a/drivers/devfreq/devfreq.c
> >> +++ b/drivers/devfreq/devfreq.c
> >> @@ -73,34 +73,6 @@ static struct devfreq *find_device_devfreq(struct device *dev)
> >> return ERR_PTR(-ENODEV);
> >> }
> >>
> >> -static unsigned long find_available_min_freq(struct devfreq *devfreq)
> >> -{
> >> - struct dev_pm_opp *opp;
> >> - unsigned long min_freq = 0;
> >> -
> >> - opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &min_freq);
> >> - if (IS_ERR(opp))
> >> - min_freq = 0;
> >> - else
> >> - dev_pm_opp_put(opp);
> >> -
> >> - return min_freq;
> >> -}
> >> -
> >> -static unsigned long find_available_max_freq(struct devfreq *devfreq)
> >> -{
> >> - struct dev_pm_opp *opp;
> >> - unsigned long max_freq = ULONG_MAX;
> >> -
> >> - opp = dev_pm_opp_find_freq_floor(devfreq->dev.parent, &max_freq);
> >> - if (IS_ERR(opp))
> >> - max_freq = 0;
> >> - else
> >> - dev_pm_opp_put(opp);
> >> -
> >> - return max_freq;
> >> -}
> >> -
> >> /**
> >> * get_freq_range() - Get the current freq range
> >> * @devfreq: the devfreq instance
> >> @@ -141,10 +113,6 @@ static void get_freq_range(struct devfreq *devfreq,
> >> *max_freq = min(*max_freq,
> >> (unsigned long)HZ_PER_KHZ * qos_max_freq);
> >>
> >> - /* Apply constraints from OPP interface */
> >> - *min_freq = max(*min_freq, devfreq->scaling_min_freq);
> >> - *max_freq = min(*max_freq, devfreq->scaling_max_freq);
> >> -
> >> if (*min_freq > *max_freq)
> >> *min_freq = *max_freq;
> >> }
> >> @@ -610,23 +578,10 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
> >> void *devp)
> >> {
> >> struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
> >> - int err = -EINVAL;
> >> + int err;
> >>
> >> mutex_lock(&devfreq->lock);
> >> -
> >> - devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> >> - if (!devfreq->scaling_min_freq)
> >> - goto out;
> >> -
> >> - devfreq->scaling_max_freq = find_available_max_freq(devfreq);
> >> - if (!devfreq->scaling_max_freq) {
> >> - devfreq->scaling_max_freq = ULONG_MAX;
> >> - goto out;
> >> - }
> >> -
> >> err = update_devfreq(devfreq);
> >> -
> >> -out:
> >> mutex_unlock(&devfreq->lock);
> >> if (err)
> >> dev_err(devfreq->dev.parent,
> >> @@ -781,19 +736,15 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >> mutex_lock(&devfreq->lock);
> >> }
> >>
> >> - devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> >> - if (!devfreq->scaling_min_freq) {
> >> - mutex_unlock(&devfreq->lock);
> >> - err = -EINVAL;
> >> + err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
> >> + DEV_PM_QOS_MIN_FREQUENCY, 0);
> >> + if (err < 0)
> >> goto err_dev;
> >> - }
> >> -
> >> - devfreq->scaling_max_freq = find_available_max_freq(devfreq);
> >> - if (!devfreq->scaling_max_freq) {
> >> - mutex_unlock(&devfreq->lock);
> >> - err = -EINVAL;
> >> + err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
> >> + DEV_PM_QOS_MAX_FREQUENCY,
> >> + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> >> + if (err < 0)
> >> goto err_dev;
> >> - }
> >>
> >> devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
> >> atomic_set(&devfreq->suspend_count, 0);
> >> @@ -834,16 +785,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >>
> >> mutex_unlock(&devfreq->lock);
> >>
> >> - err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
> >> - DEV_PM_QOS_MIN_FREQUENCY, 0);
> >> - if (err < 0)
> >> - goto err_devfreq;
> >> - err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
> >> - DEV_PM_QOS_MAX_FREQUENCY,
> >> - PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> >> - if (err < 0)
> >> - goto err_devfreq;
> >> -
>
> Performing PM QoS initialization under devfreq->lock triggers lockdep
> warnings for me. The warnings seem to be legitimate:
>
> 1) At init time &dev_pm_qos_mtx is taken under &devfreq->lock;
> 2) At update time &devfreq->lock is called under &dev_pm_qos_mtx (it's
> held while notifiers are called).
>
> It's not clear why you moved dev_pm_qos_add_request higher?
>
> >> devfreq->nb_min.notifier_call = qos_min_notifier_call;
> >> err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
> >> DEV_PM_QOS_MIN_FREQUENCY);
> >> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> >> index fb376b5b7281..cb75f23ad2f4 100644
> >> --- a/include/linux/devfreq.h
> >> +++ b/include/linux/devfreq.h
> >> @@ -126,8 +126,6 @@ struct devfreq_dev_profile {
> >> * touch this.
> >> * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs)
> >> * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs)
> >> - * @scaling_min_freq: Limit minimum frequency requested by OPP interface
> >> - * @scaling_max_freq: Limit maximum frequency requested by OPP interface
> >> * @stop_polling: devfreq polling status of a device.
> >> * @suspend_freq: frequency of a device set during suspend phase.
> >> * @resume_freq: frequency of a device set in resume phase.
> >> @@ -166,8 +164,6 @@ struct devfreq {
> >>
> >> struct dev_pm_qos_request user_min_freq_req;
> >> struct dev_pm_qos_request user_max_freq_req;
> >> - unsigned long scaling_min_freq;
> >> - unsigned long scaling_max_freq;
> >> bool stop_polling;
> >>
> >> unsigned long suspend_freq;
> >>
> >
>


--
Best Regards,
Chanwoo Choi

2020-01-14 18:52:07

by Matthias Kaehlcke

[permalink] [raw]
Subject: Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits

On Mon, Jan 13, 2020 at 04:25:17PM +0900, Chanwoo Choi wrote:
> Hi,
>
> On 1/11/20 2:49 AM, Matthias Kaehlcke wrote:
> > Now that devfreq supports limiting the frequency range of a device
> > through PM QoS make use of it instead of disabling OPPs that should
> > not be used.
> >
> > Signed-off-by: Matthias Kaehlcke <[email protected]>
> > ---
> >
> > drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
> > 1 file changed, 20 insertions(+), 46 deletions(-)
> >
> > diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> > index ef59256887ff..3a63603afcf2 100644
> > --- a/drivers/thermal/devfreq_cooling.c
> > +++ b/drivers/thermal/devfreq_cooling.c
> > @@ -24,11 +24,13 @@
> > #include <linux/idr.h>
> > #include <linux/slab.h>
> > #include <linux/pm_opp.h>
> > +#include <linux/pm_qos.h>
> > #include <linux/thermal.h>
> >
> > #include <trace/events/thermal.h>
> >
> > -#define SCALE_ERROR_MITIGATION 100
> > +#define HZ_PER_KHZ 1000
> > +#define SCALE_ERROR_MITIGATION 100
> >
> > static DEFINE_IDA(devfreq_ida);
> >
> > @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
> > struct devfreq_cooling_power *power_ops;
> > u32 res_util;
> > int capped_state;
> > + struct dev_pm_qos_request req_max_freq;
>
> Need to add the description of 'req_max_freq'.

will add the description

> > };
> >
> > -/**
> > - * partition_enable_opps() - disable all opps above a given state
> > - * @dfc: Pointer to devfreq we are operating on
> > - * @cdev_state: cooling device state we're setting
> > - *
> > - * Go through the OPPs of the device, enabling all OPPs until
> > - * @cdev_state and disabling those frequencies above it.
> > - */
> > -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> > - unsigned long cdev_state)
> > -{
> > - int i;
> > - struct device *dev = dfc->devfreq->dev.parent;
> > -
> > - for (i = 0; i < dfc->freq_table_size; i++) {
> > - struct dev_pm_opp *opp;
> > - int ret = 0;
> > - unsigned int freq = dfc->freq_table[i];
> > - bool want_enable = i >= cdev_state ? true : false;
> > -
> > - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> > -
> > - if (PTR_ERR(opp) == -ERANGE)
> > - continue;
> > - else if (IS_ERR(opp))
> > - return PTR_ERR(opp);
> > -
> > - dev_pm_opp_put(opp);
> > -
> > - if (want_enable)
> > - ret = dev_pm_opp_enable(dev, freq);
> > - else
> > - ret = dev_pm_opp_disable(dev, freq);
> > -
> > - if (ret)
> > - return ret;
> > - }
> > -
> > - return 0;
> > -}
> > -
> > static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> > unsigned long *state)
> > {
> > @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> > struct devfreq_cooling_device *dfc = cdev->devdata;
> > struct devfreq *df = dfc->devfreq;
> > struct device *dev = df->dev.parent;
> > - int ret;
> > + unsigned long freq;
> >
> > if (state == dfc->cooling_state)
> > return 0;
> > @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> > if (state >= dfc->freq_table_size)
> > return -EINVAL;
> >
> > - ret = partition_enable_opps(dfc, state);
> > - if (ret)
> > - return ret;
> > + freq = dfc->freq_table[state];
> > +
> > + dev_pm_qos_update_request(&dfc->req_max_freq,
> > + DIV_ROUND_UP(freq, HZ_PER_KHZ));
> >
> > dfc->cooling_state = state;
> >
> > @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> > if (err)
> > goto free_dfc;
> >
> > + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> > + DEV_PM_QOS_MAX_FREQUENCY,
> > + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> > + if (err < 0)
> > + goto remove_qos_req;
>
> Jump 'free_table' instead of 'remove_qos_req'.

ack

> > +
> > err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> > if (err < 0)
> > goto free_tables;
>
> Jump remove_qos_req.

ack

> > @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> >
> > release_ida:
> > ida_simple_remove(&devfreq_ida, dfc->id);
> > +
> > +remove_qos_req:
> > + dev_pm_qos_remove_request(&dfc->req_max_freq);
> > +
> > free_tables:
> > kfree(dfc->power_table);
> > kfree(dfc->freq_table);
> > @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
> >
> > thermal_cooling_device_unregister(dfc->cdev);
> > ida_simple_remove(&devfreq_ida, dfc->id);
> > + dev_pm_qos_remove_request(&dfc->req_max_freq);
> > kfree(dfc->power_table);
> > kfree(dfc->freq_table);

2020-01-14 19:08:55

by Matthias Kaehlcke

[permalink] [raw]
Subject: Re: [PATCH 1/2] thermal: devfreq_cooling: Use PM QoS to set frequency limits

On Tue, Jan 14, 2020 at 04:08:38PM +0000, Leonard Crestez wrote:
> On 10.01.2020 19:49, Matthias Kaehlcke wrote:
> > Now that devfreq supports limiting the frequency range of a device
> > through PM QoS make use of it instead of disabling OPPs that should
> > not be used.
> >
> > Signed-off-by: Matthias Kaehlcke <[email protected]>
> > ---
>
> It is not obvious but this changes behavior when min max requests
> conflict (min > max): with PM QoS a MIN_FREQUENCY request takes
> precedence but if higher OPPs are disabled then this will override
> MIN_FREQUENCY.

Thanks for pointing this out.

> There are very few users of this functionality so I don't think there
> are any systems that depend on this behaving one way or the other but
> perhaps it should be mentioned in commit message?

Sounds good, I'll add a note.

> As far as I can tell the only user of devfreq_cooling in upstream is
> drivers/gpu/drm/panfrost?

Indeed, apparently GPUs are the primary devfreq device used for cooling,
and unfortunately most GPU drivers are not in upstream.

> > drivers/thermal/devfreq_cooling.c | 66 ++++++++++---------------------
> > 1 file changed, 20 insertions(+), 46 deletions(-)
> >
> > diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> > index ef59256887ff..3a63603afcf2 100644
> > --- a/drivers/thermal/devfreq_cooling.c
> > +++ b/drivers/thermal/devfreq_cooling.c
> > @@ -24,11 +24,13 @@
> > #include <linux/idr.h>
> > #include <linux/slab.h>
> > #include <linux/pm_opp.h>
> > +#include <linux/pm_qos.h>
> > #include <linux/thermal.h>
> >
> > #include <trace/events/thermal.h>
> >
> > -#define SCALE_ERROR_MITIGATION 100
> > +#define HZ_PER_KHZ 1000
> > +#define SCALE_ERROR_MITIGATION 100
> >
> > static DEFINE_IDA(devfreq_ida);
> >
> > @@ -65,49 +67,9 @@ struct devfreq_cooling_device {
> > struct devfreq_cooling_power *power_ops;
> > u32 res_util;
> > int capped_state;
> > + struct dev_pm_qos_request req_max_freq;
> > };
> >
> > -/**
> > - * partition_enable_opps() - disable all opps above a given state
> > - * @dfc: Pointer to devfreq we are operating on
> > - * @cdev_state: cooling device state we're setting
> > - *
> > - * Go through the OPPs of the device, enabling all OPPs until
> > - * @cdev_state and disabling those frequencies above it.
> > - */
> > -static int partition_enable_opps(struct devfreq_cooling_device *dfc,
> > - unsigned long cdev_state)
> > -{
> > - int i;
> > - struct device *dev = dfc->devfreq->dev.parent;
> > -
> > - for (i = 0; i < dfc->freq_table_size; i++) {
> > - struct dev_pm_opp *opp;
> > - int ret = 0;
> > - unsigned int freq = dfc->freq_table[i];
> > - bool want_enable = i >= cdev_state ? true : false;
> > -
> > - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable);
> > -
> > - if (PTR_ERR(opp) == -ERANGE)
> > - continue;
> > - else if (IS_ERR(opp))
> > - return PTR_ERR(opp);
> > -
> > - dev_pm_opp_put(opp);
> > -
> > - if (want_enable)
> > - ret = dev_pm_opp_enable(dev, freq);
> > - else
> > - ret = dev_pm_opp_disable(dev, freq);
> > -
> > - if (ret)
> > - return ret;
> > - }
> > -
> > - return 0;
> > -}
> > -
> > static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> > unsigned long *state)
> > {
> > @@ -134,7 +96,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> > struct devfreq_cooling_device *dfc = cdev->devdata;
> > struct devfreq *df = dfc->devfreq;
> > struct device *dev = df->dev.parent;
> > - int ret;
> > + unsigned long freq;
> >
> > if (state == dfc->cooling_state)
> > return 0;
> > @@ -144,9 +106,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev,
> > if (state >= dfc->freq_table_size)
> > return -EINVAL;
> >
> > - ret = partition_enable_opps(dfc, state);
> > - if (ret)
> > - return ret;
> > + freq = dfc->freq_table[state];
> > +
> > + dev_pm_qos_update_request(&dfc->req_max_freq,
> > + DIV_ROUND_UP(freq, HZ_PER_KHZ));
> >
> > dfc->cooling_state = state;
> >
> > @@ -529,6 +492,12 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> > if (err)
> > goto free_dfc;
> >
> > + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq,
> > + DEV_PM_QOS_MAX_FREQUENCY,
> > + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
> > + if (err < 0)
> > + goto remove_qos_req;
> > +
> > err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
> > if (err < 0)
> > goto free_tables;
> > @@ -552,6 +521,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df,
> >
> > release_ida:
> > ida_simple_remove(&devfreq_ida, dfc->id);
> > +
> > +remove_qos_req:
> > + dev_pm_qos_remove_request(&dfc->req_max_freq); > +
>
> A quirk of the dev_pm_qos API is that dev_pm_qos_remove_request prints a
> WARN splat if !dev_pm_qos_request_active and this can true on
> dev_pm_qos_add_request error.
>
> I dealt with this by checking dev_pm_qos_request_active explicitly but
> perhaps dev_pm_qos API could be changed? In general "free/release"
> functions shouldn't complain if there's nothing to do.

I think we should be good here if we jump to 'free_tables' if
_add_request() fails, as requested by Chanwoo. Then _remove_request() is
only called when _add_request() was successful.

> > free_tables:
> > kfree(dfc->power_table);
> > kfree(dfc->freq_table);
> > @@ -600,6 +573,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
> >
> > thermal_cooling_device_unregister(dfc->cdev);
> > ida_simple_remove(&devfreq_ida, dfc->id);
> > + dev_pm_qos_remove_request(&dfc->req_max_freq);
> > kfree(dfc->power_table);
> > kfree(dfc->freq_table);

2020-01-14 19:14:57

by Matthias Kaehlcke

[permalink] [raw]
Subject: Re: [PATCH 2/2] PM / devfreq: Use exclusively PM QoS to determine frequency limits

On Wed, Jan 15, 2020 at 02:35:48AM +0900, Chanwoo Choi wrote:
> On Wed, Jan 15, 2020 at 1:08 AM Leonard Crestez <[email protected]> wrote:
> >
> > On 13.01.2020 09:24, Chanwoo Choi wrote:
> > > Hi,
> > >
> > > Any device driver except for devfreq_cooling.c might
> > > use dev_pm_opp_enable/disable interface.
> > > So, don't need to remove the devfreq->scaling_max_freq
> > > and devfreq->scaling_min_freq for supporting OPP interface.
> >
> > It seems that devfreq_cooling was the only upstream user of
> > dev_pm_opp_enable and the remaining callers of dev_pm_opp_disable are
> > probe-time checks.
>
> OPP interface has still dev_pm_opp_enable and dev_pm_opp_disable
> function. As long as remains them, any device driver related to devfreq
> could call them at some time. The devfreq supports the OPP interface,
> not just for only devfreq_cooling.

I would like to remove the disabled OPP handling since no devfreq device
makes use of dev_pm_opp_enable/disable, but I fear you are right that
we have to keep it as long as the API is available.