While developing a krait cache scaling devfreq driver I encounter tons
of panics and errors with using the new cpufreq passive governor
functions. While the krait cache scaling is still WIP and required some
testing I would like to push all the fixes to make the new
implementation wroking since currently with a the governor
PROBE_DEFERRing all sort of things happen from kernel panic from invalid
address access to freq_table getting corrupted.
With the following fixes my WIP driver works correctly without any
warning/problems.
v4:
- Fix error with dev_err_probe
- Move cpu_parent_data deletion to dedicated function
- Drop PM / devfreq: Fix kernel panic with cpu based scaling
to passive gov (merged)
v3:
- Fix compilation error for list_for_each_entry_safe (i'm stupid -.-''')
v2:
- Fix wrong list_for_each_entry reported by Dan Carpenter
Christian 'Ansuel' Marangi (4):
PM / devfreq: Fix cpufreq passive unregister erroring on PROBE_DEFER
PM / devfreq: Fix kernel warning with cpufreq passive register fail
PM / devfreq: Rework freq_table to be local to devfreq struct
PM / devfreq: Mute warning on governor PROBE_DEFER
drivers/devfreq/devfreq.c | 76 +++++++++++++++---------------
drivers/devfreq/governor_passive.c | 54 ++++++++++-----------
include/linux/devfreq.h | 4 ++
3 files changed, 65 insertions(+), 69 deletions(-)
--
2.36.1
With the passive governor, the cpu based scaling can PROBE_DEFER due to
the fact that CPU policy are not ready.
The cpufreq passive unregister notifier is called both from the
GOV_START errors and for the GOV_STOP and assume the notifier is
successfully registred every time. With GOV_START failing it's wrong to
loop over each possible CPU since the register path has failed for
some CPU policy not ready. Change the logic and unregister the notifer
based on the current allocated parent_cpu_data list to correctly handle
errors and the governor unregister path.
Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
---
drivers/devfreq/governor_passive.c | 39 +++++++++++++-----------------
1 file changed, 17 insertions(+), 22 deletions(-)
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index 72c67979ebe1..95de336f20d5 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -34,6 +34,20 @@ get_parent_cpu_data(struct devfreq_passive_data *p_data,
return NULL;
}
+static void delete_parent_cpu_data(struct devfreq_passive_data *p_data)
+{
+ struct devfreq_cpu_data *parent_cpu_data, *tmp;
+
+ list_for_each_entry_safe(parent_cpu_data, tmp, &p_data->cpu_data_list, node) {
+ list_del(&parent_cpu_data->node);
+
+ if (parent_cpu_data->opp_table)
+ dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
+
+ kfree(parent_cpu_data);
+ }
+}
+
static unsigned long get_target_freq_by_required_opp(struct device *p_dev,
struct opp_table *p_opp_table,
struct opp_table *opp_table,
@@ -222,8 +236,7 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
{
struct devfreq_passive_data *p_data
= (struct devfreq_passive_data *)devfreq->data;
- struct devfreq_cpu_data *parent_cpu_data;
- int cpu, ret = 0;
+ int ret;
if (p_data->nb.notifier_call) {
ret = cpufreq_unregister_notifier(&p_data->nb,
@@ -232,27 +245,9 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
return ret;
}
- for_each_possible_cpu(cpu) {
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
- if (!policy) {
- ret = -EINVAL;
- continue;
- }
-
- parent_cpu_data = get_parent_cpu_data(p_data, policy);
- if (!parent_cpu_data) {
- cpufreq_cpu_put(policy);
- continue;
- }
-
- list_del(&parent_cpu_data->node);
- if (parent_cpu_data->opp_table)
- dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
- kfree(parent_cpu_data);
- cpufreq_cpu_put(policy);
- }
+ delete_parent_cpu_data(p_data);
- return ret;
+ return 0;
}
static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
--
2.36.1
When the cpufreq passive register path from the passive governor fails,
the cpufreq_passive_unregister is called and a kernel WARNING is always
reported.
This is caused by the fact that the devfreq driver already call the
governor unregister with the GOV_STOP, for this reason the second
cpufreq_passive_unregister always return error and a WARN is printed
from the WARN_ON function.
Remove the unregister call from the error handling of the cpufreq register
notifier as it's fundamentally wrong and already handled by the devfreq
core code.
Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
---
drivers/devfreq/governor_passive.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index 95de336f20d5..dcc9dd518197 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -331,7 +331,6 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
err_put_policy:
cpufreq_cpu_put(policy);
err:
- WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
return ret;
}
--
2.36.1
Don't print warning when a governor PROBE_DEFER as it's not a real
GOV_START fail.
Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
---
drivers/devfreq/devfreq.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 2e2b3b414d67..6a39638ed064 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -931,8 +931,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START,
NULL);
if (err) {
- dev_err(dev, "%s: Unable to start governor for the device\n",
- __func__);
+ dev_err_probe(dev, err,
+ "%s: Unable to start governor for the device\n",
+ __func__);
goto err_init;
}
create_sysfs_files(devfreq, devfreq->governor);
--
2.36.1
Currently we reference the freq_table to the profile defined one and we
make changes on it. Devfreq never supported PROBE_DEFER before the cpu
based scaling support to the passive governor and assumed that a devfreq
device could only had error and be done with it.
Now that a device can PROBE_DEFER a rework to the freq_table logic is
required.
If a device PROBE_DEFER on the GOV_START, the freq_table is already set
in the device profile struct and its init is skipped. This is due to the
fact that it's common for devs to declare this kind of struct static.
This cause the devfreq logic to find a freq table declared (freq_table
not NULL) with random data and poiting to the old addrs freed by devm.
This problem CAN be solved by devs by clearing the freq_table in their
profile struct on driver exit path but it should not be trusted and it
looks to use a flawed logic.
A better solution is to move the freq_table and max_state to the
devfreq struct and never change the profile struct.
This permit to correctly handle PROBE_DEFER since the devfreq struct is
reallocated and contains new values.
Also the profile struct should only be used to init the driver and should
not be used by the devfreq to write the freq_table if it's not provided
by the driver.
Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
---
drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
drivers/devfreq/governor_passive.c | 14 +++---
include/linux/devfreq.h | 4 ++
3 files changed, 45 insertions(+), 44 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 01474daf4548..2e2b3b414d67 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
unsigned long *min_freq,
unsigned long *max_freq)
{
- unsigned long *freq_table = devfreq->profile->freq_table;
+ unsigned long *freq_table = devfreq->freq_table;
s32 qos_min_freq, qos_max_freq;
lockdep_assert_held(&devfreq->lock);
@@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
* The devfreq drivers can initialize this in either ascending or
* descending order and devfreq core supports both.
*/
- if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
+ if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
*min_freq = freq_table[0];
- *max_freq = freq_table[devfreq->profile->max_state - 1];
+ *max_freq = freq_table[devfreq->max_state - 1];
} else {
- *min_freq = freq_table[devfreq->profile->max_state - 1];
+ *min_freq = freq_table[devfreq->max_state - 1];
*max_freq = freq_table[0];
}
@@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
{
int lev;
- for (lev = 0; lev < devfreq->profile->max_state; lev++)
- if (freq == devfreq->profile->freq_table[lev])
+ for (lev = 0; lev < devfreq->max_state; lev++)
+ if (freq == devfreq->freq_table[lev])
return lev;
return -EINVAL;
@@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
static int set_freq_table(struct devfreq *devfreq)
{
- struct devfreq_dev_profile *profile = devfreq->profile;
struct dev_pm_opp *opp;
unsigned long freq;
int i, count;
@@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
if (count <= 0)
return -EINVAL;
- profile->max_state = count;
- profile->freq_table = devm_kcalloc(devfreq->dev.parent,
- profile->max_state,
- sizeof(*profile->freq_table),
- GFP_KERNEL);
- if (!profile->freq_table) {
- profile->max_state = 0;
+ devfreq->max_state = count;
+ devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
+ devfreq->max_state,
+ sizeof(*devfreq->freq_table),
+ GFP_KERNEL);
+ if (!devfreq->freq_table)
return -ENOMEM;
- }
- for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
+ for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
if (IS_ERR(opp)) {
- devm_kfree(devfreq->dev.parent, profile->freq_table);
- profile->max_state = 0;
+ devm_kfree(devfreq->dev.parent, devfreq->freq_table);
return PTR_ERR(opp);
}
dev_pm_opp_put(opp);
- profile->freq_table[i] = freq;
+ devfreq->freq_table[i] = freq;
}
return 0;
@@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
if (lev != prev_lev) {
devfreq->stats.trans_table[
- (prev_lev * devfreq->profile->max_state) + lev]++;
+ (prev_lev * devfreq->max_state) + lev]++;
devfreq->stats.total_trans++;
}
@@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
if (err < 0)
goto err_dev;
mutex_lock(&devfreq->lock);
+ } else {
+ devfreq->freq_table = devfreq->profile->freq_table;
+ devfreq->max_state = devfreq->profile->max_state;
}
devfreq->scaling_min_freq = find_available_min_freq(devfreq);
@@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
array3_size(sizeof(unsigned int),
- devfreq->profile->max_state,
- devfreq->profile->max_state),
+ devfreq->max_state,
+ devfreq->max_state),
GFP_KERNEL);
if (!devfreq->stats.trans_table) {
mutex_unlock(&devfreq->lock);
@@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
}
devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
- devfreq->profile->max_state,
+ devfreq->max_state,
sizeof(*devfreq->stats.time_in_state),
GFP_KERNEL);
if (!devfreq->stats.time_in_state) {
@@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
mutex_lock(&df->lock);
- for (i = 0; i < df->profile->max_state; i++)
+ for (i = 0; i < df->max_state; i++)
count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
- "%lu ", df->profile->freq_table[i]);
+ "%lu ", df->freq_table[i]);
mutex_unlock(&df->lock);
/* Truncate the trailing space */
@@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
if (!df->profile)
return -EINVAL;
- max_state = df->profile->max_state;
+ max_state = df->max_state;
if (max_state == 0)
return sprintf(buf, "Not Supported.\n");
@@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
len += sprintf(buf + len, " :");
for (i = 0; i < max_state; i++)
len += sprintf(buf + len, "%10lu",
- df->profile->freq_table[i]);
+ df->freq_table[i]);
len += sprintf(buf + len, " time(ms)\n");
for (i = 0; i < max_state; i++) {
- if (df->profile->freq_table[i]
- == df->previous_freq) {
+ if (df->freq_table[i] == df->previous_freq)
len += sprintf(buf + len, "*");
- } else {
+ else
len += sprintf(buf + len, " ");
- }
- len += sprintf(buf + len, "%10lu:",
- df->profile->freq_table[i]);
+
+ len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
for (j = 0; j < max_state; j++)
len += sprintf(buf + len, "%10u",
df->stats.trans_table[(i * max_state) + j]);
@@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
if (!df->profile)
return -EINVAL;
- if (df->profile->max_state == 0)
+ if (df->max_state == 0)
return count;
err = kstrtoint(buf, 10, &value);
@@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
return -EINVAL;
mutex_lock(&df->lock);
- memset(df->stats.time_in_state, 0, (df->profile->max_state *
+ memset(df->stats.time_in_state, 0, (df->max_state *
sizeof(*df->stats.time_in_state)));
memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
- df->profile->max_state,
- df->profile->max_state));
+ df->max_state,
+ df->max_state));
df->stats.total_trans = 0;
df->stats.last_update = get_jiffies_64();
mutex_unlock(&df->lock);
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index dcc9dd518197..1810966fd61d 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -145,18 +145,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
goto out;
/* Use interpolation if required opps is not available */
- for (i = 0; i < parent_devfreq->profile->max_state; i++)
- if (parent_devfreq->profile->freq_table[i] == *freq)
+ for (i = 0; i < parent_devfreq->max_state; i++)
+ if (parent_devfreq->freq_table[i] == *freq)
break;
- if (i == parent_devfreq->profile->max_state)
+ if (i == parent_devfreq->max_state)
return -EINVAL;
- if (i < devfreq->profile->max_state) {
- child_freq = devfreq->profile->freq_table[i];
+ if (i < devfreq->max_state) {
+ child_freq = devfreq->freq_table[i];
} else {
- count = devfreq->profile->max_state;
- child_freq = devfreq->profile->freq_table[count - 1];
+ count = devfreq->max_state;
+ child_freq = devfreq->freq_table[count - 1];
}
out:
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index dc10bee75a72..770a7532655c 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -185,6 +185,10 @@ struct devfreq {
struct notifier_block nb;
struct delayed_work work;
+ /* devfreq local freq_table */
+ unsigned long *freq_table;
+ unsigned int max_state;
+
unsigned long previous_freq;
struct devfreq_dev_status last_status;
--
2.36.1
On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> Don't print warning when a governor PROBE_DEFER as it's not a real
> GOV_START fail.
>
> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> ---
> drivers/devfreq/devfreq.c | 5 +++--
> 1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 2e2b3b414d67..6a39638ed064 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -931,8 +931,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START,
> NULL);
> if (err) {
> - dev_err(dev, "%s: Unable to start governor for the device\n",
> - __func__);
> + dev_err_probe(dev, err,
> + "%s: Unable to start governor for the device\n",
> + __func__);
> goto err_init;
> }
> create_sysfs_files(devfreq, devfreq->governor);
In order to keep the left-align with above error log
when try_then_request_governor() is failed,
I recommend to use the tab without space indentation as following:
If you have no objection, I'll merge this change.
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 01474daf4548..80a1235ef8fb 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -932,8 +932,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START,
NULL);
if (err) {
- dev_err(dev, "%s: Unable to start governor for the device\n",
- __func__);
+ dev_err_probe(dev, err,
+ "%s: Unable to start governor for the device\n",
+ __func__);
goto err_init;
}
create_sysfs_files(devfreq, devfreq->governor);
--
Best Regards,
Samsung Electronics
Chanwoo Choi
On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> When the cpufreq passive register path from the passive governor fails,
> the cpufreq_passive_unregister is called and a kernel WARNING is always
> reported.
> This is caused by the fact that the devfreq driver already call the
> governor unregister with the GOV_STOP, for this reason the second
> cpufreq_passive_unregister always return error and a WARN is printed
> from the WARN_ON function.
> Remove the unregister call from the error handling of the cpufreq register
> notifier as it's fundamentally wrong and already handled by the devfreq
> core code.
>
> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> ---
> drivers/devfreq/governor_passive.c | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index 95de336f20d5..dcc9dd518197 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -331,7 +331,6 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
> err_put_policy:
> cpufreq_cpu_put(policy);
> err:
> - WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
>
> return ret;
> }
I think that it is necessary to free the resource when error happen.
Also, after merging the your patch1, I think that cpufreq_passive_unregister_notifier(devfreq)
will not return error. Instead, just 0 for success.
Instead, 'err_free_cpu_data' and 'err_put_policy' goto statement are wrong exception
handling. If fix the exception handling code in cpufreq_passive_register_notifier
as following and with your patch1, I'll handle the resource for free/un-registration
when error happen during cpufreq_passive_register_notifier.
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index a35b39ac656c..0246e0731fc0 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -289,22 +289,23 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
parent_cpu_data = kzalloc(sizeof(*parent_cpu_data),
GFP_KERNEL);
if (!parent_cpu_data) {
+ cpufreq_cpu_put(policy);
ret = -ENOMEM;
- goto err_put_policy;
+ goto err;
}
cpu_dev = get_cpu_device(cpu);
if (!cpu_dev) {
dev_err(dev, "failed to get cpu device\n");
ret = -ENODEV;
- goto err_free_cpu_data;
+ goto err;
}
opp_table = dev_pm_opp_get_opp_table(cpu_dev);
if (IS_ERR(opp_table)) {
dev_err(dev, "failed to get opp_table of cpu%d\n", cpu);
ret = PTR_ERR(opp_table);
- goto err_free_cpu_data;
+ goto err;
}
parent_cpu_data->dev = cpu_dev;
@@ -326,10 +327,6 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
return ret;
-err_free_cpu_data:
- kfree(parent_cpu_data);
-err_put_policy:
- cpufreq_cpu_put(policy);
err:
WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
--
Best Regards,
Samsung Electronics
Chanwoo Choi
On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> With the passive governor, the cpu based scaling can PROBE_DEFER due to
> the fact that CPU policy are not ready.
> The cpufreq passive unregister notifier is called both from the
> GOV_START errors and for the GOV_STOP and assume the notifier is
> successfully registred every time. With GOV_START failing it's wrong to
> loop over each possible CPU since the register path has failed for
> some CPU policy not ready. Change the logic and unregister the notifer
> based on the current allocated parent_cpu_data list to correctly handle
> errors and the governor unregister path.
>
> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> ---
> drivers/devfreq/governor_passive.c | 39 +++++++++++++-----------------
> 1 file changed, 17 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index 72c67979ebe1..95de336f20d5 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -34,6 +34,20 @@ get_parent_cpu_data(struct devfreq_passive_data *p_data,
> return NULL;
> }
>
> +static void delete_parent_cpu_data(struct devfreq_passive_data *p_data)
> +{
> + struct devfreq_cpu_data *parent_cpu_data, *tmp;
> +
Need to add the validation checking of argument as following:
if (!p_data)
return;
> + list_for_each_entry_safe(parent_cpu_data, tmp, &p_data->cpu_data_list, node) {
> + list_del(&parent_cpu_data->node);
> +
> + if (parent_cpu_data->opp_table)
> + dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
> +
> + kfree(parent_cpu_data);
> + }
> +}
> +
> static unsigned long get_target_freq_by_required_opp(struct device *p_dev,
> struct opp_table *p_opp_table,
> struct opp_table *opp_table,
> @@ -222,8 +236,7 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
> {
> struct devfreq_passive_data *p_data
> = (struct devfreq_passive_data *)devfreq->data;
> - struct devfreq_cpu_data *parent_cpu_data;
> - int cpu, ret = 0;
> + int ret;
>
> if (p_data->nb.notifier_call) {
> ret = cpufreq_unregister_notifier(&p_data->nb,
> @@ -232,27 +245,9 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
> return ret;
> }
>
> - for_each_possible_cpu(cpu) {
> - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> - if (!policy) {
> - ret = -EINVAL;
> - continue;
> - }
> -
> - parent_cpu_data = get_parent_cpu_data(p_data, policy);
> - if (!parent_cpu_data) {
> - cpufreq_cpu_put(policy);
> - continue;
> - }
> -
> - list_del(&parent_cpu_data->node);
> - if (parent_cpu_data->opp_table)
> - dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
> - kfree(parent_cpu_data);
> - cpufreq_cpu_put(policy);
> - }
> + delete_parent_cpu_data(p_data);
>
> - return ret;
> + return 0;
> }
>
> static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
--
Best Regards,
Samsung Electronics
Chanwoo Choi
On Wed, Jun 15, 2022 at 03:48:03PM +0900, Chanwoo Choi wrote:
> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> > With the passive governor, the cpu based scaling can PROBE_DEFER due to
> > the fact that CPU policy are not ready.
> > The cpufreq passive unregister notifier is called both from the
> > GOV_START errors and for the GOV_STOP and assume the notifier is
> > successfully registred every time. With GOV_START failing it's wrong to
> > loop over each possible CPU since the register path has failed for
> > some CPU policy not ready. Change the logic and unregister the notifer
> > based on the current allocated parent_cpu_data list to correctly handle
> > errors and the governor unregister path.
> >
> > Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> > Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> > ---
> > drivers/devfreq/governor_passive.c | 39 +++++++++++++-----------------
> > 1 file changed, 17 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> > index 72c67979ebe1..95de336f20d5 100644
> > --- a/drivers/devfreq/governor_passive.c
> > +++ b/drivers/devfreq/governor_passive.c
> > @@ -34,6 +34,20 @@ get_parent_cpu_data(struct devfreq_passive_data *p_data,
> > return NULL;
> > }
> >
> > +static void delete_parent_cpu_data(struct devfreq_passive_data *p_data)
> > +{
> > + struct devfreq_cpu_data *parent_cpu_data, *tmp;
> > +
>
> Need to add the validation checking of argument as following:
>
> if (!p_data)
> return;
>
Considering this is called only by cpufreq_passive_unregister_notifier
and cpufreq_passive_unregister_notifier is called only by devfreq_passive_event_handler
where the check is already done, isn't that redundant.
We should never reach delete_parent_cpu_data with no p_data.
(Unless you want to use that function somewhere else)
> > + list_for_each_entry_safe(parent_cpu_data, tmp, &p_data->cpu_data_list, node) {
> > + list_del(&parent_cpu_data->node);
> > +
> > + if (parent_cpu_data->opp_table)
> > + dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
> > +
> > + kfree(parent_cpu_data);
> > + }
> > +}
> > +
> > static unsigned long get_target_freq_by_required_opp(struct device *p_dev,
> > struct opp_table *p_opp_table,
> > struct opp_table *opp_table,
> > @@ -222,8 +236,7 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
> > {
> > struct devfreq_passive_data *p_data
> > = (struct devfreq_passive_data *)devfreq->data;
> > - struct devfreq_cpu_data *parent_cpu_data;
> > - int cpu, ret = 0;
> > + int ret;
> >
> > if (p_data->nb.notifier_call) {
> > ret = cpufreq_unregister_notifier(&p_data->nb,
> > @@ -232,27 +245,9 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
> > return ret;
> > }
> >
> > - for_each_possible_cpu(cpu) {
> > - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> > - if (!policy) {
> > - ret = -EINVAL;
> > - continue;
> > - }
> > -
> > - parent_cpu_data = get_parent_cpu_data(p_data, policy);
> > - if (!parent_cpu_data) {
> > - cpufreq_cpu_put(policy);
> > - continue;
> > - }
> > -
> > - list_del(&parent_cpu_data->node);
> > - if (parent_cpu_data->opp_table)
> > - dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
> > - kfree(parent_cpu_data);
> > - cpufreq_cpu_put(policy);
> > - }
> > + delete_parent_cpu_data(p_data);
> >
> > - return ret;
> > + return 0;
> > }
> >
> > static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
>
>
> --
> Best Regards,
> Samsung Electronics
> Chanwoo Choi
--
Ansuel
On Wed, Jun 15, 2022 at 04:11:13PM +0900, Chanwoo Choi wrote:
> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> > When the cpufreq passive register path from the passive governor fails,
> > the cpufreq_passive_unregister is called and a kernel WARNING is always
> > reported.
> > This is caused by the fact that the devfreq driver already call the
> > governor unregister with the GOV_STOP, for this reason the second
> > cpufreq_passive_unregister always return error and a WARN is printed
> > from the WARN_ON function.
> > Remove the unregister call from the error handling of the cpufreq register
> > notifier as it's fundamentally wrong and already handled by the devfreq
> > core code.
> >
> > Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> > Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> > ---
> > drivers/devfreq/governor_passive.c | 1 -
> > 1 file changed, 1 deletion(-)
> >
> > diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> > index 95de336f20d5..dcc9dd518197 100644
> > --- a/drivers/devfreq/governor_passive.c
> > +++ b/drivers/devfreq/governor_passive.c
> > @@ -331,7 +331,6 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
> > err_put_policy:
> > cpufreq_cpu_put(policy);
> > err:
> > - WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
> >
> > return ret;
> > }
>
> I think that it is necessary to free the resource when error happen.
Thing is that it should not be done in the register. Following the flow
of the devfreq core code, if a gov fails to START, the gov STOP is
called and we correctly free our resources. In the current
implementation we call the free 2 times and the second time will always
print error as the notifier is already unregistered.
> Also, after merging the your patch1, I think that cpufreq_passive_unregister_notifier(devfreq)
> will not return error. Instead, just 0 for success.
With path1 we removed the error with the parent_cpu_data deletion but
the unregister error is still there.
>
> Instead, 'err_free_cpu_data' and 'err_put_policy' goto statement are wrong exception
> handling. If fix the exception handling code in cpufreq_passive_register_notifier
> as following and with your patch1, I'll handle the resource for free/un-registration
> when error happen during cpufreq_passive_register_notifier.
>
Don't know the main problem here is calling unregister 2 times.
>
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index a35b39ac656c..0246e0731fc0 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -289,22 +289,23 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
> parent_cpu_data = kzalloc(sizeof(*parent_cpu_data),
> GFP_KERNEL);
> if (!parent_cpu_data) {
> + cpufreq_cpu_put(policy);
> ret = -ENOMEM;
> - goto err_put_policy;
> + goto err;
> }
>
> cpu_dev = get_cpu_device(cpu);
> if (!cpu_dev) {
> dev_err(dev, "failed to get cpu device\n");
> ret = -ENODEV;
> - goto err_free_cpu_data;
> + goto err;
> }
>
> opp_table = dev_pm_opp_get_opp_table(cpu_dev);
> if (IS_ERR(opp_table)) {
> dev_err(dev, "failed to get opp_table of cpu%d\n", cpu);
> ret = PTR_ERR(opp_table);
> - goto err_free_cpu_data;
> + goto err;
> }
>
> parent_cpu_data->dev = cpu_dev;
> @@ -326,10 +327,6 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
>
> return ret;
>
> -err_free_cpu_data:
> - kfree(parent_cpu_data);
> -err_put_policy:
> - cpufreq_cpu_put(policy);
> err:
> WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
>
>
>
>
> --
> Best Regards,
> Samsung Electronics
> Chanwoo Choi
--
Ansuel
On Wed, Jun 15, 2022 at 03:56:31PM +0900, Chanwoo Choi wrote:
> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> > Don't print warning when a governor PROBE_DEFER as it's not a real
> > GOV_START fail.
> >
> > Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> > Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> > ---
> > drivers/devfreq/devfreq.c | 5 +++--
> > 1 file changed, 3 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> > index 2e2b3b414d67..6a39638ed064 100644
> > --- a/drivers/devfreq/devfreq.c
> > +++ b/drivers/devfreq/devfreq.c
> > @@ -931,8 +931,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START,
> > NULL);
> > if (err) {
> > - dev_err(dev, "%s: Unable to start governor for the device\n",
> > - __func__);
> > + dev_err_probe(dev, err,
> > + "%s: Unable to start governor for the device\n",
> > + __func__);
> > goto err_init;
> > }
> > create_sysfs_files(devfreq, devfreq->governor);
>
>
> In order to keep the left-align with above error log
> when try_then_request_governor() is failed,
> I recommend to use the tab without space indentation as following:
>
> If you have no objection, I'll merge this change.
>
Sure, good for me. Anyway I wonder if we can relax the hard limit for 80
for error print since we now can use 100, but your choice.
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 01474daf4548..80a1235ef8fb 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -932,8 +932,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START,
> NULL);
> if (err) {
> - dev_err(dev, "%s: Unable to start governor for the device\n",
> - __func__);
> + dev_err_probe(dev, err,
> + "%s: Unable to start governor for the device\n",
> + __func__);
> goto err_init;
> }
> create_sysfs_files(devfreq, devfreq->governor);
>
>
>
>
>
> --
> Best Regards,
> Samsung Electronics
> Chanwoo Choi
--
Ansuel
On 22. 6. 15. 18:22, Ansuel Smith wrote:
> On Wed, Jun 15, 2022 at 03:56:31PM +0900, Chanwoo Choi wrote:
>> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
>>> Don't print warning when a governor PROBE_DEFER as it's not a real
>>> GOV_START fail.
>>>
>>> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
>>> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
>>> ---
>>> drivers/devfreq/devfreq.c | 5 +++--
>>> 1 file changed, 3 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>>> index 2e2b3b414d67..6a39638ed064 100644
>>> --- a/drivers/devfreq/devfreq.c
>>> +++ b/drivers/devfreq/devfreq.c
>>> @@ -931,8 +931,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>> err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START,
>>> NULL);
>>> if (err) {
>>> - dev_err(dev, "%s: Unable to start governor for the device\n",
>>> - __func__);
>>> + dev_err_probe(dev, err,
>>> + "%s: Unable to start governor for the device\n",
>>> + __func__);
>>> goto err_init;
>>> }
>>> create_sysfs_files(devfreq, devfreq->governor);
>>
>>
>> In order to keep the left-align with above error log
>> when try_then_request_governor() is failed,
>> I recommend to use the tab without space indentation as following:
>>
>> If you have no objection, I'll merge this change.
>>
>
> Sure, good for me. Anyway I wonder if we can relax the hard limit for 80
> for error print since we now can use 100, but your choice.
My suggestion is not over 80 line. Applied it.
>
>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>> index 01474daf4548..80a1235ef8fb 100644
>> --- a/drivers/devfreq/devfreq.c
>> +++ b/drivers/devfreq/devfreq.c
>> @@ -932,8 +932,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
>> err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START,
>> NULL);
>> if (err) {
>> - dev_err(dev, "%s: Unable to start governor for the device\n",
>> - __func__);
>> + dev_err_probe(dev, err,
>> + "%s: Unable to start governor for the device\n",
>> + __func__);
>> goto err_init;
>> }
>> create_sysfs_files(devfreq, devfreq->governor);
>>
>>
>>
>>
>>
>> --
>> Best Regards,
>> Samsung Electronics
>> Chanwoo Choi
>
--
Best Regards,
Samsung Electronics
Chanwoo Choi
On 22. 6. 15. 18:13, Ansuel Smith wrote:
> On Wed, Jun 15, 2022 at 03:48:03PM +0900, Chanwoo Choi wrote:
>> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
>>> With the passive governor, the cpu based scaling can PROBE_DEFER due to
>>> the fact that CPU policy are not ready.
>>> The cpufreq passive unregister notifier is called both from the
>>> GOV_START errors and for the GOV_STOP and assume the notifier is
>>> successfully registred every time. With GOV_START failing it's wrong to
>>> loop over each possible CPU since the register path has failed for
>>> some CPU policy not ready. Change the logic and unregister the notifer
>>> based on the current allocated parent_cpu_data list to correctly handle
>>> errors and the governor unregister path.>>>
>>> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
>>> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
>>> ---
>>> drivers/devfreq/governor_passive.c | 39 +++++++++++++-----------------
>>> 1 file changed, 17 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
>>> index 72c67979ebe1..95de336f20d5 100644
>>> --- a/drivers/devfreq/governor_passive.c
>>> +++ b/drivers/devfreq/governor_passive.c
>>> @@ -34,6 +34,20 @@ get_parent_cpu_data(struct devfreq_passive_data *p_data,
>>> return NULL;
>>> }
>>>
>>> +static void delete_parent_cpu_data(struct devfreq_passive_data *p_data)
>>> +{
>>> + struct devfreq_cpu_data *parent_cpu_data, *tmp;
>>> +
>>
>> Need to add the validation checking of argument as following:
>>
>> if (!p_data)
>> return;
>>
>
> Considering this is called only by cpufreq_passive_unregister_notifier
> and cpufreq_passive_unregister_notifier is called only by devfreq_passive_event_handler
> where the check is already done, isn't that redundant.
> We should never reach delete_parent_cpu_data with no p_data.
> (Unless you want to use that function somewhere else)
Actually, right as you mentioned. I'd like to check the parameter validation
on each function. But, I agree to keep this path without checking p_data.
If needed on later, I'll do that.
Applied it. Thanks.
>
>>> + list_for_each_entry_safe(parent_cpu_data, tmp, &p_data->cpu_data_list, node) {
>>> + list_del(&parent_cpu_data->node);
>>> +
>>> + if (parent_cpu_data->opp_table)
>>> + dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
>>> +
>>> + kfree(parent_cpu_data);
>>> + }
>>> +}
>>> +
>>> static unsigned long get_target_freq_by_required_opp(struct device *p_dev,
>>> struct opp_table *p_opp_table,
>>> struct opp_table *opp_table,
>>> @@ -222,8 +236,7 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
>>> {
>>> struct devfreq_passive_data *p_data
>>> = (struct devfreq_passive_data *)devfreq->data;
>>> - struct devfreq_cpu_data *parent_cpu_data;
>>> - int cpu, ret = 0;
>>> + int ret;
>>>
>>> if (p_data->nb.notifier_call) {
>>> ret = cpufreq_unregister_notifier(&p_data->nb,
>>> @@ -232,27 +245,9 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq)
>>> return ret;
>>> }
>>>
>>> - for_each_possible_cpu(cpu) {
>>> - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>>> - if (!policy) {
>>> - ret = -EINVAL;
>>> - continue;
>>> - }
>>> -
>>> - parent_cpu_data = get_parent_cpu_data(p_data, policy);
>>> - if (!parent_cpu_data) {
>>> - cpufreq_cpu_put(policy);
>>> - continue;
>>> - }
>>> -
>>> - list_del(&parent_cpu_data->node);
>>> - if (parent_cpu_data->opp_table)
>>> - dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
>>> - kfree(parent_cpu_data);
>>> - cpufreq_cpu_put(policy);
>>> - }
>>> + delete_parent_cpu_data(p_data);
>>>
>>> - return ret;
>>> + return 0;
>>> }
>>>
>>> static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
>>
>>
>> --
>> Best Regards,
>> Samsung Electronics
>> Chanwoo Choi
>
--
Best Regards,
Samsung Electronics
Chanwoo Choi
On 22. 6. 15. 18:20, Ansuel Smith wrote:
> On Wed, Jun 15, 2022 at 04:11:13PM +0900, Chanwoo Choi wrote:
>> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
>>> When the cpufreq passive register path from the passive governor fails,
>>> the cpufreq_passive_unregister is called and a kernel WARNING is always
>>> reported.
>>> This is caused by the fact that the devfreq driver already call the
>>> governor unregister with the GOV_STOP, for this reason the second
>>> cpufreq_passive_unregister always return error and a WARN is printed
>>> from the WARN_ON function.
>>> Remove the unregister call from the error handling of the cpufreq register
>>> notifier as it's fundamentally wrong and already handled by the devfreq
>>> core code.
If possible, could you make the patch description more simply?
>>>
>>> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
>>> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
>>> ---
>>> drivers/devfreq/governor_passive.c | 1 -
>>> 1 file changed, 1 deletion(-)
>>>
>>> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
>>> index 95de336f20d5..dcc9dd518197 100644
>>> --- a/drivers/devfreq/governor_passive.c
>>> +++ b/drivers/devfreq/governor_passive.c
>>> @@ -331,7 +331,6 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
>>> err_put_policy:
>>> cpufreq_cpu_put(policy);
>>> err:
>>> - WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
>>>
>>> return ret;
>>> }
>>
>> I think that it is necessary to free the resource when error happen.
>
> Thing is that it should not be done in the register. Following the flow
> of the devfreq core code, if a gov fails to START, the gov STOP is
> called and we correctly free our resources. In the current
> implementation we call the free 2 times and the second time will always
> print error as the notifier is already unregistered.
>
>> Also, after merging the your patch1, I think that cpufreq_passive_unregister_notifier(devfreq)
>> will not return error. Instead, just 0 for success.
>
> With path1 we removed the error with the parent_cpu_data deletion but
> the unregister error is still there.
>
>>
>> Instead, 'err_free_cpu_data' and 'err_put_policy' goto statement are wrong exception
>> handling. If fix the exception handling code in cpufreq_passive_register_notifier
>> as following and with your patch1, I'll handle the resource for free/un-registration
>> when error happen during cpufreq_passive_register_notifier.
>>
>
> Don't know the main problem here is calling unregister 2 times.
Ah. I understood. To fix the error path handling with unregister function
is called twice, I think that need to to fix it as following:
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index a35b39ac656c..8f38a63beefc 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -289,22 +289,25 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
parent_cpu_data = kzalloc(sizeof(*parent_cpu_data),
GFP_KERNEL);
if (!parent_cpu_data) {
+ cpufreq_cpu_put(policy);
ret = -ENOMEM;
- goto err_put_policy;
+ goto err;
}
cpu_dev = get_cpu_device(cpu);
if (!cpu_dev) {
dev_err(dev, "failed to get cpu device\n");
+ cpufreq_cpu_put(policy);
ret = -ENODEV;
- goto err_free_cpu_data;
+ goto err;
}
opp_table = dev_pm_opp_get_opp_table(cpu_dev);
if (IS_ERR(opp_table)) {
dev_err(dev, "failed to get opp_table of cpu%d\n", cpu);
+ cpufreq_cpu_put(policy);
ret = PTR_ERR(opp_table);
- goto err_free_cpu_data;
+ goto err;
}
parent_cpu_data->dev = cpu_dev;
@@ -324,15 +327,7 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
if (ret)
dev_err(dev, "failed to update the frequency\n");
- return ret;
-
-err_free_cpu_data:
- kfree(parent_cpu_data);
-err_put_policy:
- cpufreq_cpu_put(policy);
err:
- WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
-
return ret;
}
--
Best Regards,
Samsung Electronics
Chanwoo Choi
Hi,
Thanks for the good catch.
I think that this patch fixes issue of commit 0ec09ac2cebe
("PM / devfreq: Set the freq_table of devfreq device").
When some devfreq driver without using passive governor
faces on the all errors including PROBE_DEFER
after executing devfreq_add_device, this issue will happen.
Also need to check the devfreq device driver using freq_table
which was allocated dynamically. In case of devfreq/drivers/exynos-bus.c
used the devfreq->profile->freq_table after devfreq_add_device.
On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> Currently we reference the freq_table to the profile defined one and we
> make changes on it. Devfreq never supported PROBE_DEFER before the cpu
> based scaling support to the passive governor and assumed that a devfreq
> device could only had error and be done with it.
> Now that a device can PROBE_DEFER a rework to the freq_table logic is
> required.
>
> If a device PROBE_DEFER on the GOV_START, the freq_table is already set
> in the device profile struct and its init is skipped. This is due to the
> fact that it's common for devs to declare this kind of struct static.
> This cause the devfreq logic to find a freq table declared (freq_table
> not NULL) with random data and poiting to the old addrs freed by devm.
> This problem CAN be solved by devs by clearing the freq_table in their
> profile struct on driver exit path but it should not be trusted and it
> looks to use a flawed logic.
>
> A better solution is to move the freq_table and max_state to the
> devfreq struct and never change the profile struct.
> This permit to correctly handle PROBE_DEFER since the devfreq struct is
> reallocated and contains new values.
> Also the profile struct should only be used to init the driver and should
> not be used by the devfreq to write the freq_table if it's not provided
> by the driver.
>
If possible, could you explain the patch description more simply?
Maybe, just focus on the 'freq_table' issue by device managed functions (devs).
> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> ---
> drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
> drivers/devfreq/governor_passive.c | 14 +++---
> include/linux/devfreq.h | 4 ++
> 3 files changed, 45 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 01474daf4548..2e2b3b414d67 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> unsigned long *min_freq,
> unsigned long *max_freq)
> {
> - unsigned long *freq_table = devfreq->profile->freq_table;
> + unsigned long *freq_table = devfreq->freq_table;
> s32 qos_min_freq, qos_max_freq;
>
> lockdep_assert_held(&devfreq->lock);
> @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> * The devfreq drivers can initialize this in either ascending or
> * descending order and devfreq core supports both.
> */
> - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> + if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
> *min_freq = freq_table[0];
> - *max_freq = freq_table[devfreq->profile->max_state - 1];
> + *max_freq = freq_table[devfreq->max_state - 1];
> } else {
> - *min_freq = freq_table[devfreq->profile->max_state - 1];
> + *min_freq = freq_table[devfreq->max_state - 1];
> *max_freq = freq_table[0];
> }
>
> @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> {
> int lev;
>
> - for (lev = 0; lev < devfreq->profile->max_state; lev++)
> - if (freq == devfreq->profile->freq_table[lev])
> + for (lev = 0; lev < devfreq->max_state; lev++)
> + if (freq == devfreq->freq_table[lev])
> return lev;
>
> return -EINVAL;
> @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>
> static int set_freq_table(struct devfreq *devfreq)
> {
> - struct devfreq_dev_profile *profile = devfreq->profile;
> struct dev_pm_opp *opp;
> unsigned long freq;
> int i, count;
> @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
> if (count <= 0)
> return -EINVAL;
>
> - profile->max_state = count;
> - profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> - profile->max_state,
> - sizeof(*profile->freq_table),
> - GFP_KERNEL);
> - if (!profile->freq_table) {
> - profile->max_state = 0;
> + devfreq->max_state = count;
> + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> + devfreq->max_state,
> + sizeof(*devfreq->freq_table),
> + GFP_KERNEL);
> + if (!devfreq->freq_table)
> return -ENOMEM;
> - }
>
> - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
> opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
> if (IS_ERR(opp)) {
> - devm_kfree(devfreq->dev.parent, profile->freq_table);
> - profile->max_state = 0;
> + devm_kfree(devfreq->dev.parent, devfreq->freq_table);
> return PTR_ERR(opp);
> }
> dev_pm_opp_put(opp);
> - profile->freq_table[i] = freq;
> + devfreq->freq_table[i] = freq;
> }
>
> return 0;
> @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
>
> if (lev != prev_lev) {
> devfreq->stats.trans_table[
> - (prev_lev * devfreq->profile->max_state) + lev]++;
> + (prev_lev * devfreq->max_state) + lev]++;
> devfreq->stats.total_trans++;
> }
>
> @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> if (err < 0)
> goto err_dev;
> mutex_lock(&devfreq->lock);
> + } else {
> + devfreq->freq_table = devfreq->profile->freq_table;
> + devfreq->max_state = devfreq->profile->max_state;
> }
>
> devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
>
> devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
> array3_size(sizeof(unsigned int),
> - devfreq->profile->max_state,
> - devfreq->profile->max_state),
> + devfreq->max_state,
> + devfreq->max_state),
> GFP_KERNEL);
> if (!devfreq->stats.trans_table) {
> mutex_unlock(&devfreq->lock);
> @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
> }
>
> devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> - devfreq->profile->max_state,
> + devfreq->max_state,
> sizeof(*devfreq->stats.time_in_state),
> GFP_KERNEL);
> if (!devfreq->stats.time_in_state) {
> @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
>
> mutex_lock(&df->lock);
>
> - for (i = 0; i < df->profile->max_state; i++)
> + for (i = 0; i < df->max_state; i++)
> count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> - "%lu ", df->profile->freq_table[i]);
> + "%lu ", df->freq_table[i]);
>
> mutex_unlock(&df->lock);
> /* Truncate the trailing space */
> @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
>
> if (!df->profile)
> return -EINVAL;
> - max_state = df->profile->max_state;
> + max_state = df->max_state;
>
> if (max_state == 0)
> return sprintf(buf, "Not Supported.\n");
> @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
> len += sprintf(buf + len, " :");
> for (i = 0; i < max_state; i++)
> len += sprintf(buf + len, "%10lu",
> - df->profile->freq_table[i]);
> + df->freq_table[i]);
>
> len += sprintf(buf + len, " time(ms)\n");
>
> for (i = 0; i < max_state; i++) {
> - if (df->profile->freq_table[i]
> - == df->previous_freq) {
> + if (df->freq_table[i] == df->previous_freq)
> len += sprintf(buf + len, "*");
> - } else {
> + else
> len += sprintf(buf + len, " ");
> - }
> - len += sprintf(buf + len, "%10lu:",
> - df->profile->freq_table[i]);
> +
> + len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
> for (j = 0; j < max_state; j++)
> len += sprintf(buf + len, "%10u",
> df->stats.trans_table[(i * max_state) + j]);
> @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
> if (!df->profile)
> return -EINVAL;
>
> - if (df->profile->max_state == 0)
> + if (df->max_state == 0)
> return count;
>
> err = kstrtoint(buf, 10, &value);
> @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
> return -EINVAL;
>
> mutex_lock(&df->lock);
> - memset(df->stats.time_in_state, 0, (df->profile->max_state *
> + memset(df->stats.time_in_state, 0, (df->max_state *
> sizeof(*df->stats.time_in_state)));
> memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> - df->profile->max_state,
> - df->profile->max_state));
> + df->max_state,
> + df->max_state));
> df->stats.total_trans = 0;
> df->stats.last_update = get_jiffies_64();
> mutex_unlock(&df->lock);
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index dcc9dd518197..1810966fd61d 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -145,18 +145,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
> goto out;
>
> /* Use interpolation if required opps is not available */
> - for (i = 0; i < parent_devfreq->profile->max_state; i++)
> - if (parent_devfreq->profile->freq_table[i] == *freq)
> + for (i = 0; i < parent_devfreq->max_state; i++)
> + if (parent_devfreq->freq_table[i] == *freq)
> break;
>
> - if (i == parent_devfreq->profile->max_state)
> + if (i == parent_devfreq->max_state)
> return -EINVAL;
>
> - if (i < devfreq->profile->max_state) {
> - child_freq = devfreq->profile->freq_table[i];
> + if (i < devfreq->max_state) {
> + child_freq = devfreq->freq_table[i];
> } else {
> - count = devfreq->profile->max_state;
> - child_freq = devfreq->profile->freq_table[count - 1];
> + count = devfreq->max_state;
> + child_freq = devfreq->freq_table[count - 1];
> }
>
> out:
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index dc10bee75a72..770a7532655c 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -185,6 +185,10 @@ struct devfreq {
> struct notifier_block nb;
> struct delayed_work work;
>
> + /* devfreq local freq_table */
> + unsigned long *freq_table;
> + unsigned int max_state;
> +
> unsigned long previous_freq;
> struct devfreq_dev_status last_status;
>
--
Best Regards,
Samsung Electronics
Chanwoo Choi
On Sat, Jun 18, 2022 at 04:33:19AM +0900, Chanwoo Choi wrote:
> Hi,
>
> Thanks for the good catch.
>
>
> I think that this patch fixes issue of commit 0ec09ac2cebe
> ("PM / devfreq: Set the freq_table of devfreq device").
>
Should I add this to the Fixes list?
> When some devfreq driver without using passive governor
> faces on the all errors including PROBE_DEFER
> after executing devfreq_add_device, this issue will happen.
>
> Also need to check the devfreq device driver using freq_table
> which was allocated dynamically. In case of devfreq/drivers/exynos-bus.c
> used the devfreq->profile->freq_table after devfreq_add_device.
>
> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> > Currently we reference the freq_table to the profile defined one and we
> > make changes on it. Devfreq never supported PROBE_DEFER before the cpu
> > based scaling support to the passive governor and assumed that a devfreq
> > device could only had error and be done with it.
> > Now that a device can PROBE_DEFER a rework to the freq_table logic is
> > required.
> >
> > If a device PROBE_DEFER on the GOV_START, the freq_table is already set
> > in the device profile struct and its init is skipped. This is due to the
> > fact that it's common for devs to declare this kind of struct static.
> > This cause the devfreq logic to find a freq table declared (freq_table
> > not NULL) with random data and poiting to the old addrs freed by devm.
> > This problem CAN be solved by devs by clearing the freq_table in their
> > profile struct on driver exit path but it should not be trusted and it
> > looks to use a flawed logic.
> >
> > A better solution is to move the freq_table and max_state to the
> > devfreq struct and never change the profile struct.
> > This permit to correctly handle PROBE_DEFER since the devfreq struct is
> > reallocated and contains new values.
> > Also the profile struct should only be used to init the driver and should
> > not be used by the devfreq to write the freq_table if it's not provided
> > by the driver.
> >
>
> If possible, could you explain the patch description more simply?
> Maybe, just focus on the 'freq_table' issue by device managed functions (devs).
>
Sure!
>
> > Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> > Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> > ---
> > drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
> > drivers/devfreq/governor_passive.c | 14 +++---
> > include/linux/devfreq.h | 4 ++
> > 3 files changed, 45 insertions(+), 44 deletions(-)
> >
> > diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> > index 01474daf4548..2e2b3b414d67 100644
> > --- a/drivers/devfreq/devfreq.c
> > +++ b/drivers/devfreq/devfreq.c
> > @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> > unsigned long *min_freq,
> > unsigned long *max_freq)
> > {
> > - unsigned long *freq_table = devfreq->profile->freq_table;
> > + unsigned long *freq_table = devfreq->freq_table;
> > s32 qos_min_freq, qos_max_freq;
> >
> > lockdep_assert_held(&devfreq->lock);
> > @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> > * The devfreq drivers can initialize this in either ascending or
> > * descending order and devfreq core supports both.
> > */
> > - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> > + if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
> > *min_freq = freq_table[0];
> > - *max_freq = freq_table[devfreq->profile->max_state - 1];
> > + *max_freq = freq_table[devfreq->max_state - 1];
> > } else {
> > - *min_freq = freq_table[devfreq->profile->max_state - 1];
> > + *min_freq = freq_table[devfreq->max_state - 1];
> > *max_freq = freq_table[0];
> > }
> >
> > @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> > {
> > int lev;
> >
> > - for (lev = 0; lev < devfreq->profile->max_state; lev++)
> > - if (freq == devfreq->profile->freq_table[lev])
> > + for (lev = 0; lev < devfreq->max_state; lev++)
> > + if (freq == devfreq->freq_table[lev])
> > return lev;
> >
> > return -EINVAL;
> > @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> >
> > static int set_freq_table(struct devfreq *devfreq)
> > {
> > - struct devfreq_dev_profile *profile = devfreq->profile;
> > struct dev_pm_opp *opp;
> > unsigned long freq;
> > int i, count;
> > @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
> > if (count <= 0)
> > return -EINVAL;
> >
> > - profile->max_state = count;
> > - profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> > - profile->max_state,
> > - sizeof(*profile->freq_table),
> > - GFP_KERNEL);
> > - if (!profile->freq_table) {
> > - profile->max_state = 0;
> > + devfreq->max_state = count;
> > + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> > + devfreq->max_state,
> > + sizeof(*devfreq->freq_table),
> > + GFP_KERNEL);
> > + if (!devfreq->freq_table)
> > return -ENOMEM;
> > - }
> >
> > - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> > + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
> > opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
> > if (IS_ERR(opp)) {
> > - devm_kfree(devfreq->dev.parent, profile->freq_table);
> > - profile->max_state = 0;
> > + devm_kfree(devfreq->dev.parent, devfreq->freq_table);
> > return PTR_ERR(opp);
> > }
> > dev_pm_opp_put(opp);
> > - profile->freq_table[i] = freq;
> > + devfreq->freq_table[i] = freq;
> > }
> >
> > return 0;
> > @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
> >
> > if (lev != prev_lev) {
> > devfreq->stats.trans_table[
> > - (prev_lev * devfreq->profile->max_state) + lev]++;
> > + (prev_lev * devfreq->max_state) + lev]++;
> > devfreq->stats.total_trans++;
> > }
> >
> > @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > if (err < 0)
> > goto err_dev;
> > mutex_lock(&devfreq->lock);
> > + } else {
> > + devfreq->freq_table = devfreq->profile->freq_table;
> > + devfreq->max_state = devfreq->profile->max_state;
> > }
> >
> > devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> > @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >
> > devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
> > array3_size(sizeof(unsigned int),
> > - devfreq->profile->max_state,
> > - devfreq->profile->max_state),
> > + devfreq->max_state,
> > + devfreq->max_state),
> > GFP_KERNEL);
> > if (!devfreq->stats.trans_table) {
> > mutex_unlock(&devfreq->lock);
> > @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > }
> >
> > devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> > - devfreq->profile->max_state,
> > + devfreq->max_state,
> > sizeof(*devfreq->stats.time_in_state),
> > GFP_KERNEL);
> > if (!devfreq->stats.time_in_state) {
> > @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
> >
> > mutex_lock(&df->lock);
> >
> > - for (i = 0; i < df->profile->max_state; i++)
> > + for (i = 0; i < df->max_state; i++)
> > count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> > - "%lu ", df->profile->freq_table[i]);
> > + "%lu ", df->freq_table[i]);
> >
> > mutex_unlock(&df->lock);
> > /* Truncate the trailing space */
> > @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
> >
> > if (!df->profile)
> > return -EINVAL;
> > - max_state = df->profile->max_state;
> > + max_state = df->max_state;
> >
> > if (max_state == 0)
> > return sprintf(buf, "Not Supported.\n");
> > @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
> > len += sprintf(buf + len, " :");
> > for (i = 0; i < max_state; i++)
> > len += sprintf(buf + len, "%10lu",
> > - df->profile->freq_table[i]);
> > + df->freq_table[i]);
> >
> > len += sprintf(buf + len, " time(ms)\n");
> >
> > for (i = 0; i < max_state; i++) {
> > - if (df->profile->freq_table[i]
> > - == df->previous_freq) {
> > + if (df->freq_table[i] == df->previous_freq)
> > len += sprintf(buf + len, "*");
> > - } else {
> > + else
> > len += sprintf(buf + len, " ");
> > - }
> > - len += sprintf(buf + len, "%10lu:",
> > - df->profile->freq_table[i]);
> > +
> > + len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
> > for (j = 0; j < max_state; j++)
> > len += sprintf(buf + len, "%10u",
> > df->stats.trans_table[(i * max_state) + j]);
> > @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
> > if (!df->profile)
> > return -EINVAL;
> >
> > - if (df->profile->max_state == 0)
> > + if (df->max_state == 0)
> > return count;
> >
> > err = kstrtoint(buf, 10, &value);
> > @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
> > return -EINVAL;
> >
> > mutex_lock(&df->lock);
> > - memset(df->stats.time_in_state, 0, (df->profile->max_state *
> > + memset(df->stats.time_in_state, 0, (df->max_state *
> > sizeof(*df->stats.time_in_state)));
> > memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> > - df->profile->max_state,
> > - df->profile->max_state));
> > + df->max_state,
> > + df->max_state));
> > df->stats.total_trans = 0;
> > df->stats.last_update = get_jiffies_64();
> > mutex_unlock(&df->lock);
> > diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> > index dcc9dd518197..1810966fd61d 100644
> > --- a/drivers/devfreq/governor_passive.c
> > +++ b/drivers/devfreq/governor_passive.c
> > @@ -145,18 +145,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
> > goto out;
> >
> > /* Use interpolation if required opps is not available */
> > - for (i = 0; i < parent_devfreq->profile->max_state; i++)
> > - if (parent_devfreq->profile->freq_table[i] == *freq)
> > + for (i = 0; i < parent_devfreq->max_state; i++)
> > + if (parent_devfreq->freq_table[i] == *freq)
> > break;
> >
> > - if (i == parent_devfreq->profile->max_state)
> > + if (i == parent_devfreq->max_state)
> > return -EINVAL;
> >
> > - if (i < devfreq->profile->max_state) {
> > - child_freq = devfreq->profile->freq_table[i];
> > + if (i < devfreq->max_state) {
> > + child_freq = devfreq->freq_table[i];
> > } else {
> > - count = devfreq->profile->max_state;
> > - child_freq = devfreq->profile->freq_table[count - 1];
> > + count = devfreq->max_state;
> > + child_freq = devfreq->freq_table[count - 1];
> > }
> >
> > out:
> > diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> > index dc10bee75a72..770a7532655c 100644
> > --- a/include/linux/devfreq.h
> > +++ b/include/linux/devfreq.h
> > @@ -185,6 +185,10 @@ struct devfreq {
> > struct notifier_block nb;
> > struct delayed_work work;
> >
> > + /* devfreq local freq_table */
> > + unsigned long *freq_table;
> > + unsigned int max_state;
> > +
> > unsigned long previous_freq;
> > struct devfreq_dev_status last_status;
> >
>
>
> --
> Best Regards,
> Samsung Electronics
> Chanwoo Choi
--
Ansuel
On 22. 6. 18. 04:38, Christian Marangi wrote:
> On Sat, Jun 18, 2022 at 04:33:19AM +0900, Chanwoo Choi wrote:
>> Hi,
>>
>> Thanks for the good catch.
>>
>>
>> I think that this patch fixes issue of commit 0ec09ac2cebe
>> ("PM / devfreq: Set the freq_table of devfreq device").
>>
>
> Should I add this to the Fixes list?
Replace fixes commit information with commit 0ec09ac2cebe.
I'd like you to make the patch separated from this series
and then send them to linux-stable mailling list tool
because I think this patch should be maintained by linux stable patches.
>
>> When some devfreq driver without using passive governor
>> faces on the all errors including PROBE_DEFER
>> after executing devfreq_add_device, this issue will happen.
>>
>> Also need to check the devfreq device driver using freq_table
>> which was allocated dynamically. In case of devfreq/drivers/exynos-bus.c
>> used the devfreq->profile->freq_table after devfreq_add_device.
>>
>> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
>>> Currently we reference the freq_table to the profile defined one and we
>>> make changes on it. Devfreq never supported PROBE_DEFER before the cpu
>>> based scaling support to the passive governor and assumed that a devfreq
>>> device could only had error and be done with it.
>>> Now that a device can PROBE_DEFER a rework to the freq_table logic is
>>> required.
>>>
>>> If a device PROBE_DEFER on the GOV_START, the freq_table is already set
>>> in the device profile struct and its init is skipped. This is due to the
>>> fact that it's common for devs to declare this kind of struct static.
>>> This cause the devfreq logic to find a freq table declared (freq_table
>>> not NULL) with random data and poiting to the old addrs freed by devm.
>> > This problem CAN be solved by devs by clearing the freq_table in their
>>> profile struct on driver exit path but it should not be trusted and it
>>> looks to use a flawed logic.
>>>
>>> A better solution is to move the freq_table and max_state to the
>>> devfreq struct and never change the profile struct.
>>> This permit to correctly handle PROBE_DEFER since the devfreq struct is
>>> reallocated and contains new values.
>>> Also the profile struct should only be used to init the driver and should
>>> not be used by the devfreq to write the freq_table if it's not provided
>>> by the driver.
>>>
>>
>> If possible, could you explain the patch description more simply?
>> Maybe, just focus on the 'freq_table' issue by device managed functions (devs).
>>
>
> Sure!
Thanks.
>
>>
>>> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
>>> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
>>> ---
>>> drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
>>> drivers/devfreq/governor_passive.c | 14 +++---
>>> include/linux/devfreq.h | 4 ++
>>> 3 files changed, 45 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>>> index 01474daf4548..2e2b3b414d67 100644
>>> --- a/drivers/devfreq/devfreq.c
>>> +++ b/drivers/devfreq/devfreq.c
>>> @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>>> unsigned long *min_freq,
>>> unsigned long *max_freq)
>>> {
>>> - unsigned long *freq_table = devfreq->profile->freq_table;
>>> + unsigned long *freq_table = devfreq->freq_table;
>>> s32 qos_min_freq, qos_max_freq;
>>>
>>> lockdep_assert_held(&devfreq->lock);
>>> @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>>> * The devfreq drivers can initialize this in either ascending or
>>> * descending order and devfreq core supports both.
>>> */
>>> - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
>>> + if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
>>> *min_freq = freq_table[0];
>>> - *max_freq = freq_table[devfreq->profile->max_state - 1];
>>> + *max_freq = freq_table[devfreq->max_state - 1];
>>> } else {
>>> - *min_freq = freq_table[devfreq->profile->max_state - 1];
>>> + *min_freq = freq_table[devfreq->max_state - 1];
>>> *max_freq = freq_table[0];
>>> }
>>>
>>> @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>>> {
>>> int lev;
>>>
>>> - for (lev = 0; lev < devfreq->profile->max_state; lev++)
>>> - if (freq == devfreq->profile->freq_table[lev])
>>> + for (lev = 0; lev < devfreq->max_state; lev++)
>>> + if (freq == devfreq->freq_table[lev])
>>> return lev;
>>>
>>> return -EINVAL;
>>> @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>>>
>>> static int set_freq_table(struct devfreq *devfreq)
>>> {
>>> - struct devfreq_dev_profile *profile = devfreq->profile;
>>> struct dev_pm_opp *opp;
>>> unsigned long freq;
>>> int i, count;
>>> @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
>>> if (count <= 0)
>>> return -EINVAL;
>>>
>>> - profile->max_state = count;
>>> - profile->freq_table = devm_kcalloc(devfreq->dev.parent,
>>> - profile->max_state,
>>> - sizeof(*profile->freq_table),
>>> - GFP_KERNEL);
>>> - if (!profile->freq_table) {
>>> - profile->max_state = 0;
>>> + devfreq->max_state = count;
>>> + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
>>> + devfreq->max_state,
>>> + sizeof(*devfreq->freq_table),
>>> + GFP_KERNEL);
>>> + if (!devfreq->freq_table)
>>> return -ENOMEM;
>>> - }
>>>
>>> - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
>>> + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
>>> opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
>>> if (IS_ERR(opp)) {
>>> - devm_kfree(devfreq->dev.parent, profile->freq_table);
>>> - profile->max_state = 0;
>>> + devm_kfree(devfreq->dev.parent, devfreq->freq_table);
>>> return PTR_ERR(opp);
>>> }
>>> dev_pm_opp_put(opp);
>>> - profile->freq_table[i] = freq;
>>> + devfreq->freq_table[i] = freq;
>>> }
>>>
>>> return 0;
>>> @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
>>>
>>> if (lev != prev_lev) {
>>> devfreq->stats.trans_table[
>>> - (prev_lev * devfreq->profile->max_state) + lev]++;
>>> + (prev_lev * devfreq->max_state) + lev]++;
>>> devfreq->stats.total_trans++;
>>> }
>>>
>>> @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>> if (err < 0)
>>> goto err_dev;
>>> mutex_lock(&devfreq->lock);
>>> + } else {
>>> + devfreq->freq_table = devfreq->profile->freq_table;
>>> + devfreq->max_state = devfreq->profile->max_state;
>>> }
>>>
>>> devfreq->scaling_min_freq = find_available_min_freq(devfreq);
>>> @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>>
>>> devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
>>> array3_size(sizeof(unsigned int),
>>> - devfreq->profile->max_state,
>>> - devfreq->profile->max_state),
>>> + devfreq->max_state,
>>> + devfreq->max_state),
>>> GFP_KERNEL);
>>> if (!devfreq->stats.trans_table) {
>>> mutex_unlock(&devfreq->lock);
>>> @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>> }
>>>
>>> devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
>>> - devfreq->profile->max_state,
>>> + devfreq->max_state,
>>> sizeof(*devfreq->stats.time_in_state),
>>> GFP_KERNEL);
>>> if (!devfreq->stats.time_in_state) {
>>> @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
>>>
>>> mutex_lock(&df->lock);
>>>
>>> - for (i = 0; i < df->profile->max_state; i++)
>>> + for (i = 0; i < df->max_state; i++)
>>> count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
>>> - "%lu ", df->profile->freq_table[i]);
>>> + "%lu ", df->freq_table[i]);
>>>
>>> mutex_unlock(&df->lock);
>>> /* Truncate the trailing space */
>>> @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
>>>
>>> if (!df->profile)
>>> return -EINVAL;
>>> - max_state = df->profile->max_state;
>>> + max_state = df->max_state;
>>>
>>> if (max_state == 0)
>>> return sprintf(buf, "Not Supported.\n");
>>> @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
>>> len += sprintf(buf + len, " :");
>>> for (i = 0; i < max_state; i++)
>>> len += sprintf(buf + len, "%10lu",
>>> - df->profile->freq_table[i]);
>>> + df->freq_table[i]);
>>>
>>> len += sprintf(buf + len, " time(ms)\n");
>>>
>>> for (i = 0; i < max_state; i++) {
>>> - if (df->profile->freq_table[i]
>>> - == df->previous_freq) {
>>> + if (df->freq_table[i] == df->previous_freq)
>>> len += sprintf(buf + len, "*");
>>> - } else {
>>> + else
>>> len += sprintf(buf + len, " ");
>>> - }
>>> - len += sprintf(buf + len, "%10lu:",
>>> - df->profile->freq_table[i]);
>>> +
>>> + len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
>>> for (j = 0; j < max_state; j++)
>>> len += sprintf(buf + len, "%10u",
>>> df->stats.trans_table[(i * max_state) + j]);
>>> @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
>>> if (!df->profile)
>>> return -EINVAL;
>>>
>>> - if (df->profile->max_state == 0)
>>> + if (df->max_state == 0)
>>> return count;
>>>
>>> err = kstrtoint(buf, 10, &value);
>>> @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
>>> return -EINVAL;
>>>
>>> mutex_lock(&df->lock);
>>> - memset(df->stats.time_in_state, 0, (df->profile->max_state *
>>> + memset(df->stats.time_in_state, 0, (df->max_state *
>>> sizeof(*df->stats.time_in_state)));
>>> memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
>>> - df->profile->max_state,
>>> - df->profile->max_state));
>>> + df->max_state,
>>> + df->max_state));
>>> df->stats.total_trans = 0;
>>> df->stats.last_update = get_jiffies_64();
>>> mutex_unlock(&df->lock);
>>> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
>>> index dcc9dd518197..1810966fd61d 100644
>>> --- a/drivers/devfreq/governor_passive.c
>>> +++ b/drivers/devfreq/governor_passive.c
>>> @@ -145,18 +145,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
>>> goto out;
>>>
>>> /* Use interpolation if required opps is not available */
>>> - for (i = 0; i < parent_devfreq->profile->max_state; i++)
>>> - if (parent_devfreq->profile->freq_table[i] == *freq)
>>> + for (i = 0; i < parent_devfreq->max_state; i++)
>>> + if (parent_devfreq->freq_table[i] == *freq)
>>> break;
>>>
>>> - if (i == parent_devfreq->profile->max_state)
>>> + if (i == parent_devfreq->max_state)
>>> return -EINVAL;
>>>
>>> - if (i < devfreq->profile->max_state) {
>>> - child_freq = devfreq->profile->freq_table[i];
>>> + if (i < devfreq->max_state) {
>>> + child_freq = devfreq->freq_table[i];
>>> } else {
>>> - count = devfreq->profile->max_state;
>>> - child_freq = devfreq->profile->freq_table[count - 1];
>>> + count = devfreq->max_state;
>>> + child_freq = devfreq->freq_table[count - 1];
>>> }
>>>
>>> out:
>>> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
>>> index dc10bee75a72..770a7532655c 100644
>>> --- a/include/linux/devfreq.h
>>> +++ b/include/linux/devfreq.h
>>> @@ -185,6 +185,10 @@ struct devfreq {
>>> struct notifier_block nb;
>>> struct delayed_work work;
>>>
>>> + /* devfreq local freq_table */
This comment is not necessary. Please remove it.
>>> + unsigned long *freq_table;
>>> + unsigned int max_state;
And need to add the description of newly added variables
to 'struct devfreq' structure description.
>>> +
>>> unsigned long previous_freq;
>>> struct devfreq_dev_status last_status;
>>>
>>
>>
>> --
>> Best Regards,
>> Samsung Electronics
>> Chanwoo Choi
>
--
Best Regards,
Samsung Electronics
Chanwoo Choi
On Sat, Jun 18, 2022 at 04:08:35AM +0900, Chanwoo Choi wrote:
> On 22. 6. 15. 18:20, Ansuel Smith wrote:
> > On Wed, Jun 15, 2022 at 04:11:13PM +0900, Chanwoo Choi wrote:
> >> On 22. 6. 15. 08:09, Christian 'Ansuel' Marangi wrote:
> >>> When the cpufreq passive register path from the passive governor fails,
> >>> the cpufreq_passive_unregister is called and a kernel WARNING is always
> >>> reported.
>
> >>> This is caused by the fact that the devfreq driver already call the
> >>> governor unregister with the GOV_STOP, for this reason the second
> >>> cpufreq_passive_unregister always return error and a WARN is printed
> >>> from the WARN_ON function.
>
> >>> Remove the unregister call from the error handling of the cpufreq register
> >>> notifier as it's fundamentally wrong and already handled by the devfreq
> >>> core code.
>
> If possible, could you make the patch description more simply?
>
> >>>
> >>> Fixes: a03dacb0316f ("PM / devfreq: Add cpu based scaling support to passive governor")
> >>> Signed-off-by: Christian 'Ansuel' Marangi <[email protected]>
> >>> ---
> >>> drivers/devfreq/governor_passive.c | 1 -
> >>> 1 file changed, 1 deletion(-)
> >>>
> >>> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> >>> index 95de336f20d5..dcc9dd518197 100644
> >>> --- a/drivers/devfreq/governor_passive.c
> >>> +++ b/drivers/devfreq/governor_passive.c
> >>> @@ -331,7 +331,6 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
> >>> err_put_policy:
> >>> cpufreq_cpu_put(policy);
> >>> err:
> >>> - WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
> >>>
> >>> return ret;
> >>> }
> >>
> >> I think that it is necessary to free the resource when error happen.
> >
> > Thing is that it should not be done in the register. Following the flow
> > of the devfreq core code, if a gov fails to START, the gov STOP is
> > called and we correctly free our resources. In the current
> > implementation we call the free 2 times and the second time will always
> > print error as the notifier is already unregistered.
> >
> >> Also, after merging the your patch1, I think that cpufreq_passive_unregister_notifier(devfreq)
> >> will not return error. Instead, just 0 for success.
> >
> > With path1 we removed the error with the parent_cpu_data deletion but
> > the unregister error is still there.
> >
> >>
> >> Instead, 'err_free_cpu_data' and 'err_put_policy' goto statement are wrong exception
> >> handling. If fix the exception handling code in cpufreq_passive_register_notifier
> >> as following and with your patch1, I'll handle the resource for free/un-registration
> >> when error happen during cpufreq_passive_register_notifier.
> >>
> >
> > Don't know the main problem here is calling unregister 2 times.
>
> Ah. I understood. To fix the error path handling with unregister function
> is called twice, I think that need to to fix it as following:
>
>
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index a35b39ac656c..8f38a63beefc 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -289,22 +289,25 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
> parent_cpu_data = kzalloc(sizeof(*parent_cpu_data),
> GFP_KERNEL);
> if (!parent_cpu_data) {
> + cpufreq_cpu_put(policy);
> ret = -ENOMEM;
> - goto err_put_policy;
> + goto err;
> }
>
> cpu_dev = get_cpu_device(cpu);
> if (!cpu_dev) {
> dev_err(dev, "failed to get cpu device\n");
> + cpufreq_cpu_put(policy);
> ret = -ENODEV;
> - goto err_free_cpu_data;
> + goto err;
> }
>
> opp_table = dev_pm_opp_get_opp_table(cpu_dev);
> if (IS_ERR(opp_table)) {
> dev_err(dev, "failed to get opp_table of cpu%d\n", cpu);
> + cpufreq_cpu_put(policy);
> ret = PTR_ERR(opp_table);
> - goto err_free_cpu_data;
> + goto err;
> }
>
> parent_cpu_data->dev = cpu_dev;
> @@ -324,15 +327,7 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
> if (ret)
> dev_err(dev, "failed to update the frequency\n");
>
> - return ret;
> -
> -err_free_cpu_data:
> - kfree(parent_cpu_data);
Wait! This would leak the just allocated parent_cpu_data in case of
error since we return before it's added to the cpu_data_list and it
won't be freed on the unregister function.
I'm sending this patch (since the other got merged) with the description
reworked. Think there isn't another way to have the code linear and
remove the ""double return"".
> -err_put_policy:
> - cpufreq_cpu_put(policy);
> err:
> - WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
> -
> return ret;
> }
>
>
> --
> Best Regards,
> Samsung Electronics
> Chanwoo Choi
--
Ansuel