2022-06-19 22:19:28

by Christian Marangi

[permalink] [raw]
Subject: [PATCH] PM / devfreq: Rework freq_table to be local to devfreq struct

On a devfreq PROBE_DEFER, the freq_table in the driver profile struct
is never reset and may be left in an undefined state.

This comes from the fact that we store the freq_table in the driver
profile struct that is commonly defined as static and not reset on
PROBE_DEFER.
We currently skip the reinit of the freq_table if we find
it's already defined, since a driver may declare its own freq_table.

This logic is flawed in the case where the devfreq core generates a
freq_table, sets it in the profile struct and then hits PROBE_DEFER,
freeing the freq_table.
In this case devfreq will find a non-NULL freq_table that has been
freed, skip the freq_table generation and probe the driver based on the
wrong table.

To fix this and correctly handle PROBE_DEFER, use a local freq_table and
max_state in the devfreq struct and never modify the freq_table present
in the profile struct if the driver provides one.

Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
Cc: [email protected]
Signed-off-by: Christian Marangi <[email protected]>
---
drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
drivers/devfreq/governor_passive.c | 14 +++---
include/linux/devfreq.h | 5 +++
3 files changed, 46 insertions(+), 44 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 01474daf4548..2e2b3b414d67 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
unsigned long *min_freq,
unsigned long *max_freq)
{
- unsigned long *freq_table = devfreq->profile->freq_table;
+ unsigned long *freq_table = devfreq->freq_table;
s32 qos_min_freq, qos_max_freq;

lockdep_assert_held(&devfreq->lock);
@@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
* The devfreq drivers can initialize this in either ascending or
* descending order and devfreq core supports both.
*/
- if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
+ if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
*min_freq = freq_table[0];
- *max_freq = freq_table[devfreq->profile->max_state - 1];
+ *max_freq = freq_table[devfreq->max_state - 1];
} else {
- *min_freq = freq_table[devfreq->profile->max_state - 1];
+ *min_freq = freq_table[devfreq->max_state - 1];
*max_freq = freq_table[0];
}

@@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
{
int lev;

- for (lev = 0; lev < devfreq->profile->max_state; lev++)
- if (freq == devfreq->profile->freq_table[lev])
+ for (lev = 0; lev < devfreq->max_state; lev++)
+ if (freq == devfreq->freq_table[lev])
return lev;

return -EINVAL;
@@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)

static int set_freq_table(struct devfreq *devfreq)
{
- struct devfreq_dev_profile *profile = devfreq->profile;
struct dev_pm_opp *opp;
unsigned long freq;
int i, count;
@@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
if (count <= 0)
return -EINVAL;

- profile->max_state = count;
- profile->freq_table = devm_kcalloc(devfreq->dev.parent,
- profile->max_state,
- sizeof(*profile->freq_table),
- GFP_KERNEL);
- if (!profile->freq_table) {
- profile->max_state = 0;
+ devfreq->max_state = count;
+ devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
+ devfreq->max_state,
+ sizeof(*devfreq->freq_table),
+ GFP_KERNEL);
+ if (!devfreq->freq_table)
return -ENOMEM;
- }

- for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
+ for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
if (IS_ERR(opp)) {
- devm_kfree(devfreq->dev.parent, profile->freq_table);
- profile->max_state = 0;
+ devm_kfree(devfreq->dev.parent, devfreq->freq_table);
return PTR_ERR(opp);
}
dev_pm_opp_put(opp);
- profile->freq_table[i] = freq;
+ devfreq->freq_table[i] = freq;
}

return 0;
@@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)

if (lev != prev_lev) {
devfreq->stats.trans_table[
- (prev_lev * devfreq->profile->max_state) + lev]++;
+ (prev_lev * devfreq->max_state) + lev]++;
devfreq->stats.total_trans++;
}

@@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
if (err < 0)
goto err_dev;
mutex_lock(&devfreq->lock);
+ } else {
+ devfreq->freq_table = devfreq->profile->freq_table;
+ devfreq->max_state = devfreq->profile->max_state;
}

devfreq->scaling_min_freq = find_available_min_freq(devfreq);
@@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,

devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
array3_size(sizeof(unsigned int),
- devfreq->profile->max_state,
- devfreq->profile->max_state),
+ devfreq->max_state,
+ devfreq->max_state),
GFP_KERNEL);
if (!devfreq->stats.trans_table) {
mutex_unlock(&devfreq->lock);
@@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
}

devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
- devfreq->profile->max_state,
+ devfreq->max_state,
sizeof(*devfreq->stats.time_in_state),
GFP_KERNEL);
if (!devfreq->stats.time_in_state) {
@@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,

mutex_lock(&df->lock);

- for (i = 0; i < df->profile->max_state; i++)
+ for (i = 0; i < df->max_state; i++)
count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
- "%lu ", df->profile->freq_table[i]);
+ "%lu ", df->freq_table[i]);

mutex_unlock(&df->lock);
/* Truncate the trailing space */
@@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,

if (!df->profile)
return -EINVAL;
- max_state = df->profile->max_state;
+ max_state = df->max_state;

if (max_state == 0)
return sprintf(buf, "Not Supported.\n");
@@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
len += sprintf(buf + len, " :");
for (i = 0; i < max_state; i++)
len += sprintf(buf + len, "%10lu",
- df->profile->freq_table[i]);
+ df->freq_table[i]);

len += sprintf(buf + len, " time(ms)\n");

for (i = 0; i < max_state; i++) {
- if (df->profile->freq_table[i]
- == df->previous_freq) {
+ if (df->freq_table[i] == df->previous_freq)
len += sprintf(buf + len, "*");
- } else {
+ else
len += sprintf(buf + len, " ");
- }
- len += sprintf(buf + len, "%10lu:",
- df->profile->freq_table[i]);
+
+ len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
for (j = 0; j < max_state; j++)
len += sprintf(buf + len, "%10u",
df->stats.trans_table[(i * max_state) + j]);
@@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
if (!df->profile)
return -EINVAL;

- if (df->profile->max_state == 0)
+ if (df->max_state == 0)
return count;

err = kstrtoint(buf, 10, &value);
@@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
return -EINVAL;

mutex_lock(&df->lock);
- memset(df->stats.time_in_state, 0, (df->profile->max_state *
+ memset(df->stats.time_in_state, 0, (df->max_state *
sizeof(*df->stats.time_in_state)));
memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
- df->profile->max_state,
- df->profile->max_state));
+ df->max_state,
+ df->max_state));
df->stats.total_trans = 0;
df->stats.last_update = get_jiffies_64();
mutex_unlock(&df->lock);
diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index 72c67979ebe1..ce24a262aa16 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
goto out;

/* Use interpolation if required opps is not available */
- for (i = 0; i < parent_devfreq->profile->max_state; i++)
- if (parent_devfreq->profile->freq_table[i] == *freq)
+ for (i = 0; i < parent_devfreq->max_state; i++)
+ if (parent_devfreq->freq_table[i] == *freq)
break;

- if (i == parent_devfreq->profile->max_state)
+ if (i == parent_devfreq->max_state)
return -EINVAL;

- if (i < devfreq->profile->max_state) {
- child_freq = devfreq->profile->freq_table[i];
+ if (i < devfreq->max_state) {
+ child_freq = devfreq->freq_table[i];
} else {
- count = devfreq->profile->max_state;
- child_freq = devfreq->profile->freq_table[count - 1];
+ count = devfreq->max_state;
+ child_freq = devfreq->freq_table[count - 1];
}

out:
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index dc10bee75a72..34aab4dd336c 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -148,6 +148,8 @@ struct devfreq_stats {
* reevaluate operable frequencies. Devfreq users may use
* devfreq.nb to the corresponding register notifier call chain.
* @work: delayed work for load monitoring.
+ * @freq_table: current frequency table used by the devfreq driver.
+ * @max_state: count of entry present in the frequency table.
* @previous_freq: previously configured frequency value.
* @last_status: devfreq user device info, performance statistics
* @data: Private data of the governor. The devfreq framework does not
@@ -185,6 +187,9 @@ struct devfreq {
struct notifier_block nb;
struct delayed_work work;

+ unsigned long *freq_table;
+ unsigned int max_state;
+
unsigned long previous_freq;
struct devfreq_dev_status last_status;

--
2.36.1


2022-06-29 19:48:32

by Chanwoo Choi

[permalink] [raw]
Subject: Re: [PATCH] PM / devfreq: Rework freq_table to be local to devfreq struct

On 22. 6. 20. 07:03, Christian Marangi wrote:
> On a devfreq PROBE_DEFER, the freq_table in the driver profile struct,
> is never reset and may be leaved in an undefined state.
>
> This comes from the fact that we store the freq_table in the driver
> profile struct that is commonly defined as static and not reset on
> PROBE_DEFER.
> We currently skip the reinit of the freq_table if we found
> it's already defined since a driver may declare his own freq_table.
>
> This logic is flawed in the case devfreq core generate a freq_table, set
> it in the profile struct and then PROBE_DEFER, freeing the freq_table.
> In this case devfreq will found a NOT NULL freq_table that has been
> freed, skip the freq_table generation and probe the driver based on the
> wrong table.
>
> To fix this and correctly handle PROBE_DEFER, use a local freq_table and
> max_state in the devfreq struct and never modify the freq_table present
> in the profile struct if it does provide it.
>
> Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
> Cc: [email protected]
> Signed-off-by: Christian Marangi <[email protected]>
> ---
> drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
> drivers/devfreq/governor_passive.c | 14 +++---
> include/linux/devfreq.h | 5 +++
> 3 files changed, 46 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 01474daf4548..2e2b3b414d67 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> unsigned long *min_freq,
> unsigned long *max_freq)
> {
> - unsigned long *freq_table = devfreq->profile->freq_table;
> + unsigned long *freq_table = devfreq->freq_table;
> s32 qos_min_freq, qos_max_freq;
>
> lockdep_assert_held(&devfreq->lock);
> @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> * The devfreq drivers can initialize this in either ascending or
> * descending order and devfreq core supports both.
> */
> - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> + if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
> *min_freq = freq_table[0];
> - *max_freq = freq_table[devfreq->profile->max_state - 1];
> + *max_freq = freq_table[devfreq->max_state - 1];
> } else {
> - *min_freq = freq_table[devfreq->profile->max_state - 1];
> + *min_freq = freq_table[devfreq->max_state - 1];
> *max_freq = freq_table[0];
> }
>
> @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> {
> int lev;
>
> - for (lev = 0; lev < devfreq->profile->max_state; lev++)
> - if (freq == devfreq->profile->freq_table[lev])
> + for (lev = 0; lev < devfreq->max_state; lev++)
> + if (freq == devfreq->freq_table[lev])
> return lev;
>
> return -EINVAL;
> @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>
> static int set_freq_table(struct devfreq *devfreq)
> {
> - struct devfreq_dev_profile *profile = devfreq->profile;
> struct dev_pm_opp *opp;
> unsigned long freq;
> int i, count;
> @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
> if (count <= 0)
> return -EINVAL;
>
> - profile->max_state = count;
> - profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> - profile->max_state,
> - sizeof(*profile->freq_table),
> - GFP_KERNEL);
> - if (!profile->freq_table) {
> - profile->max_state = 0;
> + devfreq->max_state = count;
> + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> + devfreq->max_state,
> + sizeof(*devfreq->freq_table),
> + GFP_KERNEL);
> + if (!devfreq->freq_table)
> return -ENOMEM;
> - }
>
> - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
> opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
> if (IS_ERR(opp)) {
> - devm_kfree(devfreq->dev.parent, profile->freq_table);
> - profile->max_state = 0;
> + devm_kfree(devfreq->dev.parent, devfreq->freq_table);
> return PTR_ERR(opp);
> }
> dev_pm_opp_put(opp);
> - profile->freq_table[i] = freq;
> + devfreq->freq_table[i] = freq;
> }
>
> return 0;
> @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
>
> if (lev != prev_lev) {
> devfreq->stats.trans_table[
> - (prev_lev * devfreq->profile->max_state) + lev]++;
> + (prev_lev * devfreq->max_state) + lev]++;
> devfreq->stats.total_trans++;
> }
>
> @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> if (err < 0)
> goto err_dev;
> mutex_lock(&devfreq->lock);
> + } else {
> + devfreq->freq_table = devfreq->profile->freq_table;
> + devfreq->max_state = devfreq->profile->max_state;
> }
>
> devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
>
> devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
> array3_size(sizeof(unsigned int),
> - devfreq->profile->max_state,
> - devfreq->profile->max_state),
> + devfreq->max_state,
> + devfreq->max_state),
> GFP_KERNEL);
> if (!devfreq->stats.trans_table) {
> mutex_unlock(&devfreq->lock);
> @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
> }
>
> devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> - devfreq->profile->max_state,
> + devfreq->max_state,
> sizeof(*devfreq->stats.time_in_state),
> GFP_KERNEL);
> if (!devfreq->stats.time_in_state) {
> @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
>
> mutex_lock(&df->lock);
>
> - for (i = 0; i < df->profile->max_state; i++)
> + for (i = 0; i < df->max_state; i++)
> count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> - "%lu ", df->profile->freq_table[i]);
> + "%lu ", df->freq_table[i]);
>
> mutex_unlock(&df->lock);
> /* Truncate the trailing space */
> @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
>
> if (!df->profile)
> return -EINVAL;
> - max_state = df->profile->max_state;
> + max_state = df->max_state;
>
> if (max_state == 0)
> return sprintf(buf, "Not Supported.\n");
> @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
> len += sprintf(buf + len, " :");
> for (i = 0; i < max_state; i++)
> len += sprintf(buf + len, "%10lu",
> - df->profile->freq_table[i]);
> + df->freq_table[i]);
>
> len += sprintf(buf + len, " time(ms)\n");
>
> for (i = 0; i < max_state; i++) {
> - if (df->profile->freq_table[i]
> - == df->previous_freq) {
> + if (df->freq_table[i] == df->previous_freq)
> len += sprintf(buf + len, "*");
> - } else {
> + else
> len += sprintf(buf + len, " ");
> - }
> - len += sprintf(buf + len, "%10lu:",
> - df->profile->freq_table[i]);
> +
> + len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
> for (j = 0; j < max_state; j++)
> len += sprintf(buf + len, "%10u",
> df->stats.trans_table[(i * max_state) + j]);
> @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
> if (!df->profile)
> return -EINVAL;
>
> - if (df->profile->max_state == 0)
> + if (df->max_state == 0)
> return count;
>
> err = kstrtoint(buf, 10, &value);
> @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
> return -EINVAL;
>
> mutex_lock(&df->lock);
> - memset(df->stats.time_in_state, 0, (df->profile->max_state *
> + memset(df->stats.time_in_state, 0, (df->max_state *
> sizeof(*df->stats.time_in_state)));
> memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> - df->profile->max_state,
> - df->profile->max_state));
> + df->max_state,
> + df->max_state));
> df->stats.total_trans = 0;
> df->stats.last_update = get_jiffies_64();
> mutex_unlock(&df->lock);
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index 72c67979ebe1..ce24a262aa16 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
> goto out;
>
> /* Use interpolation if required opps is not available */
> - for (i = 0; i < parent_devfreq->profile->max_state; i++)
> - if (parent_devfreq->profile->freq_table[i] == *freq)
> + for (i = 0; i < parent_devfreq->max_state; i++)
> + if (parent_devfreq->freq_table[i] == *freq)
> break;
>
> - if (i == parent_devfreq->profile->max_state)
> + if (i == parent_devfreq->max_state)
> return -EINVAL;
>
> - if (i < devfreq->profile->max_state) {
> - child_freq = devfreq->profile->freq_table[i];
> + if (i < devfreq->max_state) {
> + child_freq = devfreq->freq_table[i];
> } else {
> - count = devfreq->profile->max_state;
> - child_freq = devfreq->profile->freq_table[count - 1];
> + count = devfreq->max_state;
> + child_freq = devfreq->freq_table[count - 1];
> }
>
> out:
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index dc10bee75a72..34aab4dd336c 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -148,6 +148,8 @@ struct devfreq_stats {
> * reevaluate operable frequencies. Devfreq users may use
> * devfreq.nb to the corresponding register notifier call chain.
> * @work: delayed work for load monitoring.
> + * @freq_table: current frequency table used by the devfreq driver.
> + * @max_state: count of entry present in the frequency table.
> * @previous_freq: previously configured frequency value.
> * @last_status: devfreq user device info, performance statistics
> * @data: Private data of the governor. The devfreq framework does not
> @@ -185,6 +187,9 @@ struct devfreq {
> struct notifier_block nb;
> struct delayed_work work;
>
> + unsigned long *freq_table;
> + unsigned int max_state;
> +
> unsigned long previous_freq;
> struct devfreq_dev_status last_status;
>

Applied it. Thanks.

--
Best Regards,
Samsung Electronics
Chanwoo Choi

2022-07-01 08:09:22

by Marek Szyprowski

[permalink] [raw]
Subject: Re: [PATCH] PM / devfreq: Rework freq_table to be local to devfreq struct

Hi All,

On 20.06.2022 00:03, Christian Marangi wrote:
> On a devfreq PROBE_DEFER, the freq_table in the driver profile struct,
> is never reset and may be leaved in an undefined state.
>
> This comes from the fact that we store the freq_table in the driver
> profile struct that is commonly defined as static and not reset on
> PROBE_DEFER.
> We currently skip the reinit of the freq_table if we found
> it's already defined since a driver may declare his own freq_table.
>
> This logic is flawed in the case devfreq core generate a freq_table, set
> it in the profile struct and then PROBE_DEFER, freeing the freq_table.
> In this case devfreq will found a NOT NULL freq_table that has been
> freed, skip the freq_table generation and probe the driver based on the
> wrong table.
>
> To fix this and correctly handle PROBE_DEFER, use a local freq_table and
> max_state in the devfreq struct and never modify the freq_table present
> in the profile struct if it does provide it.
>
> Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
> Cc: [email protected]
> Signed-off-by: Christian Marangi <[email protected]>
> ---

This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM /
devfreq: Rework freq_table to be local to devfreq struct").
Unfortunately it causes the following regression on my Exynos based test
systems:

8<--- cut here ---
Unable to handle kernel NULL pointer dereference at virtual address 00000000
[00000000] *pgd=00000000
Internal error: Oops: 5 [#1] PREEMPT SMP ARM
Modules linked in:
CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312
Hardware name: Samsung Exynos (Flattened Device Tree)
Workqueue: events_unbound deferred_probe_work_func
PC is at exynos_bus_probe+0x604/0x684
LR is at device_add+0x14c/0x908
pc : [<c090aef4>]    lr : [<c06cf77c>]    psr: 80000053
...
Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval))
Stack: (0xf0a15d30 to 0xf0a16000)
...
 exynos_bus_probe from platform_probe+0x5c/0xb8
 platform_probe from really_probe+0xe0/0x414
 really_probe from __driver_probe_device+0xa0/0x208
 __driver_probe_device from driver_probe_device+0x30/0xc0
 driver_probe_device from __device_attach_driver+0xa4/0x11c
 __device_attach_driver from bus_for_each_drv+0x7c/0xc0
 bus_for_each_drv from __device_attach+0xac/0x20c
 __device_attach from bus_probe_device+0x88/0x90
 bus_probe_device from deferred_probe_work_func+0x98/0xe0
 deferred_probe_work_func from process_one_work+0x288/0x774
 process_one_work from worker_thread+0x44/0x504
 worker_thread from kthread+0xf4/0x128
 kthread from ret_from_fork+0x14/0x2c
Exception stack(0xf0a15fb0 to 0xf0a15ff8)
...
---[ end trace 0000000000000000 ]---

This issue is caused by bus->devfreq->profile->freq_table being NULL here:

https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451


> drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
> drivers/devfreq/governor_passive.c | 14 +++---
> include/linux/devfreq.h | 5 +++
> 3 files changed, 46 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 01474daf4548..2e2b3b414d67 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> unsigned long *min_freq,
> unsigned long *max_freq)
> {
> - unsigned long *freq_table = devfreq->profile->freq_table;
> + unsigned long *freq_table = devfreq->freq_table;
> s32 qos_min_freq, qos_max_freq;
>
> lockdep_assert_held(&devfreq->lock);
> @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> * The devfreq drivers can initialize this in either ascending or
> * descending order and devfreq core supports both.
> */
> - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> + if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
> *min_freq = freq_table[0];
> - *max_freq = freq_table[devfreq->profile->max_state - 1];
> + *max_freq = freq_table[devfreq->max_state - 1];
> } else {
> - *min_freq = freq_table[devfreq->profile->max_state - 1];
> + *min_freq = freq_table[devfreq->max_state - 1];
> *max_freq = freq_table[0];
> }
>
> @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> {
> int lev;
>
> - for (lev = 0; lev < devfreq->profile->max_state; lev++)
> - if (freq == devfreq->profile->freq_table[lev])
> + for (lev = 0; lev < devfreq->max_state; lev++)
> + if (freq == devfreq->freq_table[lev])
> return lev;
>
> return -EINVAL;
> @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>
> static int set_freq_table(struct devfreq *devfreq)
> {
> - struct devfreq_dev_profile *profile = devfreq->profile;
> struct dev_pm_opp *opp;
> unsigned long freq;
> int i, count;
> @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
> if (count <= 0)
> return -EINVAL;
>
> - profile->max_state = count;
> - profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> - profile->max_state,
> - sizeof(*profile->freq_table),
> - GFP_KERNEL);
> - if (!profile->freq_table) {
> - profile->max_state = 0;
> + devfreq->max_state = count;
> + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> + devfreq->max_state,
> + sizeof(*devfreq->freq_table),
> + GFP_KERNEL);
> + if (!devfreq->freq_table)
> return -ENOMEM;
> - }
>
> - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
> opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
> if (IS_ERR(opp)) {
> - devm_kfree(devfreq->dev.parent, profile->freq_table);
> - profile->max_state = 0;
> + devm_kfree(devfreq->dev.parent, devfreq->freq_table);
> return PTR_ERR(opp);
> }
> dev_pm_opp_put(opp);
> - profile->freq_table[i] = freq;
> + devfreq->freq_table[i] = freq;
> }
>
> return 0;
> @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
>
> if (lev != prev_lev) {
> devfreq->stats.trans_table[
> - (prev_lev * devfreq->profile->max_state) + lev]++;
> + (prev_lev * devfreq->max_state) + lev]++;
> devfreq->stats.total_trans++;
> }
>
> @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> if (err < 0)
> goto err_dev;
> mutex_lock(&devfreq->lock);
> + } else {
> + devfreq->freq_table = devfreq->profile->freq_table;
> + devfreq->max_state = devfreq->profile->max_state;
> }
>
> devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
>
> devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
> array3_size(sizeof(unsigned int),
> - devfreq->profile->max_state,
> - devfreq->profile->max_state),
> + devfreq->max_state,
> + devfreq->max_state),
> GFP_KERNEL);
> if (!devfreq->stats.trans_table) {
> mutex_unlock(&devfreq->lock);
> @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
> }
>
> devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> - devfreq->profile->max_state,
> + devfreq->max_state,
> sizeof(*devfreq->stats.time_in_state),
> GFP_KERNEL);
> if (!devfreq->stats.time_in_state) {
> @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
>
> mutex_lock(&df->lock);
>
> - for (i = 0; i < df->profile->max_state; i++)
> + for (i = 0; i < df->max_state; i++)
> count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> - "%lu ", df->profile->freq_table[i]);
> + "%lu ", df->freq_table[i]);
>
> mutex_unlock(&df->lock);
> /* Truncate the trailing space */
> @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
>
> if (!df->profile)
> return -EINVAL;
> - max_state = df->profile->max_state;
> + max_state = df->max_state;
>
> if (max_state == 0)
> return sprintf(buf, "Not Supported.\n");
> @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
> len += sprintf(buf + len, " :");
> for (i = 0; i < max_state; i++)
> len += sprintf(buf + len, "%10lu",
> - df->profile->freq_table[i]);
> + df->freq_table[i]);
>
> len += sprintf(buf + len, " time(ms)\n");
>
> for (i = 0; i < max_state; i++) {
> - if (df->profile->freq_table[i]
> - == df->previous_freq) {
> + if (df->freq_table[i] == df->previous_freq)
> len += sprintf(buf + len, "*");
> - } else {
> + else
> len += sprintf(buf + len, " ");
> - }
> - len += sprintf(buf + len, "%10lu:",
> - df->profile->freq_table[i]);
> +
> + len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
> for (j = 0; j < max_state; j++)
> len += sprintf(buf + len, "%10u",
> df->stats.trans_table[(i * max_state) + j]);
> @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
> if (!df->profile)
> return -EINVAL;
>
> - if (df->profile->max_state == 0)
> + if (df->max_state == 0)
> return count;
>
> err = kstrtoint(buf, 10, &value);
> @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
> return -EINVAL;
>
> mutex_lock(&df->lock);
> - memset(df->stats.time_in_state, 0, (df->profile->max_state *
> + memset(df->stats.time_in_state, 0, (df->max_state *
> sizeof(*df->stats.time_in_state)));
> memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> - df->profile->max_state,
> - df->profile->max_state));
> + df->max_state,
> + df->max_state));
> df->stats.total_trans = 0;
> df->stats.last_update = get_jiffies_64();
> mutex_unlock(&df->lock);
> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> index 72c67979ebe1..ce24a262aa16 100644
> --- a/drivers/devfreq/governor_passive.c
> +++ b/drivers/devfreq/governor_passive.c
> @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
> goto out;
>
> /* Use interpolation if required opps is not available */
> - for (i = 0; i < parent_devfreq->profile->max_state; i++)
> - if (parent_devfreq->profile->freq_table[i] == *freq)
> + for (i = 0; i < parent_devfreq->max_state; i++)
> + if (parent_devfreq->freq_table[i] == *freq)
> break;
>
> - if (i == parent_devfreq->profile->max_state)
> + if (i == parent_devfreq->max_state)
> return -EINVAL;
>
> - if (i < devfreq->profile->max_state) {
> - child_freq = devfreq->profile->freq_table[i];
> + if (i < devfreq->max_state) {
> + child_freq = devfreq->freq_table[i];
> } else {
> - count = devfreq->profile->max_state;
> - child_freq = devfreq->profile->freq_table[count - 1];
> + count = devfreq->max_state;
> + child_freq = devfreq->freq_table[count - 1];
> }
>
> out:
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index dc10bee75a72..34aab4dd336c 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -148,6 +148,8 @@ struct devfreq_stats {
> * reevaluate operable frequencies. Devfreq users may use
> * devfreq.nb to the corresponding register notifier call chain.
> * @work: delayed work for load monitoring.
> + * @freq_table: current frequency table used by the devfreq driver.
> + * @max_state: count of entry present in the frequency table.
> * @previous_freq: previously configured frequency value.
> * @last_status: devfreq user device info, performance statistics
> * @data: Private data of the governor. The devfreq framework does not
> @@ -185,6 +187,9 @@ struct devfreq {
> struct notifier_block nb;
> struct delayed_work work;
>
> + unsigned long *freq_table;
> + unsigned int max_state;
> +
> unsigned long previous_freq;
> struct devfreq_dev_status last_status;
>

Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland

2022-07-01 12:02:48

by Christian Marangi

[permalink] [raw]
Subject: Re: [PATCH] PM / devfreq: Rework freq_table to be local to devfreq struct

On Fri, Jul 01, 2022 at 01:28:50PM +0200, Christian Marangi wrote:
> On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
> > Hi All,
> >
> > On 20.06.2022 00:03, Christian Marangi wrote:
> > > On a devfreq PROBE_DEFER, the freq_table in the driver profile struct,
> > > is never reset and may be leaved in an undefined state.
> > >
> > > This comes from the fact that we store the freq_table in the driver
> > > profile struct that is commonly defined as static and not reset on
> > > PROBE_DEFER.
> > > We currently skip the reinit of the freq_table if we found
> > > it's already defined since a driver may declare his own freq_table.
> > >
> > > This logic is flawed in the case devfreq core generate a freq_table, set
> > > it in the profile struct and then PROBE_DEFER, freeing the freq_table.
> > > In this case devfreq will found a NOT NULL freq_table that has been
> > > freed, skip the freq_table generation and probe the driver based on the
> > > wrong table.
> > >
> > > To fix this and correctly handle PROBE_DEFER, use a local freq_table and
> > > max_state in the devfreq struct and never modify the freq_table present
> > > in the profile struct if it does provide it.
> > >
> > > Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
> > > Cc: [email protected]
> > > Signed-off-by: Christian Marangi <[email protected]>
> > > ---
> >
> > This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM /
> > devfreq: Rework freq_table to be local to devfreq struct").
> > Unfortunately it causes the following regression on my Exynos based test
> > systems:
> >
> > 8<--- cut here ---
> > Unable to handle kernel NULL pointer dereference at virtual address 00000000
> > [00000000] *pgd=00000000
> > Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> > Modules linked in:
> > CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312
> > Hardware name: Samsung Exynos (Flattened Device Tree)
> > Workqueue: events_unbound deferred_probe_work_func
> > PC is at exynos_bus_probe+0x604/0x684
> > LR is at device_add+0x14c/0x908
> > pc : [<c090aef4>]    lr : [<c06cf77c>]    psr: 80000053
> > ...
> > Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval))
> > Stack: (0xf0a15d30 to 0xf0a16000)
> > ...
> >  exynos_bus_probe from platform_probe+0x5c/0xb8
> >  platform_probe from really_probe+0xe0/0x414
> >  really_probe from __driver_probe_device+0xa0/0x208
> >  __driver_probe_device from driver_probe_device+0x30/0xc0
> >  driver_probe_device from __device_attach_driver+0xa4/0x11c
> >  __device_attach_driver from bus_for_each_drv+0x7c/0xc0
> >  bus_for_each_drv from __device_attach+0xac/0x20c
> >  __device_attach from bus_probe_device+0x88/0x90
> >  bus_probe_device from deferred_probe_work_func+0x98/0xe0
> >  deferred_probe_work_func from process_one_work+0x288/0x774
> >  process_one_work from worker_thread+0x44/0x504
> >  worker_thread from kthread+0xf4/0x128
> >  kthread from ret_from_fork+0x14/0x2c
> > Exception stack(0xf0a15fb0 to 0xf0a15ff8)
> > ...
> > ---[ end trace 0000000000000000 ]---
> >
> > This issue is caused by bus->devfreq->profile->freq_table being NULL here:
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451
> >
> >
>
> I just checked this and the bug is caused by a simple pr_info...
>
> Can you test the following patch just to make sure?
>
> diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
> index b5615e667e31..79725bbb4bb0 100644
> --- a/drivers/devfreq/exynos-bus.c
> +++ b/drivers/devfreq/exynos-bus.c
> @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev)
> }
> }
>
> - max_state = bus->devfreq->profile->max_state;
> - min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
> - max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
> + max_state = bus->devfreq->max_state;
> + min_freq = (bus->devfreq->freq_table[0] / 1000);
> + max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000);
> pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
> dev_name(dev), min_freq, max_freq);
>
>

(BTW patch is ready, just waiting for your test and I will send it)

> > > drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
> > > drivers/devfreq/governor_passive.c | 14 +++---
> > > include/linux/devfreq.h | 5 +++
> > > 3 files changed, 46 insertions(+), 44 deletions(-)
> > >
> > > diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> > > index 01474daf4548..2e2b3b414d67 100644
> > > --- a/drivers/devfreq/devfreq.c
> > > +++ b/drivers/devfreq/devfreq.c
> > > @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> > > unsigned long *min_freq,
> > > unsigned long *max_freq)
> > > {
> > > - unsigned long *freq_table = devfreq->profile->freq_table;
> > > + unsigned long *freq_table = devfreq->freq_table;
> > > s32 qos_min_freq, qos_max_freq;
> > >
> > > lockdep_assert_held(&devfreq->lock);
> > > @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> > > * The devfreq drivers can initialize this in either ascending or
> > > * descending order and devfreq core supports both.
> > > */
> > > - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> > > + if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
> > > *min_freq = freq_table[0];
> > > - *max_freq = freq_table[devfreq->profile->max_state - 1];
> > > + *max_freq = freq_table[devfreq->max_state - 1];
> > > } else {
> > > - *min_freq = freq_table[devfreq->profile->max_state - 1];
> > > + *min_freq = freq_table[devfreq->max_state - 1];
> > > *max_freq = freq_table[0];
> > > }
> > >
> > > @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> > > {
> > > int lev;
> > >
> > > - for (lev = 0; lev < devfreq->profile->max_state; lev++)
> > > - if (freq == devfreq->profile->freq_table[lev])
> > > + for (lev = 0; lev < devfreq->max_state; lev++)
> > > + if (freq == devfreq->freq_table[lev])
> > > return lev;
> > >
> > > return -EINVAL;
> > > @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> > >
> > > static int set_freq_table(struct devfreq *devfreq)
> > > {
> > > - struct devfreq_dev_profile *profile = devfreq->profile;
> > > struct dev_pm_opp *opp;
> > > unsigned long freq;
> > > int i, count;
> > > @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
> > > if (count <= 0)
> > > return -EINVAL;
> > >
> > > - profile->max_state = count;
> > > - profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> > > - profile->max_state,
> > > - sizeof(*profile->freq_table),
> > > - GFP_KERNEL);
> > > - if (!profile->freq_table) {
> > > - profile->max_state = 0;
> > > + devfreq->max_state = count;
> > > + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> > > + devfreq->max_state,
> > > + sizeof(*devfreq->freq_table),
> > > + GFP_KERNEL);
> > > + if (!devfreq->freq_table)
> > > return -ENOMEM;
> > > - }
> > >
> > > - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> > > + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
> > > opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
> > > if (IS_ERR(opp)) {
> > > - devm_kfree(devfreq->dev.parent, profile->freq_table);
> > > - profile->max_state = 0;
> > > + devm_kfree(devfreq->dev.parent, devfreq->freq_table);
> > > return PTR_ERR(opp);
> > > }
> > > dev_pm_opp_put(opp);
> > > - profile->freq_table[i] = freq;
> > > + devfreq->freq_table[i] = freq;
> > > }
> > >
> > > return 0;
> > > @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
> > >
> > > if (lev != prev_lev) {
> > > devfreq->stats.trans_table[
> > > - (prev_lev * devfreq->profile->max_state) + lev]++;
> > > + (prev_lev * devfreq->max_state) + lev]++;
> > > devfreq->stats.total_trans++;
> > > }
> > >
> > > @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > > if (err < 0)
> > > goto err_dev;
> > > mutex_lock(&devfreq->lock);
> > > + } else {
> > > + devfreq->freq_table = devfreq->profile->freq_table;
> > > + devfreq->max_state = devfreq->profile->max_state;
> > > }
> > >
> > > devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> > > @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > >
> > > devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
> > > array3_size(sizeof(unsigned int),
> > > - devfreq->profile->max_state,
> > > - devfreq->profile->max_state),
> > > + devfreq->max_state,
> > > + devfreq->max_state),
> > > GFP_KERNEL);
> > > if (!devfreq->stats.trans_table) {
> > > mutex_unlock(&devfreq->lock);
> > > @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > > }
> > >
> > > devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> > > - devfreq->profile->max_state,
> > > + devfreq->max_state,
> > > sizeof(*devfreq->stats.time_in_state),
> > > GFP_KERNEL);
> > > if (!devfreq->stats.time_in_state) {
> > > @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
> > >
> > > mutex_lock(&df->lock);
> > >
> > > - for (i = 0; i < df->profile->max_state; i++)
> > > + for (i = 0; i < df->max_state; i++)
> > > count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> > > - "%lu ", df->profile->freq_table[i]);
> > > + "%lu ", df->freq_table[i]);
> > >
> > > mutex_unlock(&df->lock);
> > > /* Truncate the trailing space */
> > > @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
> > >
> > > if (!df->profile)
> > > return -EINVAL;
> > > - max_state = df->profile->max_state;
> > > + max_state = df->max_state;
> > >
> > > if (max_state == 0)
> > > return sprintf(buf, "Not Supported.\n");
> > > @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
> > > len += sprintf(buf + len, " :");
> > > for (i = 0; i < max_state; i++)
> > > len += sprintf(buf + len, "%10lu",
> > > - df->profile->freq_table[i]);
> > > + df->freq_table[i]);
> > >
> > > len += sprintf(buf + len, " time(ms)\n");
> > >
> > > for (i = 0; i < max_state; i++) {
> > > - if (df->profile->freq_table[i]
> > > - == df->previous_freq) {
> > > + if (df->freq_table[i] == df->previous_freq)
> > > len += sprintf(buf + len, "*");
> > > - } else {
> > > + else
> > > len += sprintf(buf + len, " ");
> > > - }
> > > - len += sprintf(buf + len, "%10lu:",
> > > - df->profile->freq_table[i]);
> > > +
> > > + len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
> > > for (j = 0; j < max_state; j++)
> > > len += sprintf(buf + len, "%10u",
> > > df->stats.trans_table[(i * max_state) + j]);
> > > @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
> > > if (!df->profile)
> > > return -EINVAL;
> > >
> > > - if (df->profile->max_state == 0)
> > > + if (df->max_state == 0)
> > > return count;
> > >
> > > err = kstrtoint(buf, 10, &value);
> > > @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
> > > return -EINVAL;
> > >
> > > mutex_lock(&df->lock);
> > > - memset(df->stats.time_in_state, 0, (df->profile->max_state *
> > > + memset(df->stats.time_in_state, 0, (df->max_state *
> > > sizeof(*df->stats.time_in_state)));
> > > memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> > > - df->profile->max_state,
> > > - df->profile->max_state));
> > > + df->max_state,
> > > + df->max_state));
> > > df->stats.total_trans = 0;
> > > df->stats.last_update = get_jiffies_64();
> > > mutex_unlock(&df->lock);
> > > diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> > > index 72c67979ebe1..ce24a262aa16 100644
> > > --- a/drivers/devfreq/governor_passive.c
> > > +++ b/drivers/devfreq/governor_passive.c
> > > @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
> > > goto out;
> > >
> > > /* Use interpolation if required opps is not available */
> > > - for (i = 0; i < parent_devfreq->profile->max_state; i++)
> > > - if (parent_devfreq->profile->freq_table[i] == *freq)
> > > + for (i = 0; i < parent_devfreq->max_state; i++)
> > > + if (parent_devfreq->freq_table[i] == *freq)
> > > break;
> > >
> > > - if (i == parent_devfreq->profile->max_state)
> > > + if (i == parent_devfreq->max_state)
> > > return -EINVAL;
> > >
> > > - if (i < devfreq->profile->max_state) {
> > > - child_freq = devfreq->profile->freq_table[i];
> > > + if (i < devfreq->max_state) {
> > > + child_freq = devfreq->freq_table[i];
> > > } else {
> > > - count = devfreq->profile->max_state;
> > > - child_freq = devfreq->profile->freq_table[count - 1];
> > > + count = devfreq->max_state;
> > > + child_freq = devfreq->freq_table[count - 1];
> > > }
> > >
> > > out:
> > > diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> > > index dc10bee75a72..34aab4dd336c 100644
> > > --- a/include/linux/devfreq.h
> > > +++ b/include/linux/devfreq.h
> > > @@ -148,6 +148,8 @@ struct devfreq_stats {
> > > * reevaluate operable frequencies. Devfreq users may use
> > > * devfreq.nb to the corresponding register notifier call chain.
> > > * @work: delayed work for load monitoring.
> > > + * @freq_table: current frequency table used by the devfreq driver.
> > > + * @max_state: count of entry present in the frequency table.
> > > * @previous_freq: previously configured frequency value.
> > > * @last_status: devfreq user device info, performance statistics
> > > * @data: Private data of the governor. The devfreq framework does not
> > > @@ -185,6 +187,9 @@ struct devfreq {
> > > struct notifier_block nb;
> > > struct delayed_work work;
> > >
> > > + unsigned long *freq_table;
> > > + unsigned int max_state;
> > > +
> > > unsigned long previous_freq;
> > > struct devfreq_dev_status last_status;
> > >
> >
> > Best regards
> > --
> > Marek Szyprowski, PhD
> > Samsung R&D Institute Poland
> >
>
> --
> Ansuel

--
Ansuel

2022-07-01 12:06:30

by Christian Marangi

[permalink] [raw]
Subject: Re: [PATCH] PM / devfreq: Rework freq_table to be local to devfreq struct

On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
> Hi All,
>
> On 20.06.2022 00:03, Christian Marangi wrote:
> > On a devfreq PROBE_DEFER, the freq_table in the driver profile struct,
> > is never reset and may be leaved in an undefined state.
> >
> > This comes from the fact that we store the freq_table in the driver
> > profile struct that is commonly defined as static and not reset on
> > PROBE_DEFER.
> > We currently skip the reinit of the freq_table if we found
> > it's already defined since a driver may declare his own freq_table.
> >
> > This logic is flawed in the case devfreq core generate a freq_table, set
> > it in the profile struct and then PROBE_DEFER, freeing the freq_table.
> > In this case devfreq will found a NOT NULL freq_table that has been
> > freed, skip the freq_table generation and probe the driver based on the
> > wrong table.
> >
> > To fix this and correctly handle PROBE_DEFER, use a local freq_table and
> > max_state in the devfreq struct and never modify the freq_table present
> > in the profile struct if it does provide it.
> >
> > Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
> > Cc: [email protected]
> > Signed-off-by: Christian Marangi <[email protected]>
> > ---
>
> This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM /
> devfreq: Rework freq_table to be local to devfreq struct").
> Unfortunately it causes the following regression on my Exynos based test
> systems:
>
> 8<--- cut here ---
> Unable to handle kernel NULL pointer dereference at virtual address 00000000
> [00000000] *pgd=00000000
> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
> Modules linked in:
> CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312
> Hardware name: Samsung Exynos (Flattened Device Tree)
> Workqueue: events_unbound deferred_probe_work_func
> PC is at exynos_bus_probe+0x604/0x684
> LR is at device_add+0x14c/0x908
> pc : [<c090aef4>]    lr : [<c06cf77c>]    psr: 80000053
> ...
> Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval))
> Stack: (0xf0a15d30 to 0xf0a16000)
> ...
>  exynos_bus_probe from platform_probe+0x5c/0xb8
>  platform_probe from really_probe+0xe0/0x414
>  really_probe from __driver_probe_device+0xa0/0x208
>  __driver_probe_device from driver_probe_device+0x30/0xc0
>  driver_probe_device from __device_attach_driver+0xa4/0x11c
>  __device_attach_driver from bus_for_each_drv+0x7c/0xc0
>  bus_for_each_drv from __device_attach+0xac/0x20c
>  __device_attach from bus_probe_device+0x88/0x90
>  bus_probe_device from deferred_probe_work_func+0x98/0xe0
>  deferred_probe_work_func from process_one_work+0x288/0x774
>  process_one_work from worker_thread+0x44/0x504
>  worker_thread from kthread+0xf4/0x128
>  kthread from ret_from_fork+0x14/0x2c
> Exception stack(0xf0a15fb0 to 0xf0a15ff8)
> ...
> ---[ end trace 0000000000000000 ]---
>
> This issue is caused by bus->devfreq->profile->freq_table being NULL here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451
>
>

I just checked this and the bug is caused by a simple pr_info...

Can you test the following patch just to make sure?

diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index b5615e667e31..79725bbb4bb0 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev)
}
}

- max_state = bus->devfreq->profile->max_state;
- min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
- max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
+ max_state = bus->devfreq->max_state;
+ min_freq = (bus->devfreq->freq_table[0] / 1000);
+ max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000);
pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
dev_name(dev), min_freq, max_freq);


> > drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
> > drivers/devfreq/governor_passive.c | 14 +++---
> > include/linux/devfreq.h | 5 +++
> > 3 files changed, 46 insertions(+), 44 deletions(-)
> >
> > diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> > index 01474daf4548..2e2b3b414d67 100644
> > --- a/drivers/devfreq/devfreq.c
> > +++ b/drivers/devfreq/devfreq.c
> > @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> > unsigned long *min_freq,
> > unsigned long *max_freq)
> > {
> > - unsigned long *freq_table = devfreq->profile->freq_table;
> > + unsigned long *freq_table = devfreq->freq_table;
> > s32 qos_min_freq, qos_max_freq;
> >
> > lockdep_assert_held(&devfreq->lock);
> > @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
> > * The devfreq drivers can initialize this in either ascending or
> > * descending order and devfreq core supports both.
> > */
> > - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
> > + if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
> > *min_freq = freq_table[0];
> > - *max_freq = freq_table[devfreq->profile->max_state - 1];
> > + *max_freq = freq_table[devfreq->max_state - 1];
> > } else {
> > - *min_freq = freq_table[devfreq->profile->max_state - 1];
> > + *min_freq = freq_table[devfreq->max_state - 1];
> > *max_freq = freq_table[0];
> > }
> >
> > @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> > {
> > int lev;
> >
> > - for (lev = 0; lev < devfreq->profile->max_state; lev++)
> > - if (freq == devfreq->profile->freq_table[lev])
> > + for (lev = 0; lev < devfreq->max_state; lev++)
> > + if (freq == devfreq->freq_table[lev])
> > return lev;
> >
> > return -EINVAL;
> > @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
> >
> > static int set_freq_table(struct devfreq *devfreq)
> > {
> > - struct devfreq_dev_profile *profile = devfreq->profile;
> > struct dev_pm_opp *opp;
> > unsigned long freq;
> > int i, count;
> > @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
> > if (count <= 0)
> > return -EINVAL;
> >
> > - profile->max_state = count;
> > - profile->freq_table = devm_kcalloc(devfreq->dev.parent,
> > - profile->max_state,
> > - sizeof(*profile->freq_table),
> > - GFP_KERNEL);
> > - if (!profile->freq_table) {
> > - profile->max_state = 0;
> > + devfreq->max_state = count;
> > + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
> > + devfreq->max_state,
> > + sizeof(*devfreq->freq_table),
> > + GFP_KERNEL);
> > + if (!devfreq->freq_table)
> > return -ENOMEM;
> > - }
> >
> > - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
> > + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
> > opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
> > if (IS_ERR(opp)) {
> > - devm_kfree(devfreq->dev.parent, profile->freq_table);
> > - profile->max_state = 0;
> > + devm_kfree(devfreq->dev.parent, devfreq->freq_table);
> > return PTR_ERR(opp);
> > }
> > dev_pm_opp_put(opp);
> > - profile->freq_table[i] = freq;
> > + devfreq->freq_table[i] = freq;
> > }
> >
> > return 0;
> > @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
> >
> > if (lev != prev_lev) {
> > devfreq->stats.trans_table[
> > - (prev_lev * devfreq->profile->max_state) + lev]++;
> > + (prev_lev * devfreq->max_state) + lev]++;
> > devfreq->stats.total_trans++;
> > }
> >
> > @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > if (err < 0)
> > goto err_dev;
> > mutex_lock(&devfreq->lock);
> > + } else {
> > + devfreq->freq_table = devfreq->profile->freq_table;
> > + devfreq->max_state = devfreq->profile->max_state;
> > }
> >
> > devfreq->scaling_min_freq = find_available_min_freq(devfreq);
> > @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
> >
> > devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
> > array3_size(sizeof(unsigned int),
> > - devfreq->profile->max_state,
> > - devfreq->profile->max_state),
> > + devfreq->max_state,
> > + devfreq->max_state),
> > GFP_KERNEL);
> > if (!devfreq->stats.trans_table) {
> > mutex_unlock(&devfreq->lock);
> > @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
> > }
> >
> > devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
> > - devfreq->profile->max_state,
> > + devfreq->max_state,
> > sizeof(*devfreq->stats.time_in_state),
> > GFP_KERNEL);
> > if (!devfreq->stats.time_in_state) {
> > @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
> >
> > mutex_lock(&df->lock);
> >
> > - for (i = 0; i < df->profile->max_state; i++)
> > + for (i = 0; i < df->max_state; i++)
> > count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
> > - "%lu ", df->profile->freq_table[i]);
> > + "%lu ", df->freq_table[i]);
> >
> > mutex_unlock(&df->lock);
> > /* Truncate the trailing space */
> > @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
> >
> > if (!df->profile)
> > return -EINVAL;
> > - max_state = df->profile->max_state;
> > + max_state = df->max_state;
> >
> > if (max_state == 0)
> > return sprintf(buf, "Not Supported.\n");
> > @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
> > len += sprintf(buf + len, " :");
> > for (i = 0; i < max_state; i++)
> > len += sprintf(buf + len, "%10lu",
> > - df->profile->freq_table[i]);
> > + df->freq_table[i]);
> >
> > len += sprintf(buf + len, " time(ms)\n");
> >
> > for (i = 0; i < max_state; i++) {
> > - if (df->profile->freq_table[i]
> > - == df->previous_freq) {
> > + if (df->freq_table[i] == df->previous_freq)
> > len += sprintf(buf + len, "*");
> > - } else {
> > + else
> > len += sprintf(buf + len, " ");
> > - }
> > - len += sprintf(buf + len, "%10lu:",
> > - df->profile->freq_table[i]);
> > +
> > + len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
> > for (j = 0; j < max_state; j++)
> > len += sprintf(buf + len, "%10u",
> > df->stats.trans_table[(i * max_state) + j]);
> > @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
> > if (!df->profile)
> > return -EINVAL;
> >
> > - if (df->profile->max_state == 0)
> > + if (df->max_state == 0)
> > return count;
> >
> > err = kstrtoint(buf, 10, &value);
> > @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
> > return -EINVAL;
> >
> > mutex_lock(&df->lock);
> > - memset(df->stats.time_in_state, 0, (df->profile->max_state *
> > + memset(df->stats.time_in_state, 0, (df->max_state *
> > sizeof(*df->stats.time_in_state)));
> > memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
> > - df->profile->max_state,
> > - df->profile->max_state));
> > + df->max_state,
> > + df->max_state));
> > df->stats.total_trans = 0;
> > df->stats.last_update = get_jiffies_64();
> > mutex_unlock(&df->lock);
> > diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
> > index 72c67979ebe1..ce24a262aa16 100644
> > --- a/drivers/devfreq/governor_passive.c
> > +++ b/drivers/devfreq/governor_passive.c
> > @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
> > goto out;
> >
> > /* Use interpolation if required opps is not available */
> > - for (i = 0; i < parent_devfreq->profile->max_state; i++)
> > - if (parent_devfreq->profile->freq_table[i] == *freq)
> > + for (i = 0; i < parent_devfreq->max_state; i++)
> > + if (parent_devfreq->freq_table[i] == *freq)
> > break;
> >
> > - if (i == parent_devfreq->profile->max_state)
> > + if (i == parent_devfreq->max_state)
> > return -EINVAL;
> >
> > - if (i < devfreq->profile->max_state) {
> > - child_freq = devfreq->profile->freq_table[i];
> > + if (i < devfreq->max_state) {
> > + child_freq = devfreq->freq_table[i];
> > } else {
> > - count = devfreq->profile->max_state;
> > - child_freq = devfreq->profile->freq_table[count - 1];
> > + count = devfreq->max_state;
> > + child_freq = devfreq->freq_table[count - 1];
> > }
> >
> > out:
> > diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> > index dc10bee75a72..34aab4dd336c 100644
> > --- a/include/linux/devfreq.h
> > +++ b/include/linux/devfreq.h
> > @@ -148,6 +148,8 @@ struct devfreq_stats {
> > * reevaluate operable frequencies. Devfreq users may use
> > * devfreq.nb to the corresponding register notifier call chain.
> > * @work: delayed work for load monitoring.
> > + * @freq_table: current frequency table used by the devfreq driver.
> > + * @max_state: count of entry present in the frequency table.
> > * @previous_freq: previously configured frequency value.
> > * @last_status: devfreq user device info, performance statistics
> > * @data: Private data of the governor. The devfreq framework does not
> > @@ -185,6 +187,9 @@ struct devfreq {
> > struct notifier_block nb;
> > struct delayed_work work;
> >
> > + unsigned long *freq_table;
> > + unsigned int max_state;
> > +
> > unsigned long previous_freq;
> > struct devfreq_dev_status last_status;
> >
>
> Best regards
> --
> Marek Szyprowski, PhD
> Samsung R&D Institute Poland
>

--
Ansuel

2022-07-01 12:35:59

by Marek Szyprowski

[permalink] [raw]
Subject: Re: [PATCH] PM / devfreq: Rework freq_table to be local to devfreq struct

On 01.07.2022 13:28, Christian Marangi wrote:
> On Fri, Jul 01, 2022 at 10:01:52AM +0200, Marek Szyprowski wrote:
>> On 20.06.2022 00:03, Christian Marangi wrote:
>>> On a devfreq PROBE_DEFER, the freq_table in the driver profile struct,
>>> is never reset and may be leaved in an undefined state.
>>>
>>> This comes from the fact that we store the freq_table in the driver
>>> profile struct that is commonly defined as static and not reset on
>>> PROBE_DEFER.
>>> We currently skip the reinit of the freq_table if we found
>>> it's already defined since a driver may declare his own freq_table.
>>>
>>> This logic is flawed in the case devfreq core generate a freq_table, set
>>> it in the profile struct and then PROBE_DEFER, freeing the freq_table.
>>> In this case devfreq will found a NOT NULL freq_table that has been
>>> freed, skip the freq_table generation and probe the driver based on the
>>> wrong table.
>>>
>>> To fix this and correctly handle PROBE_DEFER, use a local freq_table and
>>> max_state in the devfreq struct and never modify the freq_table present
>>> in the profile struct if it does provide it.
>>>
>>> Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device")
>>> Cc: [email protected]
>>> Signed-off-by: Christian Marangi <[email protected]>
>>> ---
>> This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM /
>> devfreq: Rework freq_table to be local to devfreq struct").
>> Unfortunately it causes the following regression on my Exynos based test
>> systems:
>>
>> 8<--- cut here ---
>> Unable to handle kernel NULL pointer dereference at virtual address 00000000
>> [00000000] *pgd=00000000
>> Internal error: Oops: 5 [#1] PREEMPT SMP ARM
>> Modules linked in:
>> CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312
>> Hardware name: Samsung Exynos (Flattened Device Tree)
>> Workqueue: events_unbound deferred_probe_work_func
>> PC is at exynos_bus_probe+0x604/0x684
>> LR is at device_add+0x14c/0x908
>> pc : [<c090aef4>]    lr : [<c06cf77c>]    psr: 80000053
>> ...
>> Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval))
>> Stack: (0xf0a15d30 to 0xf0a16000)
>> ...
>>  exynos_bus_probe from platform_probe+0x5c/0xb8
>>  platform_probe from really_probe+0xe0/0x414
>>  really_probe from __driver_probe_device+0xa0/0x208
>>  __driver_probe_device from driver_probe_device+0x30/0xc0
>>  driver_probe_device from __device_attach_driver+0xa4/0x11c
>>  __device_attach_driver from bus_for_each_drv+0x7c/0xc0
>>  bus_for_each_drv from __device_attach+0xac/0x20c
>>  __device_attach from bus_probe_device+0x88/0x90
>>  bus_probe_device from deferred_probe_work_func+0x98/0xe0
>>  deferred_probe_work_func from process_one_work+0x288/0x774
>>  process_one_work from worker_thread+0x44/0x504
>>  worker_thread from kthread+0xf4/0x128
>>  kthread from ret_from_fork+0x14/0x2c
>> Exception stack(0xf0a15fb0 to 0xf0a15ff8)
>> ...
>> ---[ end trace 0000000000000000 ]---
>>
>> This issue is caused by bus->devfreq->profile->freq_table being NULL here:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451
>>
>>
> I just checked this and the bug is caused by a simple pr_info...
>
> Can you test the following patch just to make sure?

Yes, this fixes the issue. Thanks! Feel free to add:

Reported-by: Marek Szyprowski <[email protected]>

Tested-by: Marek Szyprowski <[email protected]>

> diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
> index b5615e667e31..79725bbb4bb0 100644
> --- a/drivers/devfreq/exynos-bus.c
> +++ b/drivers/devfreq/exynos-bus.c
> @@ -447,9 +447,9 @@ static int exynos_bus_probe(struct platform_device *pdev)
> }
> }
>
> - max_state = bus->devfreq->profile->max_state;
> - min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
> - max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
> + max_state = bus->devfreq->max_state;
> + min_freq = (bus->devfreq->freq_table[0] / 1000);
> + max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000);
> pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
> dev_name(dev), min_freq, max_freq);
>
>
>>> drivers/devfreq/devfreq.c | 71 ++++++++++++++----------------
>>> drivers/devfreq/governor_passive.c | 14 +++---
>>> include/linux/devfreq.h | 5 +++
>>> 3 files changed, 46 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
>>> index 01474daf4548..2e2b3b414d67 100644
>>> --- a/drivers/devfreq/devfreq.c
>>> +++ b/drivers/devfreq/devfreq.c
>>> @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>>> unsigned long *min_freq,
>>> unsigned long *max_freq)
>>> {
>>> - unsigned long *freq_table = devfreq->profile->freq_table;
>>> + unsigned long *freq_table = devfreq->freq_table;
>>> s32 qos_min_freq, qos_max_freq;
>>>
>>> lockdep_assert_held(&devfreq->lock);
>>> @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq,
>>> * The devfreq drivers can initialize this in either ascending or
>>> * descending order and devfreq core supports both.
>>> */
>>> - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
>>> + if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
>>> *min_freq = freq_table[0];
>>> - *max_freq = freq_table[devfreq->profile->max_state - 1];
>>> + *max_freq = freq_table[devfreq->max_state - 1];
>>> } else {
>>> - *min_freq = freq_table[devfreq->profile->max_state - 1];
>>> + *min_freq = freq_table[devfreq->max_state - 1];
>>> *max_freq = freq_table[0];
>>> }
>>>
>>> @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>>> {
>>> int lev;
>>>
>>> - for (lev = 0; lev < devfreq->profile->max_state; lev++)
>>> - if (freq == devfreq->profile->freq_table[lev])
>>> + for (lev = 0; lev < devfreq->max_state; lev++)
>>> + if (freq == devfreq->freq_table[lev])
>>> return lev;
>>>
>>> return -EINVAL;
>>> @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq)
>>>
>>> static int set_freq_table(struct devfreq *devfreq)
>>> {
>>> - struct devfreq_dev_profile *profile = devfreq->profile;
>>> struct dev_pm_opp *opp;
>>> unsigned long freq;
>>> int i, count;
>>> @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq)
>>> if (count <= 0)
>>> return -EINVAL;
>>>
>>> - profile->max_state = count;
>>> - profile->freq_table = devm_kcalloc(devfreq->dev.parent,
>>> - profile->max_state,
>>> - sizeof(*profile->freq_table),
>>> - GFP_KERNEL);
>>> - if (!profile->freq_table) {
>>> - profile->max_state = 0;
>>> + devfreq->max_state = count;
>>> + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
>>> + devfreq->max_state,
>>> + sizeof(*devfreq->freq_table),
>>> + GFP_KERNEL);
>>> + if (!devfreq->freq_table)
>>> return -ENOMEM;
>>> - }
>>>
>>> - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
>>> + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
>>> opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
>>> if (IS_ERR(opp)) {
>>> - devm_kfree(devfreq->dev.parent, profile->freq_table);
>>> - profile->max_state = 0;
>>> + devm_kfree(devfreq->dev.parent, devfreq->freq_table);
>>> return PTR_ERR(opp);
>>> }
>>> dev_pm_opp_put(opp);
>>> - profile->freq_table[i] = freq;
>>> + devfreq->freq_table[i] = freq;
>>> }
>>>
>>> return 0;
>>> @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
>>>
>>> if (lev != prev_lev) {
>>> devfreq->stats.trans_table[
>>> - (prev_lev * devfreq->profile->max_state) + lev]++;
>>> + (prev_lev * devfreq->max_state) + lev]++;
>>> devfreq->stats.total_trans++;
>>> }
>>>
>>> @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>> if (err < 0)
>>> goto err_dev;
>>> mutex_lock(&devfreq->lock);
>>> + } else {
>>> + devfreq->freq_table = devfreq->profile->freq_table;
>>> + devfreq->max_state = devfreq->profile->max_state;
>>> }
>>>
>>> devfreq->scaling_min_freq = find_available_min_freq(devfreq);
>>> @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>>
>>> devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
>>> array3_size(sizeof(unsigned int),
>>> - devfreq->profile->max_state,
>>> - devfreq->profile->max_state),
>>> + devfreq->max_state,
>>> + devfreq->max_state),
>>> GFP_KERNEL);
>>> if (!devfreq->stats.trans_table) {
>>> mutex_unlock(&devfreq->lock);
>>> @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
>>> }
>>>
>>> devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
>>> - devfreq->profile->max_state,
>>> + devfreq->max_state,
>>> sizeof(*devfreq->stats.time_in_state),
>>> GFP_KERNEL);
>>> if (!devfreq->stats.time_in_state) {
>>> @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d,
>>>
>>> mutex_lock(&df->lock);
>>>
>>> - for (i = 0; i < df->profile->max_state; i++)
>>> + for (i = 0; i < df->max_state; i++)
>>> count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
>>> - "%lu ", df->profile->freq_table[i]);
>>> + "%lu ", df->freq_table[i]);
>>>
>>> mutex_unlock(&df->lock);
>>> /* Truncate the trailing space */
>>> @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev,
>>>
>>> if (!df->profile)
>>> return -EINVAL;
>>> - max_state = df->profile->max_state;
>>> + max_state = df->max_state;
>>>
>>> if (max_state == 0)
>>> return sprintf(buf, "Not Supported.\n");
>>> @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev,
>>> len += sprintf(buf + len, " :");
>>> for (i = 0; i < max_state; i++)
>>> len += sprintf(buf + len, "%10lu",
>>> - df->profile->freq_table[i]);
>>> + df->freq_table[i]);
>>>
>>> len += sprintf(buf + len, " time(ms)\n");
>>>
>>> for (i = 0; i < max_state; i++) {
>>> - if (df->profile->freq_table[i]
>>> - == df->previous_freq) {
>>> + if (df->freq_table[i] == df->previous_freq)
>>> len += sprintf(buf + len, "*");
>>> - } else {
>>> + else
>>> len += sprintf(buf + len, " ");
>>> - }
>>> - len += sprintf(buf + len, "%10lu:",
>>> - df->profile->freq_table[i]);
>>> +
>>> + len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
>>> for (j = 0; j < max_state; j++)
>>> len += sprintf(buf + len, "%10u",
>>> df->stats.trans_table[(i * max_state) + j]);
>>> @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev,
>>> if (!df->profile)
>>> return -EINVAL;
>>>
>>> - if (df->profile->max_state == 0)
>>> + if (df->max_state == 0)
>>> return count;
>>>
>>> err = kstrtoint(buf, 10, &value);
>>> @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev,
>>> return -EINVAL;
>>>
>>> mutex_lock(&df->lock);
>>> - memset(df->stats.time_in_state, 0, (df->profile->max_state *
>>> + memset(df->stats.time_in_state, 0, (df->max_state *
>>> sizeof(*df->stats.time_in_state)));
>>> memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
>>> - df->profile->max_state,
>>> - df->profile->max_state));
>>> + df->max_state,
>>> + df->max_state));
>>> df->stats.total_trans = 0;
>>> df->stats.last_update = get_jiffies_64();
>>> mutex_unlock(&df->lock);
>>> diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
>>> index 72c67979ebe1..ce24a262aa16 100644
>>> --- a/drivers/devfreq/governor_passive.c
>>> +++ b/drivers/devfreq/governor_passive.c
>>> @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq,
>>> goto out;
>>>
>>> /* Use interpolation if required opps is not available */
>>> - for (i = 0; i < parent_devfreq->profile->max_state; i++)
>>> - if (parent_devfreq->profile->freq_table[i] == *freq)
>>> + for (i = 0; i < parent_devfreq->max_state; i++)
>>> + if (parent_devfreq->freq_table[i] == *freq)
>>> break;
>>>
>>> - if (i == parent_devfreq->profile->max_state)
>>> + if (i == parent_devfreq->max_state)
>>> return -EINVAL;
>>>
>>> - if (i < devfreq->profile->max_state) {
>>> - child_freq = devfreq->profile->freq_table[i];
>>> + if (i < devfreq->max_state) {
>>> + child_freq = devfreq->freq_table[i];
>>> } else {
>>> - count = devfreq->profile->max_state;
>>> - child_freq = devfreq->profile->freq_table[count - 1];
>>> + count = devfreq->max_state;
>>> + child_freq = devfreq->freq_table[count - 1];
>>> }
>>>
>>> out:
>>> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
>>> index dc10bee75a72..34aab4dd336c 100644
>>> --- a/include/linux/devfreq.h
>>> +++ b/include/linux/devfreq.h
>>> @@ -148,6 +148,8 @@ struct devfreq_stats {
>>> * reevaluate operable frequencies. Devfreq users may use
>>> * devfreq.nb to the corresponding register notifier call chain.
>>> * @work: delayed work for load monitoring.
>>> + * @freq_table: current frequency table used by the devfreq driver.
>>> + * @max_state: number of entries present in the frequency table.
>>> * @previous_freq: previously configured frequency value.
>>> * @last_status: devfreq user device info, performance statistics
>>> * @data: Private data of the governor. The devfreq framework does not
>>> @@ -185,6 +187,9 @@ struct devfreq {
>>> struct notifier_block nb;
>>> struct delayed_work work;
>>>
>>> + unsigned long *freq_table;
>>> + unsigned int max_state;
>>> +
>>> unsigned long previous_freq;
>>> struct devfreq_dev_status last_status;
>>>
>> Best regards
>> --
>> Marek Szyprowski, PhD
>> Samsung R&D Institute Poland
>>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland