2024-02-02 09:58:08

by Zhang, Rui

[permalink] [raw]
Subject: [PATCH V2 00/11] hwmon: (coretemp) Fixes, improvements and support for large core count

Patch 1/11 is a bug fix that should be considered as stable material.
Patch 2/11 fixes a user visible sysfs attribute name change.
Patch 3/11 is a quick fix to allow coretemp driver to probe more than
128 cores.
Patch 4/11 - 10/11 are a series of improvements aim to simplify the
code logic and remove unnecessary macros, variables and
structure fields, and make it easier for patch 11/11.
Patch 11/11 converts coretemp driver to use dynamic memory allocation
for core temp_data, so that it is easy to remove the
hardcoded core count limitation when _num_cores_per_package
become available and reliable, which is WIP in
https://lore.kernel.org/all/[email protected]/

I can split the first three patches into a separate patch set if needed.

Patch seris V1 has been posted at
https://lore.kernel.org/all/[email protected]/

thanks,
rui

----------------------------------------------------------------
Zhang Rui (11):
hwmon: (coretemp) Fix out-of-bounds memory access in create_core_data()
hwmon: (coretemp) Fix bogus core to attr mapping
hwmon: (coretemp) Enlarge per package core count limit
hwmon: (coretemp) Introduce enum for attr index
hwmon: (coretemp) Remove unnecessary dependency of array index
hwmon: (coretemp) Replace sensor_device_attribute with device_attribute
hwmon: (coretemp) Remove redundant pdata->cpu_map[]
hwmon: (coretemp) Abstract core_temp helpers
hwmon: (coretemp) Split package temp_data and core temp_data
hwmon: (coretemp) Remove redundant temp_data->is_pkg_data
hwmon: (coretemp) Use dynamic allocated memory for core temp_data

drivers/hwmon/coretemp.c | 219 ++++++++++++++++++++++++++---------------------
1 file changed, 120 insertions(+), 99 deletions(-)



2024-02-02 10:02:57

by Zhang, Rui

[permalink] [raw]
Subject: [PATCH V2 11/11] hwmon: (coretemp) Use dynamic allocated memory for core temp_data

The total memory needed for saving per core temperature data depends on
the number of cores in a package. Using static allocated memory wastes
memories on systems with low per package core count.

Improve the code to use dynamic allocated memory so that it can be
improved further when per package core count information becomes
available.

No functional change intended.

Signed-off-by: Zhang Rui <[email protected]>
---
drivers/hwmon/coretemp.c | 24 +++++++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index e548f2145449..27c98c7faf32 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -91,10 +91,11 @@ struct temp_data {
struct platform_data {
struct device *hwmon_dev;
u16 pkg_id;
+ int nr_cores;
struct ida ida;
struct cpumask cpumask;
struct temp_data *pkg_data;
- struct temp_data *core_data[NUM_REAL_CORES];
+ struct temp_data **core_data;
struct device_attribute name_attr;
};

@@ -480,6 +481,20 @@ init_temp_data(struct platform_data *pdata, unsigned int cpu, int pkg_flag)
{
struct temp_data *tdata;

+ if (!pdata->core_data) {
+ /*
+ * TODO:
+ * The information of actual possible cores in a package is broken for now.
+ * Will replace hardcoded NUM_REAL_CORES with actual per package core count
+ * when this information becomes available.
+ */
+ pdata->nr_cores = NUM_REAL_CORES;
+ pdata->core_data = kcalloc(pdata->nr_cores, sizeof(struct temp_data *),
+ GFP_KERNEL);
+ if (!pdata->core_data)
+ return NULL;
+ }
+
tdata = kzalloc(sizeof(struct temp_data), GFP_KERNEL);
if (!tdata)
return NULL;
@@ -489,7 +504,7 @@ init_temp_data(struct platform_data *pdata, unsigned int cpu, int pkg_flag)
/* Use tdata->index as indicator of package temp data */
tdata->index = -1;
} else {
- tdata->index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL);
+ tdata->index = ida_alloc_max(&pdata->ida, pdata->nr_cores - 1, GFP_KERNEL);
if (tdata->index < 0) {
kfree(tdata);
return NULL;
@@ -510,6 +525,9 @@ static void destroy_temp_data(struct platform_data *pdata, struct temp_data *tda
{
if (is_pkg_temp_data(tdata)) {
pdata->pkg_data = NULL;
+ kfree(pdata->core_data);
+ pdata->core_data = NULL;
+ pdata->nr_cores = 0;
} else {
pdata->core_data[tdata->index] = NULL;
ida_free(&pdata->ida, tdata->index);
@@ -525,7 +543,7 @@ static struct temp_data *get_temp_data(struct platform_data *pdata, int cpu)
if (cpu < 0)
return pdata->pkg_data;

- for (i = 0; i < NUM_REAL_CORES; i++) {
+ for (i = 0; i < pdata->nr_cores; i++) {
if (pdata->core_data[i] &&
pdata->core_data[i]->cpu_core_id == topology_core_id(cpu))
return pdata->core_data[i];
--
2.34.1


2024-02-02 10:04:04

by Zhang, Rui

[permalink] [raw]
Subject: [PATCH V2 07/11] hwmon: (coretemp) Remove redundant pdata->cpu_map[]

pdata->cpu_map[] saves the mapping between cpu core id and the index in
pdata->core_data[]. This is used to find the temp_data structure using
cpu_core_id, by traversing the pdata->cpu_map[] array. But the same goal
can be achieved by traversing the pdata->core_temp[] array directly.

Remove redundant pdata->cpu_map[].

No functional change.

Signed-off-by: Zhang Rui <[email protected]>
---
drivers/hwmon/coretemp.c | 16 ++++++----------
1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index cdd1e069d5c1..29ee8e0c0fe9 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -96,7 +96,6 @@ struct temp_data {
struct platform_data {
struct device *hwmon_dev;
u16 pkg_id;
- u16 cpu_map[NUM_REAL_CORES];
struct ida ida;
struct cpumask cpumask;
struct temp_data *core_data[MAX_CORE_DATA];
@@ -517,7 +516,6 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
if (index < 0)
return index;

- pdata->cpu_map[index] = topology_core_id(cpu);
index += BASE_SYSFS_ATTR_NO;
}

@@ -696,7 +694,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
struct platform_device *pdev = coretemp_get_pdev(cpu);
struct platform_data *pd;
struct temp_data *tdata;
- int i, indx = -1, target;
+ int i, target;

/* No need to tear down any interfaces for suspend */
if (cpuhp_tasks_frozen)
@@ -707,18 +705,16 @@ static int coretemp_cpu_offline(unsigned int cpu)
if (!pd->hwmon_dev)
return 0;

- for (i = 0; i < NUM_REAL_CORES; i++) {
- if (pd->cpu_map[i] == topology_core_id(cpu)) {
- indx = i + BASE_SYSFS_ATTR_NO;
+ for (i = BASE_SYSFS_ATTR_NO; i < MAX_CORE_DATA; i++) {
+ if (pd->core_data[i] && pd->core_data[i]->cpu_core_id == topology_core_id(cpu))
break;
- }
}

/* Too many cores and this core is not populated, just return */
- if (indx < 0)
+ if (i == MAX_CORE_DATA)
return 0;

- tdata = pd->core_data[indx];
+ tdata = pd->core_data[i];

cpumask_clear_cpu(cpu, &pd->cpumask);

@@ -729,7 +725,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
*/
target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
if (target >= nr_cpu_ids) {
- coretemp_remove_core(pd, indx);
+ coretemp_remove_core(pd, i);
} else if (tdata && tdata->cpu == cpu) {
mutex_lock(&tdata->update_lock);
tdata->cpu = target;
--
2.34.1


2024-02-02 18:16:01

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH V2 00/11] hwmon: (coretemp) Fixes, improvements and support for large core count

On 2/2/24 01:21, Zhang Rui wrote:
> Patch 1/11 is a bug fix that should be considered as stable material.
> Patch 2/11 fixes a user visible sysfs attribute name change.
> Patch 3/11 is a quick fix to allow coretemp driver to probe more than
> 128 cores.
> Patch 4/11 - 10/11 are a series of improvements aim to simplify the
> code logic and remove unnecessary macros, variables and
> structure fields, and make it easier for patch 11/11.
> Patch 11/11 converts coretemp driver to use dynamic memory allocation
> for core temp_data, so that it is easy to remove the
> hardcoded core count limitation when _num_cores_per_package
> become available and reliable, which is WIP in
> https://lore.kernel.org/all/[email protected]/
>
> I can split the first three patches into a separate patch set if needed.
>
> Patch seris V1 has been posted at
> https://lore.kernel.org/all/[email protected]/
>

Change log ?

Guenter

> thanks,
> rui
>
> ----------------------------------------------------------------
> Zhang Rui (11):
> hwmon: (coretemp) Fix out-of-bounds memory access in create_core_data()
> hwmon: (coretemp) Fix bogus core to attr mapping
> hwmon: (coretemp) Enlarge per package core count limit
> hwmon: (coretemp) Introduce enum for attr index
> hwmon: (coretemp) Remove unnecessary dependency of array index
> hwmon: (coretemp) Replace sensor_device_attribute with device_attribute
> hwmon: (coretemp) Remove redundant pdata->cpu_map[]
> hwmon: (coretemp) Abstract core_temp helpers
> hwmon: (coretemp) Split package temp_data and core temp_data
> hwmon: (coretemp) Remove redundant temp_data->is_pkg_data
> hwmon: (coretemp) Use dynamic allocated memory for core temp_data
>
> drivers/hwmon/coretemp.c | 219 ++++++++++++++++++++++++++---------------------
> 1 file changed, 120 insertions(+), 99 deletions(-)
>
>


2024-02-03 05:40:26

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH V2 00/11] hwmon: (coretemp) Fixes, improvements and support for large core count

On Fri, 2024-02-02 at 10:15 -0800, Guenter Roeck wrote:
> On 2/2/24 01:21, Zhang Rui wrote:
> > Patch 1/11 is a bug fix that should be considered as stable
> > material.
> > Patch 2/11 fixes a user visible sysfs attribute name change.
> > Patch 3/11 is a quick fix to allow coretemp driver to probe more
> > than
> >             128 cores.
> > Patch 4/11 - 10/11 are a series of improvements aim to simplify the
> >             code logic and remove unnecessary macros, variables and
> >             structure fields, and make it easier for patch 11/11.
> > Patch 11/11 converts coretemp driver to use dynamic memory
> > allocation
> >             for core temp_data, so that it is easy to remove the
> >             hardcoded core count limitation when
> > _num_cores_per_package
> >             become available and reliable, which is WIP in
> >            
> > https://lore.kernel.org/all/[email protected]/
> >
> > I can split the first three patches into a separate patch set if
> > needed.
> >
> > Patch seris V1 has been posted at
> > https://lore.kernel.org/all/[email protected]/
> >
>
> Change log ?

Changes since V1:
- Add two new fixes for issues found during code rewrite.
- Reorder enum coretemp_attr_index to better represent the 
relationship between each element and keep their value in ascending
order. Suggested by Ashok.
- Replace sensor_device_attribute with sensor_device_attribute.
Suggested by Ashok.
- Use a dynamic allocated array to save the per core temperature info
instead of a list. Suggested by Guenter.
- Simplify the logic for handling pkg temp_data and core temp_data
and remove unused marcos.

thanks,
rui

>
> Guenter
>
> > thanks,
> > rui
> >
> > ----------------------------------------------------------------
> > Zhang Rui (11):
> >        hwmon: (coretemp) Fix out-of-bounds memory access in
> > create_core_data()
> >        hwmon: (coretemp) Fix bogus core to attr mapping
> >        hwmon: (coretemp) Enlarge per package core count limit
> >        hwmon: (coretemp) Introduce enum for attr index
> >        hwmon: (coretemp) Remove unnecessary dependency of array
> > index
> >        hwmon: (coretemp) Replace sensor_device_attribute with
> > device_attribute
> >        hwmon: (coretemp) Remove redundant pdata->cpu_map[]
> >        hwmon: (coretemp) Abstract core_temp helpers
> >        hwmon: (coretemp) Split package temp_data and core temp_data
> >        hwmon: (coretemp) Remove redundant temp_data->is_pkg_data
> >        hwmon: (coretemp) Use dynamic allocated memory for core
> > temp_data
> >
> >   drivers/hwmon/coretemp.c | 219 ++++++++++++++++++++++++++--------
> > -------------
> >   1 file changed, 120 insertions(+), 99 deletions(-)
> >
> >
>
>

2024-02-04 14:49:55

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH V2 07/11] hwmon: (coretemp) Remove redundant pdata->cpu_map[]

On Fri, Feb 02, 2024 at 05:21:40PM +0800, Zhang Rui wrote:
> pdata->cpu_map[] saves the mapping between cpu core id and the index in
> pdata->core_data[]. This is used to find the temp_data structure using
> cpu_core_id, by traversing the pdata->cpu_map[] array. But the same goal
> can be achieved by traversing the pdata->core_temp[] array directly.
>
> Remove redundant pdata->cpu_map[].
>
> No functional change.
>
> Signed-off-by: Zhang Rui <[email protected]>

Applied.

Thanks,
Guenter

> ---
> drivers/hwmon/coretemp.c | 16 ++++++----------
> 1 file changed, 6 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index cdd1e069d5c1..29ee8e0c0fe9 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -96,7 +96,6 @@ struct temp_data {
> struct platform_data {
> struct device *hwmon_dev;
> u16 pkg_id;
> - u16 cpu_map[NUM_REAL_CORES];
> struct ida ida;
> struct cpumask cpumask;
> struct temp_data *core_data[MAX_CORE_DATA];
> @@ -517,7 +516,6 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
> if (index < 0)
> return index;
>
> - pdata->cpu_map[index] = topology_core_id(cpu);
> index += BASE_SYSFS_ATTR_NO;
> }
>
> @@ -696,7 +694,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
> struct platform_device *pdev = coretemp_get_pdev(cpu);
> struct platform_data *pd;
> struct temp_data *tdata;
> - int i, indx = -1, target;
> + int i, target;
>
> /* No need to tear down any interfaces for suspend */
> if (cpuhp_tasks_frozen)
> @@ -707,18 +705,16 @@ static int coretemp_cpu_offline(unsigned int cpu)
> if (!pd->hwmon_dev)
> return 0;
>
> - for (i = 0; i < NUM_REAL_CORES; i++) {
> - if (pd->cpu_map[i] == topology_core_id(cpu)) {
> - indx = i + BASE_SYSFS_ATTR_NO;
> + for (i = BASE_SYSFS_ATTR_NO; i < MAX_CORE_DATA; i++) {
> + if (pd->core_data[i] && pd->core_data[i]->cpu_core_id == topology_core_id(cpu))
> break;
> - }
> }
>
> /* Too many cores and this core is not populated, just return */
> - if (indx < 0)
> + if (i == MAX_CORE_DATA)
> return 0;
>
> - tdata = pd->core_data[indx];
> + tdata = pd->core_data[i];
>
> cpumask_clear_cpu(cpu, &pd->cpumask);
>
> @@ -729,7 +725,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
> */
> target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
> if (target >= nr_cpu_ids) {
> - coretemp_remove_core(pd, indx);
> + coretemp_remove_core(pd, i);
> } else if (tdata && tdata->cpu == cpu) {
> mutex_lock(&tdata->update_lock);
> tdata->cpu = target;

2024-02-04 14:51:06

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH V2 11/11] hwmon: (coretemp) Use dynamic allocated memory for core temp_data

On Fri, Feb 02, 2024 at 05:21:44PM +0800, Zhang Rui wrote:
> The total memory needed for saving per core temperature data depends on
> the number of cores in a package. Using static allocated memory wastes
> memories on systems with low per package core count.
>
> Improve the code to use dynamic allocated memory so that it can be
> improved further when per package core count information becomes
> available.
>
> No functional change intended.
>
> Signed-off-by: Zhang Rui <[email protected]>

Applied.

Thanks,
Guenter