2023-11-27 13:17:32

by Zhang, Rui

[permalink] [raw]
Subject: [PATCH 0/3] hwmon: (coretemp) Fix core count limitation

Currently, coretemp driver only supports 128 cores per package.
This loses some core temperature information on systems that have more
than 128 cores per package.
[ 58.685033] coretemp coretemp.0: Adding Core 128 failed
[ 58.692009] coretemp coretemp.0: Adding Core 129 failed

To get rid of the fixed length pdata->core_data[] array,
Patch 1/3 and 2/3 removes the dependency of array index in sysfs
attribute callbacks.
Patch 3/3 removes the pdata->core_data[] array and use a per package
list to maintain the per core temperature infomation instead.

-rui

----------------------------------------------------------------
Zhang Rui (3):
hwmon: (coretemp) Introduce enum for attr index
hwmon: (coretemp) Remove unnecessary dependency of array index
hwmon: (coretemp) Fix core count limitation

drivers/hwmon/coretemp.c | 137 +++++++++++++++++++++++------------------------
1 file changed, 67 insertions(+), 70 deletions(-)


2023-11-27 13:17:33

by Zhang, Rui

[permalink] [raw]
Subject: [PATCH 1/3] hwmon: (coretemp) Introduce enum for attr index

Introduce enum coretemp_attr_index to better describe the index of each
sensor attribute and the maximum number of basic/possible attributes.

No functional change.

Signed-off-by: Zhang Rui <[email protected]>
---
drivers/hwmon/coretemp.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index ba82d1e79c13..6053ed3761c2 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -43,10 +43,18 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
#define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
-#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
-#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
#define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)

+enum coretemp_attr_index {
+ ATTR_LABEL,
+ ATTR_CRIT_ALARM,
+ ATTR_TEMP,
+ ATTR_TJMAX,
+ ATTR_TTARGET,
+ TOTAL_ATTRS, /* Maximum no of possible attrs */
+ MAX_CORE_ATTRS = ATTR_TJMAX + 1 /* Maximum no of basic attrs */
+};
+
#ifdef CONFIG_SMP
#define for_each_sibling(i, cpu) \
for_each_cpu(i, topology_sibling_cpumask(cpu))
--
2.34.1

2023-11-27 13:17:46

by Zhang, Rui

[permalink] [raw]
Subject: [PATCH 3/3] hwmon: (coretemp) Fix core count limitation

Currently, coretemp driver only supports 128 cores per package.
This loses some core temperation information on systems that have more
than 128 cores per package.
[ 58.685033] coretemp coretemp.0: Adding Core 128 failed
[ 58.692009] coretemp coretemp.0: Adding Core 129 failed

Fix the problem by using a per package list to maintain the per core
temp_data instead of the fixed length pdata->core_data[] array.

Signed-off-by: Zhang Rui <[email protected]>
---
drivers/hwmon/coretemp.c | 110 ++++++++++++++++++---------------------
1 file changed, 52 insertions(+), 58 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index cef43fedbd58..1bb1a6e4b07b 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -39,11 +39,7 @@ static int force_tjmax;
module_param_named(tjmax, force_tjmax, int, 0444);
MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");

-#define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */
-#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
-#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
#define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
-#define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)

enum coretemp_attr_index {
ATTR_LABEL,
@@ -90,17 +86,17 @@ struct temp_data {
struct attribute *attrs[TOTAL_ATTRS + 1];
struct attribute_group attr_group;
struct mutex update_lock;
+ struct list_head node;
};

/* Platform Data per Physical CPU */
struct platform_data {
struct device *hwmon_dev;
u16 pkg_id;
- u16 cpu_map[NUM_REAL_CORES];
- struct ida ida;
struct cpumask cpumask;
- struct temp_data *core_data[MAX_CORE_DATA];
struct device_attribute name_attr;
+ struct mutex core_data_lock;
+ struct list_head core_data_list;
};

struct tjmax_pci {
@@ -491,6 +487,23 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
return tdata;
}

+static struct temp_data *get_tdata(struct platform_data *pdata, int cpu)
+{
+ struct temp_data *tdata;
+
+ mutex_lock(&pdata->core_data_lock);
+ list_for_each_entry(tdata, &pdata->core_data_list, node) {
+ if (cpu >= 0 && !tdata->is_pkg_data && tdata->cpu_core_id == topology_core_id(cpu))
+ goto found;
+ if (cpu < 0 && tdata->is_pkg_data)
+ goto found;
+ }
+ tdata = NULL;
+found:
+ mutex_unlock(&pdata->core_data_lock);
+ return tdata;
+}
+
static int create_core_data(struct platform_device *pdev, unsigned int cpu,
int pkg_flag)
{
@@ -498,37 +511,29 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
struct platform_data *pdata = platform_get_drvdata(pdev);
struct cpuinfo_x86 *c = &cpu_data(cpu);
u32 eax, edx;
- int err, index, attr_no;
+ int err, attr_no;

if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
return 0;

+ tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
+ if (tdata)
+ return -EEXIST;
+
+ tdata = init_temp_data(cpu, pkg_flag);
+ if (!tdata)
+ return -ENOMEM;
+
/*
* Find attr number for sysfs:
* We map the attr number to core id of the CPU
* The attr number is always core id + 2
* The Pkgtemp will always show up as temp1_*, if available
*/
- if (pkg_flag) {
- attr_no = PKG_SYSFS_ATTR_NO;
- } else {
- index = ida_alloc(&pdata->ida, GFP_KERNEL);
- if (index < 0)
- return index;
- pdata->cpu_map[index] = topology_core_id(cpu);
- attr_no = index + BASE_SYSFS_ATTR_NO;
- }
-
- if (attr_no > MAX_CORE_DATA - 1) {
- err = -ERANGE;
- goto ida_free;
- }
-
- tdata = init_temp_data(cpu, pkg_flag);
- if (!tdata) {
- err = -ENOMEM;
- goto ida_free;
- }
+ if (pkg_flag)
+ attr_no = 1;
+ else
+ attr_no = tdata->cpu_core_id + 2;

/* Test if we can access the status register */
err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
@@ -547,20 +552,18 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
if (get_ttarget(tdata, &pdev->dev) >= 0)
tdata->attr_size++;

- pdata->core_data[attr_no] = tdata;
-
/* Create sysfs interfaces */
err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
if (err)
goto exit_free;

+ mutex_lock(&pdata->core_data_lock);
+ list_add(&tdata->node, &pdata->core_data_list);
+ mutex_unlock(&pdata->core_data_lock);
+
return 0;
exit_free:
- pdata->core_data[attr_no] = NULL;
kfree(tdata);
-ida_free:
- if (!pkg_flag)
- ida_free(&pdata->ida, index);
return err;
}

@@ -571,9 +574,9 @@ coretemp_add_core(struct platform_device *pdev, unsigned int cpu, int pkg_flag)
dev_err(&pdev->dev, "Adding Core %u failed\n", cpu);
}

-static void coretemp_remove_core(struct platform_data *pdata, int indx)
+static void coretemp_remove_core(struct platform_data *pdata, int cpu)
{
- struct temp_data *tdata = pdata->core_data[indx];
+ struct temp_data *tdata = get_tdata(pdata, cpu);

/* if we errored on add then this is already gone */
if (!tdata)
@@ -582,11 +585,11 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
/* Remove the sysfs attributes */
sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);

- kfree(pdata->core_data[indx]);
- pdata->core_data[indx] = NULL;
+ mutex_lock(&pdata->core_data_lock);
+ list_del(&tdata->node);
+ mutex_unlock(&pdata->core_data_lock);

- if (indx >= BASE_SYSFS_ATTR_NO)
- ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
+ kfree(tdata);
}

static int coretemp_device_add(int zoneid)
@@ -601,7 +604,8 @@ static int coretemp_device_add(int zoneid)
return -ENOMEM;

pdata->pkg_id = zoneid;
- ida_init(&pdata->ida);
+ mutex_init(&pdata->core_data_lock);
+ INIT_LIST_HEAD(&pdata->core_data_list);

pdev = platform_device_alloc(DRVNAME, zoneid);
if (!pdev) {
@@ -629,7 +633,6 @@ static void coretemp_device_remove(int zoneid)
struct platform_device *pdev = zone_devices[zoneid];
struct platform_data *pdata = platform_get_drvdata(pdev);

- ida_destroy(&pdata->ida);
kfree(pdata);
platform_device_unregister(pdev);
}
@@ -699,7 +702,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
struct platform_device *pdev = coretemp_get_pdev(cpu);
struct platform_data *pd;
struct temp_data *tdata;
- int i, indx = -1, target;
+ int target;

/* No need to tear down any interfaces for suspend */
if (cpuhp_tasks_frozen)
@@ -710,19 +713,10 @@ static int coretemp_cpu_offline(unsigned int cpu)
if (!pd->hwmon_dev)
return 0;

- for (i = 0; i < NUM_REAL_CORES; i++) {
- if (pd->cpu_map[i] == topology_core_id(cpu)) {
- indx = i + BASE_SYSFS_ATTR_NO;
- break;
- }
- }
-
- /* Too many cores and this core is not populated, just return */
- if (indx < 0)
+ tdata = get_tdata(pd, cpu);
+ if (!tdata)
return 0;

- tdata = pd->core_data[indx];
-
cpumask_clear_cpu(cpu, &pd->cpumask);

/*
@@ -732,20 +726,20 @@ static int coretemp_cpu_offline(unsigned int cpu)
*/
target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
if (target >= nr_cpu_ids) {
- coretemp_remove_core(pd, indx);
- } else if (tdata && tdata->cpu == cpu) {
+ coretemp_remove_core(pd, cpu);
+ } else if (tdata->cpu == cpu) {
mutex_lock(&tdata->update_lock);
tdata->cpu = target;
mutex_unlock(&tdata->update_lock);
}

+ tdata = get_tdata(pd, -1);
/*
* If all cores in this pkg are offline, remove the interface.
*/
- tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
if (cpumask_empty(&pd->cpumask)) {
if (tdata)
- coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
+ coretemp_remove_core(pd, -1);
hwmon_device_unregister(pd->hwmon_dev);
pd->hwmon_dev = NULL;
return 0;
--
2.34.1

2023-11-27 13:18:12

by Zhang, Rui

[permalink] [raw]
Subject: [PATCH 2/3] hwmon: (coretemp) Remove unnecessary dependency of array index

When sensor_device_attribute pointer is available, use container_of() to
get the temp_data address.

This removes the unnecessary dependency of cached index in
pdata->core_data[].

Signed-off-by: Zhang Rui <[email protected]>
---
drivers/hwmon/coretemp.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 6053ed3761c2..cef43fedbd58 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -342,7 +342,7 @@ static ssize_t show_label(struct device *dev,
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct platform_data *pdata = dev_get_drvdata(dev);
- struct temp_data *tdata = pdata->core_data[attr->index];
+ struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_LABEL]);

if (tdata->is_pkg_data)
return sprintf(buf, "Package id %u\n", pdata->pkg_id);
@@ -355,8 +355,7 @@ static ssize_t show_crit_alarm(struct device *dev,
{
u32 eax, edx;
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct platform_data *pdata = dev_get_drvdata(dev);
- struct temp_data *tdata = pdata->core_data[attr->index];
+ struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_CRIT_ALARM]);

mutex_lock(&tdata->update_lock);
rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
@@ -369,8 +368,7 @@ static ssize_t show_tjmax(struct device *dev,
struct device_attribute *devattr, char *buf)
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct platform_data *pdata = dev_get_drvdata(dev);
- struct temp_data *tdata = pdata->core_data[attr->index];
+ struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TJMAX]);
int tjmax;

mutex_lock(&tdata->update_lock);
@@ -384,8 +382,7 @@ static ssize_t show_ttarget(struct device *dev,
struct device_attribute *devattr, char *buf)
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct platform_data *pdata = dev_get_drvdata(dev);
- struct temp_data *tdata = pdata->core_data[attr->index];
+ struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TTARGET]);
int ttarget;

mutex_lock(&tdata->update_lock);
@@ -402,8 +399,7 @@ static ssize_t show_temp(struct device *dev,
{
u32 eax, edx;
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
- struct platform_data *pdata = dev_get_drvdata(dev);
- struct temp_data *tdata = pdata->core_data[attr->index];
+ struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TEMP]);
int tjmax;

mutex_lock(&tdata->update_lock);
@@ -445,7 +441,6 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
tdata->sd_attrs[i].dev_attr.attr.mode = 0444;
tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
- tdata->sd_attrs[i].index = attr_no;
tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr;
}
tdata->attr_group.attrs = tdata->attrs;
--
2.34.1

2023-11-30 21:54:09

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 1/3] hwmon: (coretemp) Introduce enum for attr index

On Mon, Nov 27, 2023 at 09:16:49PM +0800, Zhang Rui wrote:
> Introduce enum coretemp_attr_index to better describe the index of each
> sensor attribute and the maximum number of basic/possible attributes.
>
> No functional change.
>
> Signed-off-by: Zhang Rui <[email protected]>
> ---
> drivers/hwmon/coretemp.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index ba82d1e79c13..6053ed3761c2 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -43,10 +43,18 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
> #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
> #define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
> #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
> -#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
> -#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
> #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>
> +enum coretemp_attr_index {
> + ATTR_LABEL,
> + ATTR_CRIT_ALARM,
> + ATTR_TEMP,
> + ATTR_TJMAX,
> + ATTR_TTARGET,
> + TOTAL_ATTRS, /* Maximum no of possible attrs */
> + MAX_CORE_ATTRS = ATTR_TJMAX + 1 /* Maximum no of basic attrs */

This seems odd. TOTAL_ATTRS being the last entry seems fine, but defining a
MAX_CORE_ATTR the way above sounds a bit hacky.

> +};
> +
> #ifdef CONFIG_SMP
> #define for_each_sibling(i, cpu) \
> for_each_cpu(i, topology_sibling_cpumask(cpu))
> --
> 2.34.1
>

2023-12-01 01:29:42

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 2/3] hwmon: (coretemp) Remove unnecessary dependency of array index

On Mon, Nov 27, 2023 at 09:16:50PM +0800, Zhang Rui wrote:
> When sensor_device_attribute pointer is available, use container_of() to
> get the temp_data address.
>
> This removes the unnecessary dependency of cached index in
> pdata->core_data[].
>
> Signed-off-by: Zhang Rui <[email protected]>
> ---
> drivers/hwmon/coretemp.c | 15 +++++----------
> 1 file changed, 5 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index 6053ed3761c2..cef43fedbd58 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -342,7 +342,7 @@ static ssize_t show_label(struct device *dev,
> {
> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> struct platform_data *pdata = dev_get_drvdata(dev);
> - struct temp_data *tdata = pdata->core_data[attr->index];
> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_LABEL]);
>
> if (tdata->is_pkg_data)
> return sprintf(buf, "Package id %u\n", pdata->pkg_id);
> @@ -355,8 +355,7 @@ static ssize_t show_crit_alarm(struct device *dev,
> {
> u32 eax, edx;
> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> - struct platform_data *pdata = dev_get_drvdata(dev);
> - struct temp_data *tdata = pdata->core_data[attr->index];
> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_CRIT_ALARM]);
>
> mutex_lock(&tdata->update_lock);
> rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
> @@ -369,8 +368,7 @@ static ssize_t show_tjmax(struct device *dev,
> struct device_attribute *devattr, char *buf)
> {
> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> - struct platform_data *pdata = dev_get_drvdata(dev);
> - struct temp_data *tdata = pdata->core_data[attr->index];
> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TJMAX]);
> int tjmax;
>
> mutex_lock(&tdata->update_lock);
> @@ -384,8 +382,7 @@ static ssize_t show_ttarget(struct device *dev,
> struct device_attribute *devattr, char *buf)
> {
> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> - struct platform_data *pdata = dev_get_drvdata(dev);
> - struct temp_data *tdata = pdata->core_data[attr->index];
> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TTARGET]);
> int ttarget;
>
> mutex_lock(&tdata->update_lock);
> @@ -402,8 +399,7 @@ static ssize_t show_temp(struct device *dev,
> {
> u32 eax, edx;
> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> - struct platform_data *pdata = dev_get_drvdata(dev);
> - struct temp_data *tdata = pdata->core_data[attr->index];
> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TEMP]);
> int tjmax;
>
> mutex_lock(&tdata->update_lock);
> @@ -445,7 +441,6 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
> tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
> tdata->sd_attrs[i].dev_attr.attr.mode = 0444;
> tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
> - tdata->sd_attrs[i].index = attr_no;

I was naively thinking if we could nuke that "index". I can see that used
in couple macros, but seems like we can lose it?

Completely untested.. and uncertain :-)


diff --git a/include/linux/hwmon-sysfs.h b/include/linux/hwmon-sysfs.h
index d896713359cd..4855893f9401 100644
--- a/include/linux/hwmon-sysfs.h
+++ b/include/linux/hwmon-sysfs.h
@@ -12,36 +12,35 @@

struct sensor_device_attribute{
struct device_attribute dev_attr;
- int index;
};
#define to_sensor_dev_attr(_dev_attr) \
container_of(_dev_attr, struct sensor_device_attribute, dev_attr)

-#define SENSOR_ATTR(_name, _mode, _show, _store, _index) \
+#define SENSOR_ATTR(_name, _mode, _show, _store) \
{ .dev_attr = __ATTR(_name, _mode, _show, _store), \
- .index = _index }
+ }

-#define SENSOR_ATTR_RO(_name, _func, _index) \
+#define SENSOR_ATTR_RO(_name, _func) \
SENSOR_ATTR(_name, 0444, _func##_show, NULL, _index)

-#define SENSOR_ATTR_RW(_name, _func, _index) \
- SENSOR_ATTR(_name, 0644, _func##_show, _func##_store, _index)
+#define SENSOR_ATTR_RW(_name, _func) \
+ SENSOR_ATTR(_name, 0644, _func##_show, _func##_store)

-#define SENSOR_ATTR_WO(_name, _func, _index) \
- SENSOR_ATTR(_name, 0200, NULL, _func##_store, _index)
+#define SENSOR_ATTR_WO(_name, _func) \
+ SENSOR_ATTR(_name, 0200, NULL, _func##_store)

-#define SENSOR_DEVICE_ATTR(_name, _mode, _show, _store, _index) \
+#define SENSOR_DEVICE_ATTR(_name, _mode, _show, _store) \
struct sensor_device_attribute sensor_dev_attr_##_name \
- = SENSOR_ATTR(_name, _mode, _show, _store, _index)
+ = SENSOR_ATTR(_name, _mode, _show, _store)

-#define SENSOR_DEVICE_ATTR_RO(_name, _func, _index) \
- SENSOR_DEVICE_ATTR(_name, 0444, _func##_show, NULL, _index)
+#define SENSOR_DEVICE_ATTR_RO(_name, _func) \
+ SENSOR_DEVICE_ATTR(_name, 0444, _func##_show, NULL)

#define SENSOR_DEVICE_ATTR_RW(_name, _func, _index) \
- SENSOR_DEVICE_ATTR(_name, 0644, _func##_show, _func##_store, _index)
+ SENSOR_DEVICE_ATTR(_name, 0644, _func##_show, _func##_store)

-#define SENSOR_DEVICE_ATTR_WO(_name, _func, _index) \
- SENSOR_DEVICE_ATTR(_name, 0200, NULL, _func##_store, _index)
+#define SENSOR_DEVICE_ATTR_WO(_name, _func) \
+ SENSOR_DEVICE_ATTR(_name, 0200, NULL, _func##_store)

struct sensor_device_attribute_2 {
struct device_attribute dev_attr;
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 975da8e7f2a9..c3bbf2f7d6eb 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -239,7 +239,7 @@ hwm_power1_max_interval_store(struct device *dev,

static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
hwm_power1_max_interval_show,
- hwm_power1_max_interval_store, 0);
+ hwm_power1_max_interval_store);

static struct attribute *hwm_attributes[] = {
&sensor_dev_attr_power1_max_interval.dev_attr.attr,

2023-12-01 02:11:14

by Ashok Raj

[permalink] [raw]
Subject: Re: [PATCH 3/3] hwmon: (coretemp) Fix core count limitation

On Mon, Nov 27, 2023 at 09:16:51PM +0800, Zhang Rui wrote:
> Currently, coretemp driver only supports 128 cores per package.
> This loses some core temperation information on systems that have more

s/temperation/temperature

> than 128 cores per package.
> [ 58.685033] coretemp coretemp.0: Adding Core 128 failed
> [ 58.692009] coretemp coretemp.0: Adding Core 129 failed
>
> Fix the problem by using a per package list to maintain the per core
> temp_data instead of the fixed length pdata->core_data[] array.
>
> Signed-off-by: Zhang Rui <[email protected]>
> ---
> drivers/hwmon/coretemp.c | 110 ++++++++++++++++++---------------------
> 1 file changed, 52 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index cef43fedbd58..1bb1a6e4b07b 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -39,11 +39,7 @@ static int force_tjmax;
> module_param_named(tjmax, force_tjmax, int, 0444);
> MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
>
> -#define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */
> -#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
> -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
> #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
> -#define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>
> enum coretemp_attr_index {
> ATTR_LABEL,
> @@ -90,17 +86,17 @@ struct temp_data {
> struct attribute *attrs[TOTAL_ATTRS + 1];
> struct attribute_group attr_group;
> struct mutex update_lock;
> + struct list_head node;
> };
>
> /* Platform Data per Physical CPU */
> struct platform_data {
> struct device *hwmon_dev;
> u16 pkg_id;
> - u16 cpu_map[NUM_REAL_CORES];
> - struct ida ida;
> struct cpumask cpumask;
> - struct temp_data *core_data[MAX_CORE_DATA];
> struct device_attribute name_attr;
> + struct mutex core_data_lock;
> + struct list_head core_data_list;
> };
>
> struct tjmax_pci {
> @@ -491,6 +487,23 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
> return tdata;
> }
>
> +static struct temp_data *get_tdata(struct platform_data *pdata, int cpu)
> +{
> + struct temp_data *tdata;
> +
> + mutex_lock(&pdata->core_data_lock);
> + list_for_each_entry(tdata, &pdata->core_data_list, node) {
> + if (cpu >= 0 && !tdata->is_pkg_data && tdata->cpu_core_id == topology_core_id(cpu))
> + goto found;
> + if (cpu < 0 && tdata->is_pkg_data)
> + goto found;
> + }
> + tdata = NULL;

What used to be an array, is now a list? Is it possible to get the number
of cores_per_package during initialization and allocate the per-core? You
can still get them indexing from core_id and you can possibly lose the
mutex and search?

I don't know this code well enough... Just a thought.

> +found:
> + mutex_unlock(&pdata->core_data_lock);
> + return tdata;
> +}
> +
> static int create_core_data(struct platform_device *pdev, unsigned int cpu,
> int pkg_flag)
> {
> @@ -498,37 +511,29 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
> struct platform_data *pdata = platform_get_drvdata(pdev);
> struct cpuinfo_x86 *c = &cpu_data(cpu);
> u32 eax, edx;
> - int err, index, attr_no;
> + int err, attr_no;
>
> if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
> return 0;
>
> + tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
> + if (tdata)
> + return -EEXIST;
> +
> + tdata = init_temp_data(cpu, pkg_flag);

Is temp_data per_cpu or per_core? Wasn't sure if temp_data needs a CPU
number there along with cpu_core_id


> + if (!tdata)
> + return -ENOMEM;
> +
> /*
> * Find attr number for sysfs:
> * We map the attr number to core id of the CPU
> * The attr number is always core id + 2
> * The Pkgtemp will always show up as temp1_*, if available
> */
> - if (pkg_flag) {
> - attr_no = PKG_SYSFS_ATTR_NO;
> - } else {
> - index = ida_alloc(&pdata->ida, GFP_KERNEL);
> - if (index < 0)
> - return index;
> - pdata->cpu_map[index] = topology_core_id(cpu);
> - attr_no = index + BASE_SYSFS_ATTR_NO;
> - }
> -
> - if (attr_no > MAX_CORE_DATA - 1) {
> - err = -ERANGE;
> - goto ida_free;
> - }
> -
> - tdata = init_temp_data(cpu, pkg_flag);
> - if (!tdata) {
> - err = -ENOMEM;
> - goto ida_free;
> - }
> + if (pkg_flag)
> + attr_no = 1;
> + else
> + attr_no = tdata->cpu_core_id + 2;
>
> /* Test if we can access the status register */
> err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
> @@ -547,20 +552,18 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
> if (get_ttarget(tdata, &pdev->dev) >= 0)
> tdata->attr_size++;
>
> - pdata->core_data[attr_no] = tdata;
> -
> /* Create sysfs interfaces */
> err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
> if (err)
> goto exit_free;
>
> + mutex_lock(&pdata->core_data_lock);
> + list_add(&tdata->node, &pdata->core_data_list);
> + mutex_unlock(&pdata->core_data_lock);
> +
> return 0;
> exit_free:
> - pdata->core_data[attr_no] = NULL;
> kfree(tdata);
> -ida_free:
> - if (!pkg_flag)
> - ida_free(&pdata->ida, index);
> return err;
> }
>
> @@ -571,9 +574,9 @@ coretemp_add_core(struct platform_device *pdev, unsigned int cpu, int pkg_flag)
> dev_err(&pdev->dev, "Adding Core %u failed\n", cpu);
> }
>
> -static void coretemp_remove_core(struct platform_data *pdata, int indx)
> +static void coretemp_remove_core(struct platform_data *pdata, int cpu)
> {
> - struct temp_data *tdata = pdata->core_data[indx];
> + struct temp_data *tdata = get_tdata(pdata, cpu);
>
> /* if we errored on add then this is already gone */
> if (!tdata)
> @@ -582,11 +585,11 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
> /* Remove the sysfs attributes */
> sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
>
> - kfree(pdata->core_data[indx]);
> - pdata->core_data[indx] = NULL;
> + mutex_lock(&pdata->core_data_lock);
> + list_del(&tdata->node);
> + mutex_unlock(&pdata->core_data_lock);
>
> - if (indx >= BASE_SYSFS_ATTR_NO)
> - ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
> + kfree(tdata);
> }
>
> static int coretemp_device_add(int zoneid)
> @@ -601,7 +604,8 @@ static int coretemp_device_add(int zoneid)
> return -ENOMEM;
>
> pdata->pkg_id = zoneid;
> - ida_init(&pdata->ida);
> + mutex_init(&pdata->core_data_lock);
> + INIT_LIST_HEAD(&pdata->core_data_list);
>
> pdev = platform_device_alloc(DRVNAME, zoneid);
> if (!pdev) {
> @@ -629,7 +633,6 @@ static void coretemp_device_remove(int zoneid)
> struct platform_device *pdev = zone_devices[zoneid];
> struct platform_data *pdata = platform_get_drvdata(pdev);
>
> - ida_destroy(&pdata->ida);
> kfree(pdata);
> platform_device_unregister(pdev);
> }
> @@ -699,7 +702,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
> struct platform_device *pdev = coretemp_get_pdev(cpu);
> struct platform_data *pd;
> struct temp_data *tdata;
> - int i, indx = -1, target;
> + int target;
>
> /* No need to tear down any interfaces for suspend */
> if (cpuhp_tasks_frozen)
> @@ -710,19 +713,10 @@ static int coretemp_cpu_offline(unsigned int cpu)
> if (!pd->hwmon_dev)
> return 0;
>
> - for (i = 0; i < NUM_REAL_CORES; i++) {
> - if (pd->cpu_map[i] == topology_core_id(cpu)) {
> - indx = i + BASE_SYSFS_ATTR_NO;
> - break;
> - }
> - }
> -
> - /* Too many cores and this core is not populated, just return */
> - if (indx < 0)
> + tdata = get_tdata(pd, cpu);
> + if (!tdata)
> return 0;
>
> - tdata = pd->core_data[indx];
> -
> cpumask_clear_cpu(cpu, &pd->cpumask);
>
> /*
> @@ -732,20 +726,20 @@ static int coretemp_cpu_offline(unsigned int cpu)
> */
> target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
> if (target >= nr_cpu_ids) {
> - coretemp_remove_core(pd, indx);
> - } else if (tdata && tdata->cpu == cpu) {
> + coretemp_remove_core(pd, cpu);
> + } else if (tdata->cpu == cpu) {
> mutex_lock(&tdata->update_lock);
> tdata->cpu = target;
> mutex_unlock(&tdata->update_lock);
> }
>
> + tdata = get_tdata(pd, -1);
> /*
> * If all cores in this pkg are offline, remove the interface.
> */
> - tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
> if (cpumask_empty(&pd->cpumask)) {
> if (tdata)
> - coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
> + coretemp_remove_core(pd, -1);
> hwmon_device_unregister(pd->hwmon_dev);
> pd->hwmon_dev = NULL;
> return 0;
> --
> 2.34.1
>

2023-12-01 03:06:32

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 3/3] hwmon: (coretemp) Fix core count limitation

On 11/27/23 05:16, Zhang Rui wrote:
> Currently, coretemp driver only supports 128 cores per package.
> This loses some core temperation information on systems that have more
> than 128 cores per package.
> [ 58.685033] coretemp coretemp.0: Adding Core 128 failed
> [ 58.692009] coretemp coretemp.0: Adding Core 129 failed
>
> Fix the problem by using a per package list to maintain the per core
> temp_data instead of the fixed length pdata->core_data[] array.
>
> Signed-off-by: Zhang Rui <[email protected]>
> ---
> drivers/hwmon/coretemp.c | 110 ++++++++++++++++++---------------------
> 1 file changed, 52 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index cef43fedbd58..1bb1a6e4b07b 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -39,11 +39,7 @@ static int force_tjmax;
> module_param_named(tjmax, force_tjmax, int, 0444);
> MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
>
> -#define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */
> -#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
> -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
> #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
> -#define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>
> enum coretemp_attr_index {
> ATTR_LABEL,
> @@ -90,17 +86,17 @@ struct temp_data {
> struct attribute *attrs[TOTAL_ATTRS + 1];
> struct attribute_group attr_group;
> struct mutex update_lock;
> + struct list_head node;
> };
>
> /* Platform Data per Physical CPU */
> struct platform_data {
> struct device *hwmon_dev;
> u16 pkg_id;
> - u16 cpu_map[NUM_REAL_CORES];
> - struct ida ida;
> struct cpumask cpumask;
> - struct temp_data *core_data[MAX_CORE_DATA];
> struct device_attribute name_attr;
> + struct mutex core_data_lock;
> + struct list_head core_data_list;
> };
>
> struct tjmax_pci {
> @@ -491,6 +487,23 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
> return tdata;
> }
>
> +static struct temp_data *get_tdata(struct platform_data *pdata, int cpu)
> +{
> + struct temp_data *tdata;
> +
> + mutex_lock(&pdata->core_data_lock);
> + list_for_each_entry(tdata, &pdata->core_data_list, node) {
> + if (cpu >= 0 && !tdata->is_pkg_data && tdata->cpu_core_id == topology_core_id(cpu))
> + goto found;
> + if (cpu < 0 && tdata->is_pkg_data)
> + goto found;
> + }

I really don't like this. With 128+ cores, it gets terribly expensive.

How about calculating the number of cores in the probe function and
allocating cpu_map[] and core_data[] instead ?

Thanks,
Guenter

2023-12-01 03:26:42

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 2/3] hwmon: (coretemp) Remove unnecessary dependency of array index

On 11/30/23 17:27, Ashok Raj wrote:
> On Mon, Nov 27, 2023 at 09:16:50PM +0800, Zhang Rui wrote:
>> When sensor_device_attribute pointer is available, use container_of() to
>> get the temp_data address.
>>
>> This removes the unnecessary dependency of cached index in
>> pdata->core_data[].
>>
>> Signed-off-by: Zhang Rui <[email protected]>
>> ---
>> drivers/hwmon/coretemp.c | 15 +++++----------
>> 1 file changed, 5 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
>> index 6053ed3761c2..cef43fedbd58 100644
>> --- a/drivers/hwmon/coretemp.c
>> +++ b/drivers/hwmon/coretemp.c
>> @@ -342,7 +342,7 @@ static ssize_t show_label(struct device *dev,
>> {
>> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
>> struct platform_data *pdata = dev_get_drvdata(dev);
>> - struct temp_data *tdata = pdata->core_data[attr->index];
>> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_LABEL]);
>>
>> if (tdata->is_pkg_data)
>> return sprintf(buf, "Package id %u\n", pdata->pkg_id);
>> @@ -355,8 +355,7 @@ static ssize_t show_crit_alarm(struct device *dev,
>> {
>> u32 eax, edx;
>> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
>> - struct platform_data *pdata = dev_get_drvdata(dev);
>> - struct temp_data *tdata = pdata->core_data[attr->index];
>> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_CRIT_ALARM]);
>>
>> mutex_lock(&tdata->update_lock);
>> rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
>> @@ -369,8 +368,7 @@ static ssize_t show_tjmax(struct device *dev,
>> struct device_attribute *devattr, char *buf)
>> {
>> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
>> - struct platform_data *pdata = dev_get_drvdata(dev);
>> - struct temp_data *tdata = pdata->core_data[attr->index];
>> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TJMAX]);
>> int tjmax;
>>
>> mutex_lock(&tdata->update_lock);
>> @@ -384,8 +382,7 @@ static ssize_t show_ttarget(struct device *dev,
>> struct device_attribute *devattr, char *buf)
>> {
>> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
>> - struct platform_data *pdata = dev_get_drvdata(dev);
>> - struct temp_data *tdata = pdata->core_data[attr->index];
>> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TTARGET]);
>> int ttarget;
>>
>> mutex_lock(&tdata->update_lock);
>> @@ -402,8 +399,7 @@ static ssize_t show_temp(struct device *dev,
>> {
>> u32 eax, edx;
>> struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
>> - struct platform_data *pdata = dev_get_drvdata(dev);
>> - struct temp_data *tdata = pdata->core_data[attr->index];
>> + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TEMP]);
>> int tjmax;
>>
>> mutex_lock(&tdata->update_lock);
>> @@ -445,7 +441,6 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
>> tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
>> tdata->sd_attrs[i].dev_attr.attr.mode = 0444;
>> tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
>> - tdata->sd_attrs[i].index = attr_no;
>
> I was naively thinking if we could nuke that "index". I can see that used
> in couple macros, but seems like we can lose it?
>
> Completely untested.. and uncertain :-)
>

If you had suggested to replace
struct sensor_device_attribute sd_attrs[TOTAL_ATTRS];
with
struct device_attribute sd_attrs[TOTAL_ATTRS];
what you suggested may actually be possible and make sense. However,
suggesting to dump the index parameter of SENSOR_ATTR() completely
because _this_ driver may no longer need it seems to be a little excessive.

>
> diff --git a/include/linux/hwmon-sysfs.h b/include/linux/hwmon-sysfs.h
> index d896713359cd..4855893f9401 100644
> --- a/include/linux/hwmon-sysfs.h
> +++ b/include/linux/hwmon-sysfs.h
> @@ -12,36 +12,35 @@
>
> struct sensor_device_attribute{
> struct device_attribute dev_attr;
> - int index;
> };
> #define to_sensor_dev_attr(_dev_attr) \
> container_of(_dev_attr, struct sensor_device_attribute, dev_attr)
>
> -#define SENSOR_ATTR(_name, _mode, _show, _store, _index) \
> +#define SENSOR_ATTR(_name, _mode, _show, _store) \
> { .dev_attr = __ATTR(_name, _mode, _show, _store), \
> - .index = _index }
> + }
>
> -#define SENSOR_ATTR_RO(_name, _func, _index) \
> +#define SENSOR_ATTR_RO(_name, _func) \
> SENSOR_ATTR(_name, 0444, _func##_show, NULL, _index)
>
> -#define SENSOR_ATTR_RW(_name, _func, _index) \
> - SENSOR_ATTR(_name, 0644, _func##_show, _func##_store, _index)
> +#define SENSOR_ATTR_RW(_name, _func) \
> + SENSOR_ATTR(_name, 0644, _func##_show, _func##_store)
>
> -#define SENSOR_ATTR_WO(_name, _func, _index) \
> - SENSOR_ATTR(_name, 0200, NULL, _func##_store, _index)
> +#define SENSOR_ATTR_WO(_name, _func) \
> + SENSOR_ATTR(_name, 0200, NULL, _func##_store)
>
> -#define SENSOR_DEVICE_ATTR(_name, _mode, _show, _store, _index) \
> +#define SENSOR_DEVICE_ATTR(_name, _mode, _show, _store) \
> struct sensor_device_attribute sensor_dev_attr_##_name \
> - = SENSOR_ATTR(_name, _mode, _show, _store, _index)
> + = SENSOR_ATTR(_name, _mode, _show, _store)
>
> -#define SENSOR_DEVICE_ATTR_RO(_name, _func, _index) \
> - SENSOR_DEVICE_ATTR(_name, 0444, _func##_show, NULL, _index)
> +#define SENSOR_DEVICE_ATTR_RO(_name, _func) \
> + SENSOR_DEVICE_ATTR(_name, 0444, _func##_show, NULL)
>
> #define SENSOR_DEVICE_ATTR_RW(_name, _func, _index) \
> - SENSOR_DEVICE_ATTR(_name, 0644, _func##_show, _func##_store, _index)
> + SENSOR_DEVICE_ATTR(_name, 0644, _func##_show, _func##_store)
>
> -#define SENSOR_DEVICE_ATTR_WO(_name, _func, _index) \
> - SENSOR_DEVICE_ATTR(_name, 0200, NULL, _func##_store, _index)
> +#define SENSOR_DEVICE_ATTR_WO(_name, _func) \
> + SENSOR_DEVICE_ATTR(_name, 0200, NULL, _func##_store)
>
> struct sensor_device_attribute_2 {
> struct device_attribute dev_attr;
> diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> index 975da8e7f2a9..c3bbf2f7d6eb 100644
> --- a/drivers/gpu/drm/i915/i915_hwmon.c
> +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> @@ -239,7 +239,7 @@ hwm_power1_max_interval_store(struct device *dev,
>
> static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
> hwm_power1_max_interval_show,
> - hwm_power1_max_interval_store, 0);
> + hwm_power1_max_interval_store);
>

That driver could and should have used DEVICE_ATTR() instead of
SENSOR_DEVICE_ATTR(), and there are various other drivers where
that would have made sense. Actually, it should have used
DEVICE_ATTR_RW() but that is just a detail.

However, there are more than 2,000 uses of SENSOR_DEVICE_ATTR() and derived
macros in the kernel. The large majority of those do set index to values != 0,
and the affected drivers would not be happy if that argument disappeared.

Frankly, I am not entirely sure if you were serious with your suggestion.
I tried to give a serious reply, but I am not entirely sure if I succeeded.
My apologies if some sarcasm was bleeding through.

Guenter

2023-12-01 04:15:02

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 1/3] hwmon: (coretemp) Introduce enum for attr index

On 11/30/23 13:51, Ashok Raj wrote:
> On Mon, Nov 27, 2023 at 09:16:49PM +0800, Zhang Rui wrote:
>> Introduce enum coretemp_attr_index to better describe the index of each
>> sensor attribute and the maximum number of basic/possible attributes.
>>
>> No functional change.
>>
>> Signed-off-by: Zhang Rui <[email protected]>
>> ---
>> drivers/hwmon/coretemp.c | 12 ++++++++++--
>> 1 file changed, 10 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
>> index ba82d1e79c13..6053ed3761c2 100644
>> --- a/drivers/hwmon/coretemp.c
>> +++ b/drivers/hwmon/coretemp.c
>> @@ -43,10 +43,18 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
>> #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
>> #define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
>> #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
>> -#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
>> -#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
>> #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>>
>> +enum coretemp_attr_index {
>> + ATTR_LABEL,
>> + ATTR_CRIT_ALARM,
>> + ATTR_TEMP,
>> + ATTR_TJMAX,
>> + ATTR_TTARGET,
>> + TOTAL_ATTRS, /* Maximum no of possible attrs */
>> + MAX_CORE_ATTRS = ATTR_TJMAX + 1 /* Maximum no of basic attrs */
>
> This seems odd. TOTAL_ATTRS being the last entry seems fine, but defining a
> MAX_CORE_ATTR the way above sounds a bit hacky.
>

Complaining is easy. What do you suggest that would be better ?

Guenter

>> +};
>> +
>> #ifdef CONFIG_SMP
>> #define for_each_sibling(i, cpu) \
>> for_each_cpu(i, topology_sibling_cpumask(cpu))
>> --
>> 2.34.1
>>
>

2023-12-01 04:34:49

by Raj, Ashok

[permalink] [raw]
Subject: Re: [PATCH 2/3] hwmon: (coretemp) Remove unnecessary dependency of array index

On Thu, Nov 30, 2023 at 07:26:31PM -0800, Guenter Roeck wrote:
> On 11/30/23 17:27, Ashok Raj wrote:
> > On Mon, Nov 27, 2023 at 09:16:50PM +0800, Zhang Rui wrote:
> > > When sensor_device_attribute pointer is available, use container_of() to
> > > get the temp_data address.
> > >
> > > This removes the unnecessary dependency of cached index in
> > > pdata->core_data[].
> > >
> > > Signed-off-by: Zhang Rui <[email protected]>
> > > ---
> > > drivers/hwmon/coretemp.c | 15 +++++----------
> > > 1 file changed, 5 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> > > index 6053ed3761c2..cef43fedbd58 100644
> > > --- a/drivers/hwmon/coretemp.c
> > > +++ b/drivers/hwmon/coretemp.c
> > > @@ -342,7 +342,7 @@ static ssize_t show_label(struct device *dev,
> > > {
> > > struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> > > struct platform_data *pdata = dev_get_drvdata(dev);
> > > - struct temp_data *tdata = pdata->core_data[attr->index];
> > > + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_LABEL]);
> > > if (tdata->is_pkg_data)
> > > return sprintf(buf, "Package id %u\n", pdata->pkg_id);
> > > @@ -355,8 +355,7 @@ static ssize_t show_crit_alarm(struct device *dev,
> > > {
> > > u32 eax, edx;
> > > struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> > > - struct platform_data *pdata = dev_get_drvdata(dev);
> > > - struct temp_data *tdata = pdata->core_data[attr->index];
> > > + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_CRIT_ALARM]);
> > > mutex_lock(&tdata->update_lock);
> > > rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
> > > @@ -369,8 +368,7 @@ static ssize_t show_tjmax(struct device *dev,
> > > struct device_attribute *devattr, char *buf)
> > > {
> > > struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> > > - struct platform_data *pdata = dev_get_drvdata(dev);
> > > - struct temp_data *tdata = pdata->core_data[attr->index];
> > > + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TJMAX]);
> > > int tjmax;
> > > mutex_lock(&tdata->update_lock);
> > > @@ -384,8 +382,7 @@ static ssize_t show_ttarget(struct device *dev,
> > > struct device_attribute *devattr, char *buf)
> > > {
> > > struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> > > - struct platform_data *pdata = dev_get_drvdata(dev);
> > > - struct temp_data *tdata = pdata->core_data[attr->index];
> > > + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TTARGET]);
> > > int ttarget;
> > > mutex_lock(&tdata->update_lock);
> > > @@ -402,8 +399,7 @@ static ssize_t show_temp(struct device *dev,
> > > {
> > > u32 eax, edx;
> > > struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> > > - struct platform_data *pdata = dev_get_drvdata(dev);
> > > - struct temp_data *tdata = pdata->core_data[attr->index];
> > > + struct temp_data *tdata = container_of(attr, struct temp_data, sd_attrs[ATTR_TEMP]);
> > > int tjmax;
> > > mutex_lock(&tdata->update_lock);
> > > @@ -445,7 +441,6 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev,
> > > tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i];
> > > tdata->sd_attrs[i].dev_attr.attr.mode = 0444;
> > > tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
> > > - tdata->sd_attrs[i].index = attr_no;
> >
> > I was naively thinking if we could nuke that "index". I can see that used
> > in couple macros, but seems like we can lose it?
> >
> > Completely untested.. and uncertain :-)
> >
>
> If you had suggested to replace
> struct sensor_device_attribute sd_attrs[TOTAL_ATTRS];
> with
> struct device_attribute sd_attrs[TOTAL_ATTRS];
> what you suggested may actually be possible and make sense. However,
> suggesting to dump the index parameter of SENSOR_ATTR() completely
> because _this_ driver may no longer need it seems to be a little excessive.

I should have highlighted the uncertain :-).. Said naively thinking to add
color that I'm calling it blind. But what you suggest might make more
sense.

I was just suggesting if there is more cleanup that could be accomplished along
with this might be a good thing.

I tried a quick and dirty cleanup.. apparently it was more dirty I guess
:-)

>
> >
> > diff --git a/include/linux/hwmon-sysfs.h b/include/linux/hwmon-sysfs.h
> > index d896713359cd..4855893f9401 100644
> > --- a/include/linux/hwmon-sysfs.h
> > +++ b/include/linux/hwmon-sysfs.h
> > @@ -12,36 +12,35 @@
> > struct sensor_device_attribute{
> > struct device_attribute dev_attr;
> > - int index;
> > };
> > #define to_sensor_dev_attr(_dev_attr) \
> > container_of(_dev_attr, struct sensor_device_attribute, dev_attr)
> > -#define SENSOR_ATTR(_name, _mode, _show, _store, _index) \
> > +#define SENSOR_ATTR(_name, _mode, _show, _store) \
> > { .dev_attr = __ATTR(_name, _mode, _show, _store), \
> > - .index = _index }
> > + }
> > -#define SENSOR_ATTR_RO(_name, _func, _index) \
> > +#define SENSOR_ATTR_RO(_name, _func) \
> > SENSOR_ATTR(_name, 0444, _func##_show, NULL, _index)
> > -#define SENSOR_ATTR_RW(_name, _func, _index) \
> > - SENSOR_ATTR(_name, 0644, _func##_show, _func##_store, _index)
> > +#define SENSOR_ATTR_RW(_name, _func) \
> > + SENSOR_ATTR(_name, 0644, _func##_show, _func##_store)
> > -#define SENSOR_ATTR_WO(_name, _func, _index) \
> > - SENSOR_ATTR(_name, 0200, NULL, _func##_store, _index)
> > +#define SENSOR_ATTR_WO(_name, _func) \
> > + SENSOR_ATTR(_name, 0200, NULL, _func##_store)
> > -#define SENSOR_DEVICE_ATTR(_name, _mode, _show, _store, _index) \
> > +#define SENSOR_DEVICE_ATTR(_name, _mode, _show, _store) \
> > struct sensor_device_attribute sensor_dev_attr_##_name \
> > - = SENSOR_ATTR(_name, _mode, _show, _store, _index)
> > + = SENSOR_ATTR(_name, _mode, _show, _store)
> > -#define SENSOR_DEVICE_ATTR_RO(_name, _func, _index) \
> > - SENSOR_DEVICE_ATTR(_name, 0444, _func##_show, NULL, _index)
> > +#define SENSOR_DEVICE_ATTR_RO(_name, _func) \
> > + SENSOR_DEVICE_ATTR(_name, 0444, _func##_show, NULL)
> > #define SENSOR_DEVICE_ATTR_RW(_name, _func, _index) \
> > - SENSOR_DEVICE_ATTR(_name, 0644, _func##_show, _func##_store, _index)
> > + SENSOR_DEVICE_ATTR(_name, 0644, _func##_show, _func##_store)
> > -#define SENSOR_DEVICE_ATTR_WO(_name, _func, _index) \
> > - SENSOR_DEVICE_ATTR(_name, 0200, NULL, _func##_store, _index)
> > +#define SENSOR_DEVICE_ATTR_WO(_name, _func) \
> > + SENSOR_DEVICE_ATTR(_name, 0200, NULL, _func##_store)
> > struct sensor_device_attribute_2 {
> > struct device_attribute dev_attr;
> > diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
> > index 975da8e7f2a9..c3bbf2f7d6eb 100644
> > --- a/drivers/gpu/drm/i915/i915_hwmon.c
> > +++ b/drivers/gpu/drm/i915/i915_hwmon.c
> > @@ -239,7 +239,7 @@ hwm_power1_max_interval_store(struct device *dev,
> > static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
> > hwm_power1_max_interval_show,
> > - hwm_power1_max_interval_store, 0);
> > + hwm_power1_max_interval_store);
>
> That driver could and should have used DEVICE_ATTR() instead of
> SENSOR_DEVICE_ATTR(), and there are various other drivers where
> that would have made sense. Actually, it should have used
> DEVICE_ATTR_RW() but that is just a detail.
>
> However, there are more than 2,000 uses of SENSOR_DEVICE_ATTR() and derived
> macros in the kernel. The large majority of those do set index to values != 0,
> and the affected drivers would not be happy if that argument disappeared.
>
> Frankly, I am not entirely sure if you were serious with your suggestion.

Certainly can't be serious.. but I was hinting at additional cleanups.. but
I picked the wrong one obviously.

> I tried to give a serious reply, but I am not entirely sure if I succeeded.
> My apologies if some sarcasm was bleeding through.

:-)... sarcasm is OK..

2023-12-01 04:47:37

by Raj, Ashok

[permalink] [raw]
Subject: Re: [PATCH 1/3] hwmon: (coretemp) Introduce enum for attr index

On Thu, Nov 30, 2023 at 08:14:48PM -0800, Guenter Roeck wrote:
> On 11/30/23 13:51, Ashok Raj wrote:
> > On Mon, Nov 27, 2023 at 09:16:49PM +0800, Zhang Rui wrote:
> > > Introduce enum coretemp_attr_index to better describe the index of each
> > > sensor attribute and the maximum number of basic/possible attributes.
> > >
> > > No functional change.
> > >
> > > Signed-off-by: Zhang Rui <[email protected]>
> > > ---
> > > drivers/hwmon/coretemp.c | 12 ++++++++++--
> > > 1 file changed, 10 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> > > index ba82d1e79c13..6053ed3761c2 100644
> > > --- a/drivers/hwmon/coretemp.c
> > > +++ b/drivers/hwmon/coretemp.c
> > > @@ -43,10 +43,18 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
> > > #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
> > > #define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
> > > #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
> > > -#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
> > > -#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
> > > #define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
> > > +enum coretemp_attr_index {
> > > + ATTR_LABEL,
> > > + ATTR_CRIT_ALARM,
> > > + ATTR_TEMP,
> > > + ATTR_TJMAX,
> > > + ATTR_TTARGET,
> > > + TOTAL_ATTRS, /* Maximum no of possible attrs */
> > > + MAX_CORE_ATTRS = ATTR_TJMAX + 1 /* Maximum no of basic attrs */
> >
> > This seems odd. TOTAL_ATTRS being the last entry seems fine, but defining a
> > MAX_CORE_ATTR the way above sounds a bit hacky.
> >
>
> Complaining is easy. What do you suggest that would be better ?
>
Fair enough.

How about

ATTR_LABEL,
ATTR_CRIT_ALARM,
ATTR_TEMP,
ATTR_TJMAX,
MAX_CORE_ATTRS, /* One more than TJMAX */
ATTR_TTARGET = MAX_CORE_ATTRS,
TOTAL_ATTRS

Each enum can be assigned any value, but this way they are just increasing
order.


2023-12-01 17:30:02

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH 1/3] hwmon: (coretemp) Introduce enum for attr index

On Thu, 2023-11-30 at 20:47 -0800, Ashok Raj wrote:
> On Thu, Nov 30, 2023 at 08:14:48PM -0800, Guenter Roeck wrote:
> > On 11/30/23 13:51, Ashok Raj wrote:
> > > On Mon, Nov 27, 2023 at 09:16:49PM +0800, Zhang Rui wrote:
> > > > Introduce enum coretemp_attr_index to better describe the index
> > > > of each
> > > > sensor attribute and the maximum number of basic/possible
> > > > attributes.
> > > >
> > > > No functional change.
> > > >
> > > > Signed-off-by: Zhang Rui <[email protected]>
> > > > ---
> > > >   drivers/hwmon/coretemp.c | 12 ++++++++++--
> > > >   1 file changed, 10 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/hwmon/coretemp.c
> > > > b/drivers/hwmon/coretemp.c
> > > > index ba82d1e79c13..6053ed3761c2 100644
> > > > --- a/drivers/hwmon/coretemp.c
> > > > +++ b/drivers/hwmon/coretemp.c
> > > > @@ -43,10 +43,18 @@ MODULE_PARM_DESC(tjmax, "TjMax value in
> > > > degrees Celsius");
> > > >   #define BASE_SYSFS_ATTR_NO    2       /* Sysfs Base attr no
> > > > for coretemp */
> > > >   #define NUM_REAL_CORES                128     /* Number of
> > > > Real cores per cpu */
> > > >   #define CORETEMP_NAME_LENGTH  28      /* String Length of
> > > > attrs */
> > > > -#define MAX_CORE_ATTRS         4       /* Maximum no of basic
> > > > attrs */
> > > > -#define TOTAL_ATTRS            (MAX_CORE_ATTRS + 1)
> > > >   #define MAX_CORE_DATA         (NUM_REAL_CORES +
> > > > BASE_SYSFS_ATTR_NO)
> > > > +enum coretemp_attr_index {
> > > > +       ATTR_LABEL,
> > > > +       ATTR_CRIT_ALARM,
> > > > +       ATTR_TEMP,
> > > > +       ATTR_TJMAX,
> > > > +       ATTR_TTARGET,
> > > > +       TOTAL_ATTRS,                    /* Maximum no of
> > > > possible attrs */
> > > > +       MAX_CORE_ATTRS = ATTR_TJMAX + 1 /* Maximum no of basic
> > > > attrs */
> > >
> > > This seems odd. TOTAL_ATTRS being the last entry seems fine, but
> > > defining a
> > > MAX_CORE_ATTR the way above sounds a bit hacky.
> > >
> >
> > Complaining is easy. What do you suggest that would be better ?
> >
> Fair enough.
>
> How about
>
> ATTR_LABEL,
> ATTR_CRIT_ALARM,
> ATTR_TEMP,
> ATTR_TJMAX,
> MAX_CORE_ATTRS,         /* One more than TJMAX */
> ATTR_TTARGET = MAX_CORE_ATTRS,
> TOTAL_ATTRS
>
> Each enum can be assigned any value, but this way they are just
> increasing
> order.

ATTR_TTARGET is the next attribute after ATTR_TJMAX so it should be
right after ATTR_TJMAX.
MAX_CORE_ATTRS is the number of basic attributes so it should be
ATTR_TJMAX + 1.
TOTAL_ATTRS is the number of possible attributes so it should be
ATTR_TTARGET + 1

ATTR_LABEL, // 0
ATTR_CRIT_ALARM, // 1
ATTR_TEMP, // 2
ATTR_TJMAX, // 3
ATTR_TTARGET, // 4
MAX_CORE_ATTRS = ATTR_TJMAX + 1, // 4
TOTAL_ATTRS = ATTR_TTARGET + 1, // 5

How about this one?

thanks,
rui

2023-12-01 17:31:49

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH 2/3] hwmon: (coretemp) Remove unnecessary dependency of array index


> > >         mutex_lock(&tdata->update_lock);
> > > @@ -445,7 +441,6 @@ static int create_core_attrs(struct temp_data
> > > *tdata, struct device *dev,
> > >                 tdata->sd_attrs[i].dev_attr.attr.name = tdata-
> > > >attr_name[i];
> > >                 tdata->sd_attrs[i].dev_attr.attr.mode = 0444;
> > >                 tdata->sd_attrs[i].dev_attr.show = rd_ptr[i];
> > > -               tdata->sd_attrs[i].index = attr_no;
> >
> > I was naively thinking if we could nuke that "index". I can see
> > that used
> > in couple macros, but seems like we can lose it?
> >
> > Completely untested.. and uncertain :-)
> >
>
> If you had suggested to replace
>         struct sensor_device_attribute sd_attrs[TOTAL_ATTRS];
> with
>         struct device_attribute sd_attrs[TOTAL_ATTRS];
> what you suggested may actually be possible and make sense.

Too late for me today.
Let me check if I can convert it to use device_attribute instead
tomorrow.

thanks,
rui

2023-12-01 17:41:38

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH 3/3] hwmon: (coretemp) Fix core count limitation

On Thu, 2023-11-30 at 19:06 -0800, Guenter Roeck wrote:
> On 11/27/23 05:16, Zhang Rui wrote:
> > Currently, coretemp driver only supports 128 cores per package.
> > This loses some core temperation information on systems that have
> > more
> > than 128 cores per package.
> >   [   58.685033] coretemp coretemp.0: Adding Core 128 failed
> >   [   58.692009] coretemp coretemp.0: Adding Core 129 failed
> >
> > Fix the problem by using a per package list to maintain the per
> > core
> > temp_data instead of the fixed length pdata->core_data[] array.
> >
> > Signed-off-by: Zhang Rui <[email protected]>
> > ---
> >   drivers/hwmon/coretemp.c | 110 ++++++++++++++++++----------------
> > -----
> >   1 file changed, 52 insertions(+), 58 deletions(-)
> >
> > diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> > index cef43fedbd58..1bb1a6e4b07b 100644
> > --- a/drivers/hwmon/coretemp.c
> > +++ b/drivers/hwmon/coretemp.c
> > @@ -39,11 +39,7 @@ static int force_tjmax;
> >   module_param_named(tjmax, force_tjmax, int, 0444);
> >   MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
> >  
> > -#define PKG_SYSFS_ATTR_NO      1       /* Sysfs attribute for
> > package temp */
> > -#define BASE_SYSFS_ATTR_NO     2       /* Sysfs Base attr no for
> > coretemp */
> > -#define NUM_REAL_CORES         128     /* Number of Real cores per
> > cpu */
> >   #define CORETEMP_NAME_LENGTH  28      /* String Length of attrs
> > */
> > -#define MAX_CORE_DATA          (NUM_REAL_CORES +
> > BASE_SYSFS_ATTR_NO)
> >  
> >   enum coretemp_attr_index {
> >         ATTR_LABEL,
> > @@ -90,17 +86,17 @@ struct temp_data {
> >         struct attribute *attrs[TOTAL_ATTRS + 1];
> >         struct attribute_group attr_group;
> >         struct mutex update_lock;
> > +       struct list_head node;
> >   };
> >  
> >   /* Platform Data per Physical CPU */
> >   struct platform_data {
> >         struct device           *hwmon_dev;
> >         u16                     pkg_id;
> > -       u16                     cpu_map[NUM_REAL_CORES];
> > -       struct ida              ida;
> >         struct cpumask          cpumask;
> > -       struct temp_data        *core_data[MAX_CORE_DATA];
> >         struct device_attribute name_attr;
> > +       struct mutex            core_data_lock;
> > +       struct list_head        core_data_list;
> >   };
> >  
> >   struct tjmax_pci {
> > @@ -491,6 +487,23 @@ static struct temp_data
> > *init_temp_data(unsigned int cpu, int pkg_flag)
> >         return tdata;
> >   }
> >  
> > +static struct temp_data *get_tdata(struct platform_data *pdata,
> > int cpu)
> > +{
> > +       struct temp_data *tdata;
> > +
> > +       mutex_lock(&pdata->core_data_lock);
> > +       list_for_each_entry(tdata, &pdata->core_data_list, node) {
> > +               if (cpu >= 0 && !tdata->is_pkg_data && tdata-
> > >cpu_core_id == topology_core_id(cpu))
> > +                       goto found;
> > +               if (cpu < 0 && tdata->is_pkg_data)
> > +                       goto found;
> > +       }
>
> I really don't like this. With 128+ cores, it gets terribly
> expensive.

I think this is only invoked in the cpu online/offline code, which is
not the hot path.

>
> How about calculating the number of cores in the probe function and
> allocating cpu_map[] and core_data[] instead ?

Problem is that the number of cores is not available due to some
limitations in current CPU topology code.

Thomas has some patch series to fix this but that has not been merged
yet and we don't know when.

A simpler workaround for this issue is to change NUM_REAL_CORES to a
bigger value, and do dynamic memory allocation first. And we can change
the code to use the real number of cores later if that information
becomes available.

thanks,
rui

2023-12-01 17:47:59

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH 3/3] hwmon: (coretemp) Fix core count limitation

On Thu, 2023-11-30 at 18:08 -0800, Ashok Raj wrote:
> On Mon, Nov 27, 2023 at 09:16:51PM +0800, Zhang Rui wrote:
> > Currently, coretemp driver only supports 128 cores per package.
> > This loses some core temperation information on systems that have
> > more
>
> s/temperation/temperature
>
> > than 128 cores per package.
> >  [   58.685033] coretemp coretemp.0: Adding Core 128 failed
> >  [   58.692009] coretemp coretemp.0: Adding Core 129 failed
> >
> > Fix the problem by using a per package list to maintain the per
> > core
> > temp_data instead of the fixed length pdata->core_data[] array.
> >
> > Signed-off-by: Zhang Rui <[email protected]>
> > ---
> >  drivers/hwmon/coretemp.c | 110 ++++++++++++++++++-----------------
> > ----
> >  1 file changed, 52 insertions(+), 58 deletions(-)
> >
> > diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> > index cef43fedbd58..1bb1a6e4b07b 100644
> > --- a/drivers/hwmon/coretemp.c
> > +++ b/drivers/hwmon/coretemp.c
> > @@ -39,11 +39,7 @@ static int force_tjmax;
> >  module_param_named(tjmax, force_tjmax, int, 0444);
> >  MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
> >  
> > -#define PKG_SYSFS_ATTR_NO      1       /* Sysfs attribute for
> > package temp */
> > -#define BASE_SYSFS_ATTR_NO     2       /* Sysfs Base attr no for
> > coretemp */
> > -#define NUM_REAL_CORES         128     /* Number of Real cores per
> > cpu */
> >  #define CORETEMP_NAME_LENGTH   28      /* String Length of attrs
> > */
> > -#define MAX_CORE_DATA          (NUM_REAL_CORES +
> > BASE_SYSFS_ATTR_NO)
> >  
> >  enum coretemp_attr_index {
> >         ATTR_LABEL,
> > @@ -90,17 +86,17 @@ struct temp_data {
> >         struct attribute *attrs[TOTAL_ATTRS + 1];
> >         struct attribute_group attr_group;
> >         struct mutex update_lock;
> > +       struct list_head node;
> >  };
> >  
> >  /* Platform Data per Physical CPU */
> >  struct platform_data {
> >         struct device           *hwmon_dev;
> >         u16                     pkg_id;
> > -       u16                     cpu_map[NUM_REAL_CORES];
> > -       struct ida              ida;
> >         struct cpumask          cpumask;
> > -       struct temp_data        *core_data[MAX_CORE_DATA];
> >         struct device_attribute name_attr;
> > +       struct mutex            core_data_lock;
> > +       struct list_head        core_data_list;
> >  };
> >  
> >  struct tjmax_pci {
> > @@ -491,6 +487,23 @@ static struct temp_data
> > *init_temp_data(unsigned int cpu, int pkg_flag)
> >         return tdata;
> >  }
> >  
> > +static struct temp_data *get_tdata(struct platform_data *pdata,
> > int cpu)
> > +{
> > +       struct temp_data *tdata;
> > +
> > +       mutex_lock(&pdata->core_data_lock);
> > +       list_for_each_entry(tdata, &pdata->core_data_list, node) {
> > +               if (cpu >= 0 && !tdata->is_pkg_data && tdata-
> > >cpu_core_id == topology_core_id(cpu))
> > +                       goto found;
> > +               if (cpu < 0 && tdata->is_pkg_data)
> > +                       goto found;
> > +       }
> > +       tdata = NULL;
>
> What used to be an array, is now a list? Is it possible to get the
> number
> of cores_per_package during initialization and allocate the per-core?
> You
> can still get them indexing from core_id and you can possibly lose
> the
> mutex and search?
>
> I don't know this code well enough... Just a thought.

yeah, sadly cores_per_package is not available for now as I mentioned
in the other email.

>
> > +found:
> > +       mutex_unlock(&pdata->core_data_lock);
> > +       return tdata;
> > +}
> > +
> >  static int create_core_data(struct platform_device *pdev, unsigned
> > int cpu,
> >                             int pkg_flag)
> >  {
> > @@ -498,37 +511,29 @@ static int create_core_data(struct
> > platform_device *pdev, unsigned int cpu,
> >         struct platform_data *pdata = platform_get_drvdata(pdev);
> >         struct cpuinfo_x86 *c = &cpu_data(cpu);
> >         u32 eax, edx;
> > -       int err, index, attr_no;
> > +       int err, attr_no;
> >  
> >         if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
> >                 return 0;
> >  
> > +       tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
> > +       if (tdata)
> > +               return -EEXIST;
> > +
> > +       tdata = init_temp_data(cpu, pkg_flag);
>
> Is temp_data per_cpu or per_core?

it is per_core.

> Wasn't sure if temp_data needs a CPU
> number there along with cpu_core_id

CPU number is needed to access the core temperature MSRs.

thanks,
rui

>
>
> > +       if (!tdata)
> > +               return -ENOMEM;
> > +
> >         /*
> >          * Find attr number for sysfs:
> >          * We map the attr number to core id of the CPU
> >          * The attr number is always core id + 2
> >          * The Pkgtemp will always show up as temp1_*, if available
> >          */
> > -       if (pkg_flag) {
> > -               attr_no = PKG_SYSFS_ATTR_NO;
> > -       } else {
> > -               index = ida_alloc(&pdata->ida, GFP_KERNEL);
> > -               if (index < 0)
> > -                       return index;
> > -               pdata->cpu_map[index] = topology_core_id(cpu);
> > -               attr_no = index + BASE_SYSFS_ATTR_NO;
> > -       }
> > -
> > -       if (attr_no > MAX_CORE_DATA - 1) {
> > -               err = -ERANGE;
> > -               goto ida_free;
> > -       }
> > -
> > -       tdata = init_temp_data(cpu, pkg_flag);
> > -       if (!tdata) {
> > -               err = -ENOMEM;
> > -               goto ida_free;
> > -       }
> > +       if (pkg_flag)
> > +               attr_no = 1;
> > +       else
> > +               attr_no = tdata->cpu_core_id + 2;
> >  
> >         /* Test if we can access the status register */
> >         err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax,
> > &edx);
> > @@ -547,20 +552,18 @@ static int create_core_data(struct
> > platform_device *pdev, unsigned int cpu,
> >                 if (get_ttarget(tdata, &pdev->dev) >= 0)
> >                         tdata->attr_size++;
> >  
> > -       pdata->core_data[attr_no] = tdata;
> > -
> >         /* Create sysfs interfaces */
> >         err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
> >         if (err)
> >                 goto exit_free;
> >  
> > +       mutex_lock(&pdata->core_data_lock);
> > +       list_add(&tdata->node, &pdata->core_data_list);
> > +       mutex_unlock(&pdata->core_data_lock);
> > +
> >         return 0;
> >  exit_free:
> > -       pdata->core_data[attr_no] = NULL;
> >         kfree(tdata);
> > -ida_free:
> > -       if (!pkg_flag)
> > -               ida_free(&pdata->ida, index);
> >         return err;
> >  }
> >  
> > @@ -571,9 +574,9 @@ coretemp_add_core(struct platform_device *pdev,
> > unsigned int cpu, int pkg_flag)
> >                 dev_err(&pdev->dev, "Adding Core %u failed\n",
> > cpu);
> >  }
> >  
> > -static void coretemp_remove_core(struct platform_data *pdata, int
> > indx)
> > +static void coretemp_remove_core(struct platform_data *pdata, int
> > cpu)
> >  {
> > -       struct temp_data *tdata = pdata->core_data[indx];
> > +       struct temp_data *tdata = get_tdata(pdata, cpu);
> >  
> >         /* if we errored on add then this is already gone */
> >         if (!tdata)
> > @@ -582,11 +585,11 @@ static void coretemp_remove_core(struct
> > platform_data *pdata, int indx)
> >         /* Remove the sysfs attributes */
> >         sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata-
> > >attr_group);
> >  
> > -       kfree(pdata->core_data[indx]);
> > -       pdata->core_data[indx] = NULL;
> > +       mutex_lock(&pdata->core_data_lock);
> > +       list_del(&tdata->node);
> > +       mutex_unlock(&pdata->core_data_lock);
> >  
> > -       if (indx >= BASE_SYSFS_ATTR_NO)
> > -               ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
> > +       kfree(tdata);
> >  }
> >  
> >  static int coretemp_device_add(int zoneid)
> > @@ -601,7 +604,8 @@ static int coretemp_device_add(int zoneid)
> >                 return -ENOMEM;
> >  
> >         pdata->pkg_id = zoneid;
> > -       ida_init(&pdata->ida);
> > +       mutex_init(&pdata->core_data_lock);
> > +       INIT_LIST_HEAD(&pdata->core_data_list);
> >  
> >         pdev = platform_device_alloc(DRVNAME, zoneid);
> >         if (!pdev) {
> > @@ -629,7 +633,6 @@ static void coretemp_device_remove(int zoneid)
> >         struct platform_device *pdev = zone_devices[zoneid];
> >         struct platform_data *pdata = platform_get_drvdata(pdev);
> >  
> > -       ida_destroy(&pdata->ida);
> >         kfree(pdata);
> >         platform_device_unregister(pdev);
> >  }
> > @@ -699,7 +702,7 @@ static int coretemp_cpu_offline(unsigned int
> > cpu)
> >         struct platform_device *pdev = coretemp_get_pdev(cpu);
> >         struct platform_data *pd;
> >         struct temp_data *tdata;
> > -       int i, indx = -1, target;
> > +       int target;
> >  
> >         /* No need to tear down any interfaces for suspend */
> >         if (cpuhp_tasks_frozen)
> > @@ -710,19 +713,10 @@ static int coretemp_cpu_offline(unsigned int
> > cpu)
> >         if (!pd->hwmon_dev)
> >                 return 0;
> >  
> > -       for (i = 0; i < NUM_REAL_CORES; i++) {
> > -               if (pd->cpu_map[i] == topology_core_id(cpu)) {
> > -                       indx = i + BASE_SYSFS_ATTR_NO;
> > -                       break;
> > -               }
> > -       }
> > -
> > -       /* Too many cores and this core is not populated, just
> > return */
> > -       if (indx < 0)
> > +       tdata = get_tdata(pd, cpu);
> > +       if (!tdata)
> >                 return 0;
> >  
> > -       tdata = pd->core_data[indx];
> > -
> >         cpumask_clear_cpu(cpu, &pd->cpumask);
> >  
> >         /*
> > @@ -732,20 +726,20 @@ static int coretemp_cpu_offline(unsigned int
> > cpu)
> >          */
> >         target = cpumask_any_and(&pd->cpumask,
> > topology_sibling_cpumask(cpu));
> >         if (target >= nr_cpu_ids) {
> > -               coretemp_remove_core(pd, indx);
> > -       } else if (tdata && tdata->cpu == cpu) {
> > +               coretemp_remove_core(pd, cpu);
> > +       } else if (tdata->cpu == cpu) {
> >                 mutex_lock(&tdata->update_lock);
> >                 tdata->cpu = target;
> >                 mutex_unlock(&tdata->update_lock);
> >         }
> >  
> > +       tdata = get_tdata(pd, -1);
> >         /*
> >          * If all cores in this pkg are offline, remove the
> > interface.
> >          */
> > -       tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
> >         if (cpumask_empty(&pd->cpumask)) {
> >                 if (tdata)
> > -                       coretemp_remove_core(pd,
> > PKG_SYSFS_ATTR_NO);
> > +                       coretemp_remove_core(pd, -1);
> >                 hwmon_device_unregister(pd->hwmon_dev);
> >                 pd->hwmon_dev = NULL;
> >                 return 0;
> > --
> > 2.34.1
> >

2023-12-01 22:58:40

by Raj, Ashok

[permalink] [raw]
Subject: Re: [PATCH 3/3] hwmon: (coretemp) Fix core count limitation

On Fri, Dec 01, 2023 at 09:47:30AM -0800, Zhang, Rui wrote:
> On Thu, 2023-11-30 at 18:08 -0800, Ashok Raj wrote:
> > On Mon, Nov 27, 2023 at 09:16:51PM +0800, Zhang Rui wrote:
> > > ?
> > > +static struct temp_data *get_tdata(struct platform_data *pdata,
> > > int cpu)
> > > +{
> > > +???????struct temp_data *tdata;
> > > +
> > > +???????mutex_lock(&pdata->core_data_lock);
> > > +???????list_for_each_entry(tdata, &pdata->core_data_list, node) {
> > > +???????????????if (cpu >= 0 && !tdata->is_pkg_data && tdata-
> > > >cpu_core_id == topology_core_id(cpu))
> > > +???????????????????????goto found;
> > > +???????????????if (cpu < 0 && tdata->is_pkg_data)
> > > +???????????????????????goto found;
> > > +???????}
> > > +???????tdata = NULL;
> >
> > What used to be an array, is now a list? Is it possible to get the
> > number
> > of cores_per_package during initialization and allocate the per-core?
> > You
> > can still get them indexing from core_id and you can possibly lose
> > the
> > mutex and search?
> >
> > I don't know this code well enough... Just a thought.
>
> yeah, sadly cores_per_package is not available for now as I mentioned
> in the other email.

Couldn't we reuse the logic from Thomas's topology work that gives this
upfront based on 0x1f?

>
> >
> > > +found:
> > > +???????mutex_unlock(&pdata->core_data_lock);
> > > +???????return tdata;
> > > +}
> > > +
> > > ?static int create_core_data(struct platform_device *pdev, unsigned
> > > int cpu,
> > > ??????????????????????????? int pkg_flag)
> > > ?{
> > > @@ -498,37 +511,29 @@ static int create_core_data(struct
> > > platform_device *pdev, unsigned int cpu,
> > > ????????struct platform_data *pdata = platform_get_drvdata(pdev);
> > > ????????struct cpuinfo_x86 *c = &cpu_data(cpu);
> > > ????????u32 eax, edx;
> > > -???????int err, index, attr_no;
> > > +???????int err, attr_no;
> > > ?
> > > ????????if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
> > > ????????????????return 0;
> > > ?
> > > +???????tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
> > > +???????if (tdata)
> > > +???????????????return -EEXIST;
> > > +
> > > +???????tdata = init_temp_data(cpu, pkg_flag);
> >
> > Is temp_data per_cpu or per_core?
>
> it is per_core.
>
> > Wasn't sure if temp_data needs a CPU
> > number there along with cpu_core_id
>
> CPU number is needed to access the core temperature MSRs.

What if that cpu is currently offline? maybe you can simply search
for_each_online_cpu() and match core_id from cpuinfo_x86?

2023-12-02 07:46:25

by Zhang, Rui

[permalink] [raw]
Subject: Re: [PATCH 3/3] hwmon: (coretemp) Fix core count limitation

On Fri, 2023-12-01 at 14:58 -0800, Ashok Raj wrote:
> On Fri, Dec 01, 2023 at 09:47:30AM -0800, Zhang, Rui wrote:
> > On Thu, 2023-11-30 at 18:08 -0800, Ashok Raj wrote:
> > > On Mon, Nov 27, 2023 at 09:16:51PM +0800, Zhang Rui wrote:
> > > >  
> > > > +static struct temp_data *get_tdata(struct platform_data
> > > > *pdata,
> > > > int cpu)
> > > > +{
> > > > +       struct temp_data *tdata;
> > > > +
> > > > +       mutex_lock(&pdata->core_data_lock);
> > > > +       list_for_each_entry(tdata, &pdata->core_data_list,
> > > > node) {
> > > > +               if (cpu >= 0 && !tdata->is_pkg_data && tdata-
> > > > > cpu_core_id == topology_core_id(cpu))
> > > > +                       goto found;
> > > > +               if (cpu < 0 && tdata->is_pkg_data)
> > > > +                       goto found;
> > > > +       }
> > > > +       tdata = NULL;
> > >
> > > What used to be an array, is now a list? Is it possible to get
> > > the
> > > number
> > > of cores_per_package during initialization and allocate the per-
> > > core?
> > > You
> > > can still get them indexing from core_id and you can possibly
> > > lose
> > > the
> > > mutex and search?
> > >
> > > I don't know this code well enough... Just a thought.
> >
> > yeah, sadly cores_per_package is not available for now as I
> > mentioned
> > in the other email.
>
> Couldn't we reuse the logic from Thomas's topology work that gives
> this
> upfront based on 0x1f?

this sounds duplicate work to me. To me, there is no such an urgent
requirement that is worth this whole effort.

>
> >
> > >
> > > > +found:
> > > > +       mutex_unlock(&pdata->core_data_lock);
> > > > +       return tdata;
> > > > +}
> > > > +
> > > >  static int create_core_data(struct platform_device *pdev,
> > > > unsigned
> > > > int cpu,
> > > >                             int pkg_flag)
> > > >  {
> > > > @@ -498,37 +511,29 @@ static int create_core_data(struct
> > > > platform_device *pdev, unsigned int cpu,
> > > >         struct platform_data *pdata =
> > > > platform_get_drvdata(pdev);
> > > >         struct cpuinfo_x86 *c = &cpu_data(cpu);
> > > >         u32 eax, edx;
> > > > -       int err, index, attr_no;
> > > > +       int err, attr_no;
> > > >  
> > > >         if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
> > > >                 return 0;
> > > >  
> > > > +       tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
> > > > +       if (tdata)
> > > > +               return -EEXIST;
> > > > +
> > > > +       tdata = init_temp_data(cpu, pkg_flag);
> > >
> > > Is temp_data per_cpu or per_core?
> >
> > it is per_core.
> >
> > >  Wasn't sure if temp_data needs a CPU
> > > number there along with cpu_core_id
> >
> > CPU number is needed to access the core temperature MSRs.
>
> What if that cpu is currently offline? maybe you can simply search
> for_each_online_cpu() and match core_id from cpuinfo_x86?
>
This is already handled in the cpu online/offline callbacks.
Each temp_data is always associated with an online CPU that can be used
to update the core temperature info.

thanks,
rui

2023-12-05 19:20:50

by Raj, Ashok

[permalink] [raw]
Subject: Re: [PATCH 1/3] hwmon: (coretemp) Introduce enum for attr index

On Fri, Dec 01, 2023 at 09:29:24AM -0800, Zhang, Rui wrote:

[snip]

> >
> > How about
> >
> > ATTR_LABEL,
> > ATTR_CRIT_ALARM,
> > ATTR_TEMP,
> > ATTR_TJMAX,
> > MAX_CORE_ATTRS,?????????/* One more than TJMAX */
> > ATTR_TTARGET = MAX_CORE_ATTRS,
> > TOTAL_ATTRS
> >
> > Each enum can be assigned any value, but this way they are just
> > increasing
> > order.
>
> ATTR_TTARGET is the next attribute after ATTR_TJMAX so it should be
> right after ATTR_TJMAX.
> MAX_CORE_ATTRS is the number of basic attributes so it should be
> ATTR_TJMAX + 1.
> TOTAL_ATTRS is the number of possible attributes so it should be
> ATTR_TTARGET + 1
>
> ATTR_LABEL, // 0
> ATTR_CRIT_ALARM, // 1
> ATTR_TEMP, // 2
> ATTR_TJMAX, // 3
> ATTR_TTARGET, // 4
> MAX_CORE_ATTRS = ATTR_TJMAX + 1, // 4
> TOTAL_ATTRS = ATTR_TTARGET + 1, // 5
>
> How about this one?

Sorry for the delay... yes, this sounds fine.