Remove the limit on the number of supported CPU cores and on the maximum
core ID. Based on Kirill A. Shutemov's work from 2012.
Signed-off-by: Lukasz Odzioba <[email protected]>
---
drivers/hwmon/coretemp.c | 120 ++++++++++++++++++++++++++++-----------------
1 files changed, 75 insertions(+), 45 deletions(-)
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 3e03379..c39ce14 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -52,11 +52,10 @@ module_param_named(tjmax, force_tjmax, int, 0444);
MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
-#define NUM_REAL_CORES 32 /* Number of Real cores per cpu */
#define CORETEMP_NAME_LENGTH 19 /* String Length of attrs */
#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
-#define MAX_CORE_DATA (NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
+#define PACKAGE_ID 1 /* Attr no reserved for the package temp */
#define TO_PHYS_ID(cpu) (cpu_data(cpu).phys_proc_id)
#define TO_CORE_ID(cpu) (cpu_data(cpu).cpu_core_id)
@@ -71,18 +70,20 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
/*
* Per-Core Temperature Data
+ * @id: If this is equal to PACKAGE_ID, the structure holds package
+ * temperature data; otherwise it is TO_ATTR_NO(cpu).
* @last_updated: The time when the current temperature value was updated
* earlier (in jiffies).
* @cpu_core_id: The CPU Core from which temperature values should be read
* This value is passed as "id" field to rdmsr/wrmsr functions.
* @status_reg: One of IA32_THERM_STATUS or IA32_PACKAGE_THERM_STATUS,
* from where the temperature values should be read.
- * @attr_size: Total number of pre-core attrs displayed in the sysfs.
- * @is_pkg_data: If this is 1, the temp_data holds pkgtemp data.
- * Otherwise, temp_data holds coretemp data.
+ * @attr_size: Total number of per-core attrs displayed in the sysfs.
* @valid: If this is 1, the current temperature is valid.
*/
struct temp_data {
+	struct list_head list;
+	int id;
int temp;
int ttarget;
int tjmax;
@@ -104,7 +105,8 @@ struct temp_data {
struct platform_data {
struct device *hwmon_dev;
u16 phys_proc_id;
- struct temp_data *core_data[MAX_CORE_DATA];
+ struct list_head temp_data_list;
+ struct mutex temp_data_lock;
struct device_attribute name_attr;
};
@@ -117,12 +119,26 @@ struct pdev_entry {
static LIST_HEAD(pdev_list);
static DEFINE_MUTEX(pdev_list_mutex);
+static struct temp_data *get_temp_data(struct platform_data *pdata, int id)
+{
+ struct temp_data *tdata;
+
+ mutex_lock(&pdata->temp_data_lock);
+ list_for_each_entry(tdata, &pdata->temp_data_list, list)
+ if (tdata->id == id) {
+ mutex_unlock(&pdata->temp_data_lock);
+ return tdata;
+ }
+ mutex_unlock(&pdata->temp_data_lock);
+ return NULL;
+}
+
static ssize_t show_label(struct device *dev,
struct device_attribute *devattr, char *buf)
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct platform_data *pdata = dev_get_drvdata(dev);
- struct temp_data *tdata = pdata->core_data[attr->index];
+ struct temp_data *tdata = get_temp_data(pdata, attr->index);
if (tdata->is_pkg_data)
return sprintf(buf, "Physical id %u\n", pdata->phys_proc_id);
@@ -136,7 +152,7 @@ static ssize_t show_crit_alarm(struct device *dev,
u32 eax, edx;
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct platform_data *pdata = dev_get_drvdata(dev);
- struct temp_data *tdata = pdata->core_data[attr->index];
+ struct temp_data *tdata = get_temp_data(pdata, attr->index);
rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
@@ -148,8 +164,9 @@ static ssize_t show_tjmax(struct device *dev,
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct platform_data *pdata = dev_get_drvdata(dev);
+ struct temp_data *tdata = get_temp_data(pdata, attr->index);
- return sprintf(buf, "%d\n", pdata->core_data[attr->index]->tjmax);
+ return sprintf(buf, "%d\n", tdata->tjmax);
}
static ssize_t show_ttarget(struct device *dev,
@@ -157,8 +174,9 @@ static ssize_t show_ttarget(struct device *dev,
{
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct platform_data *pdata = dev_get_drvdata(dev);
+ struct temp_data *tdata = get_temp_data(pdata, attr->index);
- return sprintf(buf, "%d\n", pdata->core_data[attr->index]->ttarget);
+ return sprintf(buf, "%d\n", tdata->ttarget);
}
static ssize_t show_temp(struct device *dev,
@@ -167,7 +185,7 @@ static ssize_t show_temp(struct device *dev,
u32 eax, edx;
struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
struct platform_data *pdata = dev_get_drvdata(dev);
- struct temp_data *tdata = pdata->core_data[attr->index];
+ struct temp_data *tdata = get_temp_data(pdata, attr->index);
mutex_lock(&tdata->update_lock);
@@ -468,7 +486,7 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
return tdata;
}
-static int create_core_data(struct platform_device *pdev, unsigned int cpu,
+static int create_temp_data(struct platform_device *pdev, unsigned int cpu,
int pkg_flag)
{
struct temp_data *tdata;
@@ -483,10 +501,7 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
* The attr number is always core id + 2
* The Pkgtemp will always show up as temp1_*, if available
*/
- attr_no = pkg_flag ? 1 : TO_ATTR_NO(cpu);
-
- if (attr_no > MAX_CORE_DATA - 1)
- return -ERANGE;
+ attr_no = pkg_flag ? PACKAGE_ID : TO_ATTR_NO(cpu);
/*
* Provide a single set of attributes for all HT siblings of a core
@@ -495,7 +510,8 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
* Skip if a HT sibling of this core is already registered.
* This is not an error.
*/
- if (pdata->core_data[attr_no] != NULL)
+ tdata = get_temp_data(pdata, attr_no);
+ if (tdata)
return 0;
tdata = init_temp_data(cpu, pkg_flag);
@@ -525,16 +541,28 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
}
}
- pdata->core_data[attr_no] = tdata;
+ tdata->id = attr_no;
+
+ get_online_cpus();
+ mutex_lock(&pdata->temp_data_lock);
+ list_add(&tdata->list, &pdata->temp_data_list);
+ mutex_unlock(&pdata->temp_data_lock);
+ put_online_cpus();
/* Create sysfs interfaces */
err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
if (err)
- goto exit_free;
+ goto exit_del;
return 0;
+
+exit_del:
+ get_online_cpus();
+ mutex_lock(&pdata->temp_data_lock);
+ list_del(&tdata->list);
+ mutex_unlock(&pdata->temp_data_lock);
+ put_online_cpus();
exit_free:
- pdata->core_data[attr_no] = NULL;
kfree(tdata);
return err;
}
@@ -547,21 +575,21 @@ static void coretemp_add_core(unsigned int cpu, int pkg_flag)
if (!pdev)
return;
- err = create_core_data(pdev, cpu, pkg_flag);
+ err = create_temp_data(pdev, cpu, pkg_flag);
if (err)
dev_err(&pdev->dev, "Adding Core %u failed\n", cpu);
}
static void coretemp_remove_core(struct platform_data *pdata,
- int indx)
+ struct temp_data *tdata)
{
- struct temp_data *tdata = pdata->core_data[indx];
-
/* Remove the sysfs attributes */
sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
- kfree(pdata->core_data[indx]);
- pdata->core_data[indx] = NULL;
+ mutex_lock(&pdata->temp_data_lock);
+ list_del(&tdata->list);
+ kfree(tdata);
+ mutex_unlock(&pdata->temp_data_lock);
}
static int coretemp_probe(struct platform_device *pdev)
@@ -575,6 +603,8 @@ static int coretemp_probe(struct platform_device *pdev)
return -ENOMEM;
pdata->phys_proc_id = pdev->id;
+ INIT_LIST_HEAD(&pdata->temp_data_list);
+ mutex_init(&pdata->temp_data_lock);
platform_set_drvdata(pdev, pdata);
pdata->hwmon_dev = devm_hwmon_device_register_with_groups(dev, DRVNAME,
@@ -585,11 +615,12 @@ static int coretemp_probe(struct platform_device *pdev)
static int coretemp_remove(struct platform_device *pdev)
{
struct platform_data *pdata = platform_get_drvdata(pdev);
- int i;
+ struct temp_data *cur, *tmp;
- for (i = MAX_CORE_DATA - 1; i >= 0; --i)
- if (pdata->core_data[i])
- coretemp_remove_core(pdata, i);
+ get_online_cpus();
+ list_for_each_entry_safe(cur, tmp, &pdata->temp_data_list, list)
+ coretemp_remove_core(pdata, cur);
+ put_online_cpus();
return 0;
}
@@ -664,15 +695,15 @@ static void coretemp_device_remove(unsigned int cpu)
static bool is_any_core_online(struct platform_data *pdata)
{
- int i;
+ struct temp_data *tdata;
- /* Find online cores, except pkgtemp data */
- for (i = MAX_CORE_DATA - 1; i >= 0; --i) {
- if (pdata->core_data[i] &&
- !pdata->core_data[i]->is_pkg_data) {
+ mutex_lock(&pdata->temp_data_lock);
+ list_for_each_entry(tdata, &pdata->temp_data_list, list)
+ if (tdata->id != PACKAGE_ID) {
+ mutex_unlock(&pdata->temp_data_lock);
return true;
}
- }
+ mutex_unlock(&pdata->temp_data_lock);
return false;
}
@@ -720,9 +751,10 @@ static void get_core_online(unsigned int cpu)
static void put_core_offline(unsigned int cpu)
{
- int i, indx;
+ int i, attr_no;
struct platform_data *pdata;
struct platform_device *pdev = coretemp_get_pdev(cpu);
+ struct temp_data *tdata;
/* If the physical CPU device does not exist, just return */
if (!pdev)
@@ -730,15 +762,13 @@ static void put_core_offline(unsigned int cpu)
pdata = platform_get_drvdata(pdev);
- indx = TO_ATTR_NO(cpu);
-
- /* The core id is too big, just return */
- if (indx > MAX_CORE_DATA - 1)
- return;
-
- if (pdata->core_data[indx] && pdata->core_data[indx]->cpu == cpu)
- coretemp_remove_core(pdata, indx);
+ attr_no = TO_ATTR_NO(cpu);
+ get_online_cpus();
+ tdata = get_temp_data(pdata, attr_no);
+ if (tdata && tdata->cpu == cpu)
+ coretemp_remove_core(pdata, tdata);
+ put_online_cpus();
/*
* If a HT sibling of a core is taken offline, but another HT sibling
* of the same core is still online, register the alternate sibling.
--
1.7.1
On Wed, 15 Jul 2015 18:04:13 +0200, Lukasz Odzioba wrote:
> Remove the limit on the number of supported CPU cores and on the maximum
> core ID.
I see the benefit of removing the arbitrary limit, but why use a list
instead of a dynamically allocated array? This is turning an O(1)
algorithm into an O(n) algorithm. I know n isn't too large in this case
but I still consider it bad practice if it can be avoided.
Do you expect core IDs to become arbitrarily large? Significantly
larger than the core count?
You need a better patch description for sure. Saying what the patch
does isn't sufficient, you need to explain why this is needed and why
this is the right way to do it.
--
Jean Delvare
SUSE L3 Support
On Wednesday, July 15, 2015 11:08 PM Jean Delvare wrote:
> I see the benefit of removing the arbitrary limit, but why use a list
> instead of a dynamically allocated array? This is turning an O(1)
> algorithm into an O(n) algorithm. I know n isn't too large in this case
> but I still consider it bad practice if it can be avoided.
This patch tries to solve two problems which are present on current hardware:
- CPUs with more than 32 cores
- core IDs greater than NUM_REAL_CORES
In both cases we end up with the following error in dmesg:
coretemp coretemp.0: Adding Core XXX failed
We could just bump NUM_REAL_CORES to 128 like we did in 2012 and call it
solved, but the problem will come back eventually, and it is a waste of
memory for CPUs with a handful of cores.
If there is a way to obtain the maximum core ID during module initialization,
then we can allocate an array and keep O(1) access. If we can't figure out
the maximum core ID, then we can increase the size of the array when new
cores are added. The problem with this is that core ID enumeration can be
sparse, so again we waste memory.
> Do you expect core IDs to become arbitrarily large?
> Significantly larger than the core count?
The question is what "significantly" means.
According to Documentation/cputopology.txt:
---
2) /sys/devices/system/cpu/cpuX/topology/core_id:
the CPU core ID of cpuX. Typically it is the hardware platform's
identifier (rather than the kernel's). The actual value is
architecture and platform dependent.
---
Even now we can have a single present core with an ID like 60 (think of
Xeon Phi). I haven't seen insane core IDs in the thousands in the wild,
but I don't see a reason why we shouldn't handle them properly.
The current patch does not use more memory than is needed, but the pitfall
is that it increases access complexity from O(1) to O(n). We could slide
another patch on top of this one to reduce access complexity from O(n) to
O(log n) by using e.g. a radix tree. I preferred to send the functional fix
first and then work on optimization if there is a concern about it.
Forgive me if that is not appropriate.
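To sketch the radix-tree idea (temp_data_tree would be a hypothetical
field replacing the list, using linux/radix-tree.h; untested):

static struct temp_data *get_temp_data(struct platform_data *pdata, int id)
{
	/* indexed lookup instead of walking the whole list */
	return radix_tree_lookup(&pdata->temp_data_tree, id);
}

static int add_temp_data(struct platform_data *pdata, struct temp_data *tdata)
{
	/* returns -EEXIST if a HT sibling already registered this id */
	return radix_tree_insert(&pdata->temp_data_tree, tdata->id, tdata);
}

/* in coretemp_probe(): INIT_RADIX_TREE(&pdata->temp_data_tree, GFP_KERNEL); */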
> You need a better patch description for sure. Saying what the patch does
> isn't sufficient, you need to explain why this is needed and why this is
> the right way to do it.
Thanks, I'll address that in the next revision, but first we need to figure
out the way to go. If this patch is not appropriate, then treat it as a
starting point for discussion on how to fix the two problems I mentioned.
Thanks,
Lukas
On 07/16/2015 06:17 AM, Odzioba, Lukasz wrote:
> [...]
> If there is a way to obtain the maximum core ID during module initialization,
> then we can allocate an array and keep O(1) access. If we can't figure out
> the maximum core ID, then we can increase the size of the array when new
> cores are added. The problem with this is that core ID enumeration can be
> sparse, so again we waste memory.
> [...]
> The current patch does not use more memory than is needed, but the pitfall
> is that it increases access complexity from O(1) to O(n).
You don't really explain why your approach would be better than allocating
an array of pointers to struct temp_data and increasing its size using
krealloc if needed.
Guenter
From: Guenter Roeck [mailto:[email protected]]
On Friday, July 17, 2015 6:55 PM Guenter Roeck wrote:
> You don't really explain why your approach would be better than
> allocating an array of pointers to struct temp_data and increasing
> its size using krealloc if needed.
Let's consider two cases of such implementation:
a) we use an array of pointers with O(n) access
b) we use an array of pointers with O(1) access
In both cases an array will have a greater memory footprint unless
we implement reallocation ourselves when CPUs are disabled, which will
make the code harder to maintain.
Case b) does not handle huge core IDs and sparse enumeration well;
it is still open for discussion whether we really need that, since there
is no such hardware yet.
I am not saying that my solution is the best of the possible ones.
I am saying that "the best" can vary depending on which criteria you
choose (time, memory, clean code...). Some may say that O(n) is
fine unless we have thousands of cores, since this code is not on a hot
path; others may be more concerned about memory on small/old devices.
I don't see a holy grail here; if you see one, please let me know.
If you think that we don't have to care so much about memory,
then I can create another patch which uses an array instead of a list.
Thanks,
Lukas
On 07/17/2015 10:28 AM, Odzioba, Lukasz wrote:
> From: Guenter Roeck [mailto:[email protected]]
> On Friday, July 17, 2015 6:55 PM Guenter Roeck wrote:
>
>> You don't really explain why your approach would be better than
>> allocating an array of pointers to struct temp_data and increasing
>> its size using krealloc if needed.
>
> Let's consider two cases of such implementation:
> a) we use an array of pointers with O(n) access
> b) we use an array of pointers with O(1) access
>
> In both cases an array will have a greater memory footprint unless
> we implement reallocation ourselves when CPUs are disabled, which will
> make the code harder to maintain.
Please explain why krealloc() won't work, why using krealloc()
would result in a larger memory footprint than using lists,
and why disabling CPUs would require any action in the first place.
> Case b) does not handle huge core IDs and sparse enumeration well;
> it is still open for discussion whether we really need that, since there
> is no such hardware yet.
>
"yet" is a key term here. Presumably you have insider information.
Unless you can share this information, I don't see the point of
replacing an O(1) algorithm with O(n), especially since there
is a relatively simple alternative available to support more CPUs.
> I am not saying that my solution is the best of the possible ones.
> I am saying that "the best" can vary depending on which criteria you
> choose (time, memory, clean code...). Some may say that O(n) is
> fine unless we have thousands of cores, since this code is not on a hot
> path; others may be more concerned about memory on small/old devices.
> I don't see a holy grail here; if you see one, please let me know.
>
Unless you clarify that Intel will introduce CPU IDs which cannot be used
as an array index because they are too sparse, I don't really see how the list
solution would consume less memory than an array of pointers, even if the
array is somewhat sparse. After all, a list consumes at least two pointers
per entry.
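To put rough numbers on it (assuming a 64-bit kernel, i.e. 8-byte
pointers): the embedded struct list_head alone costs 16 bytes per
registered core, while an array slot costs 8 bytes per possible ID. A
dense 60-core package would use 480 bytes for the array versus 960 bytes
of list linkage; the array only loses once more than half of the ID space
is unoccupied.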
Thanks,
Guenter
On Fri, 17 Jul 2015 17:28:20 +0000, Odzioba, Lukasz wrote:
> From: Guenter Roeck [mailto:[email protected]]
> On Friday, July 17, 2015 6:55 PM Guenter Roeck wrote:
>
> > You don't really explain why your approach would be better than
> > allocating an array of pointers to struct temp_data and increasing
> > its size using krealloc if needed.
>
> Let's consider two cases of such implementation:
> a) we use an array of pointers with O(n) access
> b) we use an array of pointers with O(1) access
>
> In both cases an array will have a greater memory footprint unless
> we implement reallocation ourselves when CPUs are disabled, which will
> make the code harder to maintain.
I see no reason to reallocate when CPUs are disabled. This is rare
enough that I very much doubt we care.
> Case b) does not handle huge core IDs and sparse enumeration well;
> it is still open for discussion whether we really need that, since there
> is no such hardware yet.
If you don't have a use case, I see no reason to change anything.
If you have a use case, it would be nice to tell us what it is, so that
we can make better comments on your proposal.
> I am not saying that my solution is the best of the possible ones.
> I am saying that "the best" can vary depending on which criteria you
> choose (time, memory, clean code...). Some may say that O(n) is
> fine unless we have thousands of cores, since this code is not on a hot
> path; others may be more concerned about memory on small/old devices.
> I don't see a holy grail here; if you see one, please let me know.
The problem is that the lookup algorithm is only one piece of the
puzzle. When a user runs "sensors" or any other monitoring tool, you'll
do the look-up once for each logical CPU, or even for every attribute
of every CPU. So we're not talking about n, we're talking about
5 * n^2. And that can happen every other second. So while you may not
call it a "hot path", this is still frequent enough that I am worried
about performance, i.e. algorithmic complexity.
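To put numbers on that estimate, take a hypothetical 64-core package: one
"sensors" pass reads about 5 attributes for each of 64 cores, and with a
list each read walks up to 64 nodes, so on the order of 5 * 64 * 64 =
20480 node visits per poll, versus 5 * 64 = 320 direct accesses with an
array.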
Switching from linear complexity to quadratic complexity "because n is
going to increase soon" is quite the opposite of what I would expect.
> If you think that we don't have to care so much about memory,
> then I can create another patch which uses an array instead of a list.
I'm not so worried about memory. Did you actually check how many bytes
of memory were used per supported logical CPU?
We could just drop NUM_REAL_CORES and use CONFIG_NR_CPUS instead, I
would be fine with that. This lets people worried about memory
consumption control it.
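To be clear, I mean nothing fancier than this (untested sketch; NR_CPUS
is the kernel macro derived from CONFIG_NR_CPUS):

#define MAX_CORE_DATA (NR_CPUS + BASE_SYSFS_ATTR_NO)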
--
Jean Delvare
SUSE L3 Support
On Friday, July 17, 2015 8:02 PM Guenter Roeck wrote:
> Please explain why krealloc() won't work, why using krealloc(() would
> result in a larger memory footprint than using lists, and why disabling
> CPUs would require any action in the first place.
It will work, but it can use more memory for CPUs with many cores.
If you have just one core visible to the kernel, with ID 59
(i.e. the rest are disabled by hardware) out of a 60-core CPU, then you
have to allocate an array of 60 pointers instead of just one element of
the list. Of course you can say that for a CPU with just one core the list
will use 3x the memory needed by the array, and that's true. I see no point
in arguing which case is more important; let's move on.
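In numbers, for that (admittedly extreme) case with 64-bit pointers: the
array costs 60 * 8 = 480 bytes to hold one valid entry, while the list
costs a single 16-byte list_head embedded in the one struct temp_data
that actually exists.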
> "yet" is a key term here. Presumably you have insider information.
No I don't have such information.
> Unless you can share this information, I don't see the point of replacing
> an O(1) algorithm with O(n), especially since there is a relatively simple
> alternative available to support more CPUs.
Ok I understand that, and somewhat agree with that.
> Unless you clarify that Intel will introduce CPU IDs which cannot be used
> as an array index because they are too sparse, I don't really see how the list
> solution would consume less memory than an array of pointers, even if the
> array is somewhat sparse. After all, a list consumes at least two pointers
> per entry.
OK, maybe I should have stated that more clearly, my bad. For my purposes,
removing the limit on supported cores (or bumping it) is all I need.
Removing the max-ID limit is just nice to have; nothing really practical
any time soon, I guess.
I based this patch on Kirill's, which used lists, and back then you did
not say that it was a bad idea, so I tried to solve the problems reported
earlier (deadlocks and race conditions) and submitted another version.
Maybe if I had started from scratch I would not have run into all this.
In general it is good that you found its weak point.
Thanks,
Lukas
Hi Jean,
On 07/17/2015 12:11 PM, Jean Delvare wrote:
>
> We could just drop NUM_REAL_CORES and use CONFIG_NR_CPUS instead, I
> would be fine with that. This lets people worried about memory
> consumption control it.
>
Unfortunately this won't work because the CPU ID is non-linear;
an 8-core system may have a CPU ID larger than 7.
Guenter
On Fri, 17 Jul 2015 12:36:34 -0700, Guenter Roeck wrote:
> Hi Jean,
>
> On 07/17/2015 12:11 PM, Jean Delvare wrote:
>
> >
> > We could just drop NUM_REAL_CORES and use CONFIG_NR_CPUS instead, I
> > would be fine with that. This lets people worried about memory
> > consumption control it.
>
> Unfortunately this won't work because the CPU ID is non-linear;
> an 8-core system may have a CPU ID larger than 7.
Oh right, I forgot about that. Brilliant hardware/firmware engineers...
Well that does not prevent us from using CONFIG_NR_CPUS, "just" the
code would need to be modified to remove the assumption that the array
index matches the logical CPU ID.
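Something like this hypothetical helper is what I have in mind (untested
sketch): hand out dense slots at hotplug time and remember which core owns
each slot, so sysfs reads stay O(1) and only hotplug pays a linear scan.

static int core_to_slot(struct platform_data *pdata, unsigned int cpu)
{
	unsigned int core_id = TO_CORE_ID(cpu);
	int i;

	/* reuse the slot if a HT sibling of this core already registered */
	for (i = 0; i < pdata->nr_slots; i++)
		if (pdata->core_data[i] &&
		    pdata->core_data[i]->cpu_core_id == core_id)
			return i;

	if (pdata->nr_slots >= NR_CPUS)
		return -ERANGE;

	return pdata->nr_slots++;	/* next free slot */
}

(core_data here would be a dense array of NR_CPUS pointers, and nr_slots
the number of slots handed out so far; both are made-up names.)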
BTW I wonder how the rest of the kernel handles the situation.
CONFIG_NR_CPUS is used where relevant so there certainly _is_ a linear
numbering which is independent of the CPU ID.
--
Jean Delvare
SUSE L3 Support
Hi Lukasz,
On Fri, 17 Jul 2015 19:23:44 +0000, Odzioba, Lukasz wrote:
> On Friday, July 17, 2015 8:02 PM Guenter Roeck wrote:
> > Please explain why krealloc() won't work, why using krealloc() would
> > result in a larger memory footprint than using lists, and why disabling
> > CPUs would require any action in the first place.
>
> It will work, but it can use more memory for CPUs with many cores.
> If you have just one core visible to the kernel, with ID 59
> (i.e. the rest are disabled by hardware) out of a 60-core CPU, then you
> have to allocate an array of 60 pointers instead of just one element of
Arrays of pointers are cheap. You can fit 512 pointers in a single
memory page.
> the list. Of course you can say that for a CPU with just one core the list
> will use 3x the memory needed by the array, and that's true. I see no point
> in arguing which case is more important; let's move on.
I see the point in arguing: the example above is just silly and does
not match any real-world application. Nobody buys a 60-core CPU to run
it with a single core enabled.
When two or more alternative implementations are possible, thinking in
terms of the most common cases is key to making the right decision.
Thinking about servers with lots of CPU cores versus embedded devices
with few cores and tight memory constraints is useful. Making up corner
cases is not.
--
Jean Delvare
SUSE L3 Support