The coretemp driver uses rdmsr_on_cpu calls to read
MSR_IA32_PACKAGE_THERM_STATUS/MSR_IA32_THERM_STATUS registers,
which contain information about current core temperature.
For certain low latency applications, the RDMSR interruption exceeds
the applications requirements.
So disable reading of crit_alarm and temp files via /sys, in case
CPU isolation is enabled.
Temperature information from the housekeeping cores should be
sufficient to infer die temperature.
Signed-off-by: Marcelo Tosatti <[email protected]>
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 9bee4d33fbdf..30a35f4130d5 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -27,6 +27,7 @@
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpu_device_id.h>
+#include <linux/sched/isolation.h>
#define DRVNAME "coretemp"
@@ -121,6 +122,10 @@ static ssize_t show_crit_alarm(struct device *dev,
struct platform_data *pdata = dev_get_drvdata(dev);
struct temp_data *tdata = pdata->core_data[attr->index];
+
+ if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
+ return -EINVAL;
+
mutex_lock(&tdata->update_lock);
rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
mutex_unlock(&tdata->update_lock);
@@ -158,6 +163,8 @@ static ssize_t show_temp(struct device *dev,
/* Check whether the time interval has elapsed */
if (!tdata->valid || time_after(jiffies, tdata->last_updated + HZ)) {
+ if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
+ return -EINVAL;
rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
/*
* Ignore the valid bit. In all observed cases the register
On 12/15/22 04:42, Marcelo Tosatti wrote:
>
> The coretemp driver uses rdmsr_on_cpu calls to read
> MSR_IA32_PACKAGE_THERM_STATUS/MSR_IA32_THERM_STATUS registers,
> which contain information about current core temperature.
>
> For certain low latency applications, the RDMSR interruption exceeds
> the applications requirements.
>
> So disable reading of crit_alarm and temp files via /sys, in case
> CPU isolation is enabled.
>
That isn't really what the code is doing. It doesn't disable reading
the attributes, it returns an error when an attempt is made to read
them.
> Temperature information from the housekeeping cores should be
> sufficient to infer die temperature.
>
> Signed-off-by: Marcelo Tosatti <[email protected]>
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index 9bee4d33fbdf..30a35f4130d5 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -27,6 +27,7 @@
> #include <asm/msr.h>
> #include <asm/processor.h>
> #include <asm/cpu_device_id.h>
> +#include <linux/sched/isolation.h>
>
> #define DRVNAME "coretemp"
>
> @@ -121,6 +122,10 @@ static ssize_t show_crit_alarm(struct device *dev,
> struct platform_data *pdata = dev_get_drvdata(dev);
> struct temp_data *tdata = pdata->core_data[attr->index];
>
> +
> + if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
> + return -EINVAL;
Littering the output of the "sensors" command with errors is most
definitely wrong and not acceptable. On top of that, the user didn't
do anything wrong, so -EINVAL ("Invalid Argument") is definitely
the wrong error. Maybe return -ENODATA, or if the condition is
static just don't instantiate the attribute for the affected CPUs
to start with. Also, this warrants a comment in the code and an
explanation in Documentation/hwmon/coretemp.rst.
Guenter
The coretemp driver uses rdmsr_on_cpu calls to read
MSR_IA32_PACKAGE_THERM_STATUS/MSR_IA32_THERM_STATUS registers,
which contain information about current core temperature.
For certain low latency applications, the RDMSR interruption exceeds
the applications requirements.
So disallow reading of crit_alarm and temp files via /sys, returning
-EINVAL, in case CPU isolation is enabled.
Temperature information from the housekeeping cores should be
sufficient to infer die temperature.
Signed-off-by: Marcelo Tosatti <[email protected]>
---
v2: improve changelog to mention that an error is returned,
and sysfs file is not disabled (Guenter Roeck)
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 9bee4d33fbdf..30a35f4130d5 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -27,6 +27,7 @@
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpu_device_id.h>
+#include <linux/sched/isolation.h>
#define DRVNAME "coretemp"
@@ -121,6 +122,10 @@ static ssize_t show_crit_alarm(struct device *dev,
struct platform_data *pdata = dev_get_drvdata(dev);
struct temp_data *tdata = pdata->core_data[attr->index];
+
+ if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
+ return -EINVAL;
+
mutex_lock(&tdata->update_lock);
rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
mutex_unlock(&tdata->update_lock);
@@ -158,6 +163,8 @@ static ssize_t show_temp(struct device *dev,
/* Check whether the time interval has elapsed */
if (!tdata->valid || time_after(jiffies, tdata->last_updated + HZ)) {
+ if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
+ return -EINVAL;
rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
/*
* Ignore the valid bit. In all observed cases the register
On 12/16/22 06:07, Marcelo Tosatti wrote:
>
> The coretemp driver uses rdmsr_on_cpu calls to read
> MSR_IA32_PACKAGE_THERM_STATUS/MSR_IA32_THERM_STATUS registers,
> which contain information about current core temperature.
>
> For certain low latency applications, the RDMSR interruption exceeds
> the applications requirements.
>
> So disallow reading of crit_alarm and temp files via /sys, returning
> -EINVAL, in case CPU isolation is enabled.
>
> Temperature information from the housekeeping cores should be
> sufficient to infer die temperature.
>
> Signed-off-by: Marcelo Tosatti <[email protected]>
>
> ---
> v2: improve changelog to mention that an error is returned,
> and sysfs file is not disabled (Guenter Roeck)
>
You did not address my feedback. I requested a code change.
Returning -EINVAL is unacceptable, and a solution not creating
the sysfs attributes to start with would be preferred.
Guenter
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index 9bee4d33fbdf..30a35f4130d5 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -27,6 +27,7 @@
> #include <asm/msr.h>
> #include <asm/processor.h>
> #include <asm/cpu_device_id.h>
> +#include <linux/sched/isolation.h>
>
> #define DRVNAME "coretemp"
>
> @@ -121,6 +122,10 @@ static ssize_t show_crit_alarm(struct device *dev,
> struct platform_data *pdata = dev_get_drvdata(dev);
> struct temp_data *tdata = pdata->core_data[attr->index];
>
> +
> + if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
> + return -EINVAL;
> +
> mutex_lock(&tdata->update_lock);
> rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
> mutex_unlock(&tdata->update_lock);
> @@ -158,6 +163,8 @@ static ssize_t show_temp(struct device *dev,
>
> /* Check whether the time interval has elapsed */
> if (!tdata->valid || time_after(jiffies, tdata->last_updated + HZ)) {
> + if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
> + return -EINVAL;
> rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
> /*
> * Ignore the valid bit. In all observed cases the register
>
>
>
>
The coretemp driver uses rdmsr_on_cpu calls to read
MSR_IA32_PACKAGE_THERM_STATUS/MSR_IA32_THERM_STATUS registers,
which contain information about current core temperature.
For certain low latency applications, the RDMSR interruption exceeds
the applications requirements.
So do not create core files in sysfs, for CPUs which have
isolation and nohz_full enabled.
Temperature information from the housekeeping cores should be
sufficient to infer die temperature.
Signed-off-by: Marcelo Tosatti <[email protected]>
---
v3: do not create sysfs files for isolated CPUs (Guenter Roeck)
v2: improve changelog to mention that an error is returned,
and sysfs file is not disabled (Guenter Roeck)
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 9bee4d33fbdf..619dfde7a712 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -27,6 +27,7 @@
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/cpu_device_id.h>
+#include <linux/sched/isolation.h>
#define DRVNAME "coretemp"
@@ -458,6 +459,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
u32 eax, edx;
int err, index, attr_no;
+ if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
+ return 0;
+
/*
* Find attr number for sysfs:
* We map the attr number to core id of the CPU
Hi Marcelo,
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Marcelo-Tosatti/hwmon-coretemp-avoid-RDMSR-interruptions-to-isolated-CPUs/20221215-204904
patch link: https://lore.kernel.org/r/Y5sWMEG0xCl9bgEi%40tpad
patch subject: [PATCH] hwmon: coretemp: avoid RDMSR interruptions to isolated CPUs
config: i386-randconfig-m021
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
| Reported-by: Dan Carpenter <[email protected]>
smatch warnings:
drivers/hwmon/coretemp.c:181 show_temp() warn: inconsistent returns '&tdata->update_lock'.
vim +181 drivers/hwmon/coretemp.c
199e0de7f5df31 Durgadoss R 2011-05-20 154 static ssize_t show_temp(struct device *dev,
199e0de7f5df31 Durgadoss R 2011-05-20 155 struct device_attribute *devattr, char *buf)
199e0de7f5df31 Durgadoss R 2011-05-20 156 {
bebe467823c0d8 Rudolf Marek 2007-05-08 157 u32 eax, edx;
199e0de7f5df31 Durgadoss R 2011-05-20 158 struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
199e0de7f5df31 Durgadoss R 2011-05-20 159 struct platform_data *pdata = dev_get_drvdata(dev);
199e0de7f5df31 Durgadoss R 2011-05-20 160 struct temp_data *tdata = pdata->core_data[attr->index];
199e0de7f5df31 Durgadoss R 2011-05-20 161
199e0de7f5df31 Durgadoss R 2011-05-20 162 mutex_lock(&tdata->update_lock);
bebe467823c0d8 Rudolf Marek 2007-05-08 163
199e0de7f5df31 Durgadoss R 2011-05-20 164 /* Check whether the time interval has elapsed */
199e0de7f5df31 Durgadoss R 2011-05-20 165 if (!tdata->valid || time_after(jiffies, tdata->last_updated + HZ)) {
e78264610cd902 Marcelo Tosatti 2022-12-15 166 if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
e78264610cd902 Marcelo Tosatti 2022-12-15 167 return -EINVAL;
mutex_unlock(&tdata->update_lock);
199e0de7f5df31 Durgadoss R 2011-05-20 168 rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
bf6ea084ebb54c Guenter Roeck 2013-11-20 169 /*
bf6ea084ebb54c Guenter Roeck 2013-11-20 170 * Ignore the valid bit. In all observed cases the register
bf6ea084ebb54c Guenter Roeck 2013-11-20 171 * value is either low or zero if the valid bit is 0.
bf6ea084ebb54c Guenter Roeck 2013-11-20 172 * Return it instead of reporting an error which doesn't
bf6ea084ebb54c Guenter Roeck 2013-11-20 173 * really help at all.
bf6ea084ebb54c Guenter Roeck 2013-11-20 174 */
bf6ea084ebb54c Guenter Roeck 2013-11-20 175 tdata->temp = tdata->tjmax - ((eax >> 16) & 0x7f) * 1000;
952a11ca32a604 Paul Fertser 2021-09-24 176 tdata->valid = true;
199e0de7f5df31 Durgadoss R 2011-05-20 177 tdata->last_updated = jiffies;
bebe467823c0d8 Rudolf Marek 2007-05-08 178 }
bebe467823c0d8 Rudolf Marek 2007-05-08 179
199e0de7f5df31 Durgadoss R 2011-05-20 180 mutex_unlock(&tdata->update_lock);
bf6ea084ebb54c Guenter Roeck 2013-11-20 @181 return sprintf(buf, "%d\n", tdata->temp);
bebe467823c0d8 Rudolf Marek 2007-05-08 182 }
--
0-DAY CI Kernel Test Service
https://01.org/lkp
On Fri, Dec 23, 2022 at 01:48:14PM +0300, Dan Carpenter wrote:
> Hi Marcelo,
>
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Marcelo-Tosatti/hwmon-coretemp-avoid-RDMSR-interruptions-to-isolated-CPUs/20221215-204904
> patch link: https://lore.kernel.org/r/Y5sWMEG0xCl9bgEi%40tpad
> patch subject: [PATCH] hwmon: coretemp: avoid RDMSR interruptions to isolated CPUs
> config: i386-randconfig-m021
> compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
>
> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot <[email protected]>
> | Reported-by: Dan Carpenter <[email protected]>
>
> smatch warnings:
> drivers/hwmon/coretemp.c:181 show_temp() warn: inconsistent returns '&tdata->update_lock'.
>
> vim +181 drivers/hwmon/coretemp.c
>
> 199e0de7f5df31 Durgadoss R 2011-05-20 154 static ssize_t show_temp(struct device *dev,
> 199e0de7f5df31 Durgadoss R 2011-05-20 155 struct device_attribute *devattr, char *buf)
> 199e0de7f5df31 Durgadoss R 2011-05-20 156 {
> bebe467823c0d8 Rudolf Marek 2007-05-08 157 u32 eax, edx;
> 199e0de7f5df31 Durgadoss R 2011-05-20 158 struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
> 199e0de7f5df31 Durgadoss R 2011-05-20 159 struct platform_data *pdata = dev_get_drvdata(dev);
> 199e0de7f5df31 Durgadoss R 2011-05-20 160 struct temp_data *tdata = pdata->core_data[attr->index];
> 199e0de7f5df31 Durgadoss R 2011-05-20 161
> 199e0de7f5df31 Durgadoss R 2011-05-20 162 mutex_lock(&tdata->update_lock);
> bebe467823c0d8 Rudolf Marek 2007-05-08 163
> 199e0de7f5df31 Durgadoss R 2011-05-20 164 /* Check whether the time interval has elapsed */
> 199e0de7f5df31 Durgadoss R 2011-05-20 165 if (!tdata->valid || time_after(jiffies, tdata->last_updated + HZ)) {
> e78264610cd902 Marcelo Tosatti 2022-12-15 166 if (!housekeeping_cpu(tdata->cpu, HK_TYPE_MISC))
> e78264610cd902 Marcelo Tosatti 2022-12-15 167 return -EINVAL;
>
> mutex_unlock(&tdata->update_lock);
>
> 199e0de7f5df31 Durgadoss R 2011-05-20 168 rdmsr_on_cpu(tdata->cpu, tdata->status_reg, &eax, &edx);
> bf6ea084ebb54c Guenter Roeck 2013-11-20 169 /*
> bf6ea084ebb54c Guenter Roeck 2013-11-20 170 * Ignore the valid bit. In all observed cases the register
> bf6ea084ebb54c Guenter Roeck 2013-11-20 171 * value is either low or zero if the valid bit is 0.
> bf6ea084ebb54c Guenter Roeck 2013-11-20 172 * Return it instead of reporting an error which doesn't
> bf6ea084ebb54c Guenter Roeck 2013-11-20 173 * really help at all.
> bf6ea084ebb54c Guenter Roeck 2013-11-20 174 */
> bf6ea084ebb54c Guenter Roeck 2013-11-20 175 tdata->temp = tdata->tjmax - ((eax >> 16) & 0x7f) * 1000;
> 952a11ca32a604 Paul Fertser 2021-09-24 176 tdata->valid = true;
> 199e0de7f5df31 Durgadoss R 2011-05-20 177 tdata->last_updated = jiffies;
> bebe467823c0d8 Rudolf Marek 2007-05-08 178 }
> bebe467823c0d8 Rudolf Marek 2007-05-08 179
> 199e0de7f5df31 Durgadoss R 2011-05-20 180 mutex_unlock(&tdata->update_lock);
> bf6ea084ebb54c Guenter Roeck 2013-11-20 @181 return sprintf(buf, "%d\n", tdata->temp);
> bebe467823c0d8 Rudolf Marek 2007-05-08 182 }
>
> --
> 0-DAY CI Kernel Test Service
> https://01.org/lkp
Thanks,
v3 of the patch should not suffer from this issue.
On Fri, Dec 16, 2022 at 05:24:08PM -0300, Marcelo Tosatti wrote:
> The coretemp driver uses rdmsr_on_cpu calls to read
> MSR_IA32_PACKAGE_THERM_STATUS/MSR_IA32_THERM_STATUS registers,
> which contain information about current core temperature.
>
> For certain low latency applications, the RDMSR interruption exceeds
> the applications requirements.
>
> So do not create core files in sysfs, for CPUs which have
> isolation and nohz_full enabled.
>
> Temperature information from the housekeeping cores should be
> sufficient to infer die temperature.
>
> Signed-off-by: Marcelo Tosatti <[email protected]>
Applied to hwmon-next.
Thanks,
Guenter
> ---
> v3: do not create sysfs files for isolated CPUs (Guenter Roeck)
> v2: improve changelog to mention that an error is returned,
> and sysfs file is not disabled (Guenter Roeck)
>
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index 9bee4d33fbdf..619dfde7a712 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -27,6 +27,7 @@
> #include <asm/msr.h>
> #include <asm/processor.h>
> #include <asm/cpu_device_id.h>
> +#include <linux/sched/isolation.h>
>
> #define DRVNAME "coretemp"
>
> @@ -458,6 +459,9 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
> u32 eax, edx;
> int err, index, attr_no;
>
> + if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
> + return 0;
> +
> /*
> * Find attr number for sysfs:
> * We map the attr number to core id of the CPU