2024-02-06 01:54:49

by Zhang, Rui

[permalink] [raw]
Subject: [PATCH] thermal/intel: Fix intel_tcc_get_temp() to support negative CPU temperature

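CPU temperature can be negative in some cases, so a negative CPU
temperature must not be treated as a failure.

Fix intel_tcc_get_temp() and its users to support negative CPU
temperatures: the temperature is now returned through a pointer
argument, and the return value carries only 0 or a negative error code.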

Fixes: a3c1f066e1c5 ("thermal/intel: Introduce Intel TCC library")
Signed-off-by: Zhang Rui <[email protected]>
---
.../intel/int340x_thermal/processor_thermal_device.c | 8 ++++----
drivers/thermal/intel/intel_tcc.c | 12 ++++++------
drivers/thermal/intel/x86_pkg_temp_thermal.c | 8 ++++----
include/linux/intel_tcc.h | 2 +-
4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
index 649f67fdf345..d75fae7b7ed2 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
@@ -176,14 +176,14 @@ static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone,
int *temp)
{
int cpu;
- int curr_temp;
+ int curr_temp, ret;

*temp = 0;

for_each_online_cpu(cpu) {
- curr_temp = intel_tcc_get_temp(cpu, false);
- if (curr_temp < 0)
- return curr_temp;
+ ret = intel_tcc_get_temp(cpu, &curr_temp, false);
+ if (ret < 0)
+ return ret;
if (!*temp || curr_temp > *temp)
*temp = curr_temp;
}
diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
index 2e5c741c41ca..5e8b7f34b395 100644
--- a/drivers/thermal/intel/intel_tcc.c
+++ b/drivers/thermal/intel/intel_tcc.c
@@ -103,18 +103,19 @@ EXPORT_SYMBOL_NS_GPL(intel_tcc_set_offset, INTEL_TCC);
/**
* intel_tcc_get_temp() - returns the current temperature
* @cpu: cpu that the MSR should be run on, nagative value means any cpu.
+ * @temp: pointer to the memory for saving cpu temperature.
* @pkg: true: Package Thermal Sensor. false: Core Thermal Sensor.
*
* Get the current temperature returned by the CPU core/package level
* thermal sensor, in degrees C.
*
- * Return: Temperature in degrees C on success, negative error code otherwise.
+ * Return: 0 on success, negative error code otherwise.
*/
-int intel_tcc_get_temp(int cpu, bool pkg)
+int intel_tcc_get_temp(int cpu, int *temp, bool pkg)
{
u32 low, high;
u32 msr = pkg ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS;
- int tjmax, temp, err;
+ int tjmax, err;

tjmax = intel_tcc_get_tjmax(cpu);
if (tjmax < 0)
@@ -131,9 +132,8 @@ int intel_tcc_get_temp(int cpu, bool pkg)
if (!(low & BIT(31)))
return -ENODATA;

- temp = tjmax - ((low >> 16) & 0x7f);
+ *temp = tjmax - ((low >> 16) & 0x7f);

- /* Do not allow negative CPU temperature */
- return temp >= 0 ? temp : -ENODATA;
+ return 0;
}
EXPORT_SYMBOL_NS_GPL(intel_tcc_get_temp, INTEL_TCC);
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 11a7f8108bbb..61c3d450ee60 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -108,11 +108,11 @@ static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
{
struct zone_device *zonedev = thermal_zone_device_priv(tzd);
- int val;
+ int val, ret;

- val = intel_tcc_get_temp(zonedev->cpu, true);
- if (val < 0)
- return val;
+ ret = intel_tcc_get_temp(zonedev->cpu, &val, true);
+ if (ret < 0)
+ return ret;

*temp = val * 1000;
pr_debug("sys_get_curr_temp %d\n", *temp);
diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
index f422612c28d6..8ff8eabb4a98 100644
--- a/include/linux/intel_tcc.h
+++ b/include/linux/intel_tcc.h
@@ -13,6 +13,6 @@
int intel_tcc_get_tjmax(int cpu);
int intel_tcc_get_offset(int cpu);
int intel_tcc_set_offset(int cpu, int offset);
-int intel_tcc_get_temp(int cpu, bool pkg);
+int intel_tcc_get_temp(int cpu, int *temp, bool pkg);

#endif /* __INTEL_TCC_H__ */
--
2.34.1



2024-02-06 19:34:47

by Stanislaw Gruszka

[permalink] [raw]
Subject: Re: [PATCH] thermal/intel: Fix intel_tcc_get_temp() to support negative CPU temperature

On Tue, Feb 06, 2024 at 09:54:09AM +0800, Zhang Rui wrote:
> CPU temperature can be negative in some cases. Thus the negative CPU
> temperature should not be considered as a failure.
>
> Fix intel_tcc_get_temp() and its users to support negative CPU
> temperature.
>
> Fixes: a3c1f066e1c5 ("thermal/intel: Introduce Intel TCC library")
> Signed-off-by: Zhang Rui <[email protected]>
Reviewed-by: Stanislaw Gruszka <[email protected]>

> ---
> .../intel/int340x_thermal/processor_thermal_device.c | 8 ++++----
> drivers/thermal/intel/intel_tcc.c | 12 ++++++------
> drivers/thermal/intel/x86_pkg_temp_thermal.c | 8 ++++----
> include/linux/intel_tcc.h | 2 +-
> 4 files changed, 15 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> index 649f67fdf345..d75fae7b7ed2 100644
> --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> @@ -176,14 +176,14 @@ static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone,
> int *temp)
> {
> int cpu;
> - int curr_temp;
> + int curr_temp, ret;
>
> *temp = 0;
>
> for_each_online_cpu(cpu) {
> - curr_temp = intel_tcc_get_temp(cpu, false);
> - if (curr_temp < 0)
> - return curr_temp;
> + ret = intel_tcc_get_temp(cpu, &curr_temp, false);
> + if (ret < 0)
> + return ret;
> if (!*temp || curr_temp > *temp)
> *temp = curr_temp;
> }
> diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
> index 2e5c741c41ca..5e8b7f34b395 100644
> --- a/drivers/thermal/intel/intel_tcc.c
> +++ b/drivers/thermal/intel/intel_tcc.c
> @@ -103,18 +103,19 @@ EXPORT_SYMBOL_NS_GPL(intel_tcc_set_offset, INTEL_TCC);
> /**
> * intel_tcc_get_temp() - returns the current temperature
> * @cpu: cpu that the MSR should be run on, nagative value means any cpu.
> + * @temp: pointer to the memory for saving cpu temperature.
> * @pkg: true: Package Thermal Sensor. false: Core Thermal Sensor.
> *
> * Get the current temperature returned by the CPU core/package level
> * thermal sensor, in degrees C.
> *
> - * Return: Temperature in degrees C on success, negative error code otherwise.
> + * Return: 0 on success, negative error code otherwise.
> */
> -int intel_tcc_get_temp(int cpu, bool pkg)
> +int intel_tcc_get_temp(int cpu, int *temp, bool pkg)
> {
> u32 low, high;
> u32 msr = pkg ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS;
> - int tjmax, temp, err;
> + int tjmax, err;
>
> tjmax = intel_tcc_get_tjmax(cpu);
> if (tjmax < 0)
> @@ -131,9 +132,8 @@ int intel_tcc_get_temp(int cpu, bool pkg)
> if (!(low & BIT(31)))
> return -ENODATA;
>
> - temp = tjmax - ((low >> 16) & 0x7f);
> + *temp = tjmax - ((low >> 16) & 0x7f);
>
> - /* Do not allow negative CPU temperature */
> - return temp >= 0 ? temp : -ENODATA;
> + return 0;
> }
> EXPORT_SYMBOL_NS_GPL(intel_tcc_get_temp, INTEL_TCC);
> diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> index 11a7f8108bbb..61c3d450ee60 100644
> --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
> +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> @@ -108,11 +108,11 @@ static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
> static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
> {
> struct zone_device *zonedev = thermal_zone_device_priv(tzd);
> - int val;
> + int val, ret;
>
> - val = intel_tcc_get_temp(zonedev->cpu, true);
> - if (val < 0)
> - return val;
> + ret = intel_tcc_get_temp(zonedev->cpu, &val, true);
> + if (ret < 0)
> + return ret;
>
> *temp = val * 1000;
> pr_debug("sys_get_curr_temp %d\n", *temp);
> diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
> index f422612c28d6..8ff8eabb4a98 100644
> --- a/include/linux/intel_tcc.h
> +++ b/include/linux/intel_tcc.h
> @@ -13,6 +13,6 @@
> int intel_tcc_get_tjmax(int cpu);
> int intel_tcc_get_offset(int cpu);
> int intel_tcc_set_offset(int cpu, int offset);
> -int intel_tcc_get_temp(int cpu, bool pkg);
> +int intel_tcc_get_temp(int cpu, int *temp, bool pkg);
>
> #endif /* __INTEL_TCC_H__ */
> --
> 2.34.1
>
>

2024-02-12 17:43:39

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH] thermal/intel: Fix intel_tcc_get_temp() to support negative CPU temperature

On Tue, Feb 6, 2024 at 4:53 PM Stanislaw Gruszka
<[email protected]> wrote:
>
> On Tue, Feb 06, 2024 at 09:54:09AM +0800, Zhang Rui wrote:
> > CPU temperature can be negative in some cases. Thus the negative CPU
> > temperature should not be considered as a failure.
> >
> > Fix intel_tcc_get_temp() and its users to support negative CPU
> > temperature.
> >
> > Fixes: a3c1f066e1c5 ("thermal/intel: Introduce Intel TCC library")
> > Signed-off-by: Zhang Rui <[email protected]>
> Reviewed-by: Stanislaw Gruszka <[email protected]>
>
> > ---
> > .../intel/int340x_thermal/processor_thermal_device.c | 8 ++++----
> > drivers/thermal/intel/intel_tcc.c | 12 ++++++------
> > drivers/thermal/intel/x86_pkg_temp_thermal.c | 8 ++++----
> > include/linux/intel_tcc.h | 2 +-
> > 4 files changed, 15 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> > index 649f67fdf345..d75fae7b7ed2 100644
> > --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> > +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
> > @@ -176,14 +176,14 @@ static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone,
> > int *temp)
> > {
> > int cpu;
> > - int curr_temp;
> > + int curr_temp, ret;
> >
> > *temp = 0;
> >
> > for_each_online_cpu(cpu) {
> > - curr_temp = intel_tcc_get_temp(cpu, false);
> > - if (curr_temp < 0)
> > - return curr_temp;
> > + ret = intel_tcc_get_temp(cpu, &curr_temp, false);
> > + if (ret < 0)
> > + return ret;
> > if (!*temp || curr_temp > *temp)
> > *temp = curr_temp;
> > }
> > diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
> > index 2e5c741c41ca..5e8b7f34b395 100644
> > --- a/drivers/thermal/intel/intel_tcc.c
> > +++ b/drivers/thermal/intel/intel_tcc.c
> > @@ -103,18 +103,19 @@ EXPORT_SYMBOL_NS_GPL(intel_tcc_set_offset, INTEL_TCC);
> > /**
> > * intel_tcc_get_temp() - returns the current temperature
> > * @cpu: cpu that the MSR should be run on, nagative value means any cpu.
> > + * @temp: pointer to the memory for saving cpu temperature.
> > * @pkg: true: Package Thermal Sensor. false: Core Thermal Sensor.
> > *
> > * Get the current temperature returned by the CPU core/package level
> > * thermal sensor, in degrees C.
> > *
> > - * Return: Temperature in degrees C on success, negative error code otherwise.
> > + * Return: 0 on success, negative error code otherwise.
> > */
> > -int intel_tcc_get_temp(int cpu, bool pkg)
> > +int intel_tcc_get_temp(int cpu, int *temp, bool pkg)
> > {
> > u32 low, high;
> > u32 msr = pkg ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS;
> > - int tjmax, temp, err;
> > + int tjmax, err;
> >
> > tjmax = intel_tcc_get_tjmax(cpu);
> > if (tjmax < 0)
> > @@ -131,9 +132,8 @@ int intel_tcc_get_temp(int cpu, bool pkg)
> > if (!(low & BIT(31)))
> > return -ENODATA;
> >
> > - temp = tjmax - ((low >> 16) & 0x7f);
> > + *temp = tjmax - ((low >> 16) & 0x7f);
> >
> > - /* Do not allow negative CPU temperature */
> > - return temp >= 0 ? temp : -ENODATA;
> > + return 0;
> > }
> > EXPORT_SYMBOL_NS_GPL(intel_tcc_get_temp, INTEL_TCC);
> > diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> > index 11a7f8108bbb..61c3d450ee60 100644
> > --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
> > +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
> > @@ -108,11 +108,11 @@ static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
> > static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
> > {
> > struct zone_device *zonedev = thermal_zone_device_priv(tzd);
> > - int val;
> > + int val, ret;
> >
> > - val = intel_tcc_get_temp(zonedev->cpu, true);
> > - if (val < 0)
> > - return val;
> > + ret = intel_tcc_get_temp(zonedev->cpu, &val, true);
> > + if (ret < 0)
> > + return ret;
> >
> > *temp = val * 1000;
> > pr_debug("sys_get_curr_temp %d\n", *temp);
> > diff --git a/include/linux/intel_tcc.h b/include/linux/intel_tcc.h
> > index f422612c28d6..8ff8eabb4a98 100644
> > --- a/include/linux/intel_tcc.h
> > +++ b/include/linux/intel_tcc.h
> > @@ -13,6 +13,6 @@
> > int intel_tcc_get_tjmax(int cpu);
> > int intel_tcc_get_offset(int cpu);
> > int intel_tcc_set_offset(int cpu, int offset);
> > -int intel_tcc_get_temp(int cpu, bool pkg);
> > +int intel_tcc_get_temp(int cpu, int *temp, bool pkg);
> >
> > #endif /* __INTEL_TCC_H__ */
> > --

Applied as 6.9 material, thanks!