Subject: [PATCH v4] drivers: thermal: clear all mitigation when thermal zone is disabled

While a thermal zone is in a trip-violated state, its mode can still
be disabled, either via the thermal core API or via the thermal zone
sysfs interface. Once the zone is disabled, the framework bails out
of any re-evaluation of that thermal zone. As a result, if the zone
is already in a mitigation state when it is disabled, it stays in
that state forever.

To avoid the above-mentioned issue, bind the governor to the thermal
zone when the zone is enabled and unbind it when the zone is disabled,
and clear all existing throttling in the governor's unbind_from_tz()
callback.

Suggested-by: Daniel Lezcano <[email protected]>
Signed-off-by: Manaf Meethalavalappu Pallikunhi <[email protected]>
---
drivers/thermal/gov_power_allocator.c | 3 +++
drivers/thermal/gov_step_wise.c | 26 ++++++++++++++++++++++++++
drivers/thermal/thermal_core.c | 31 +++++++++++++++++++++++++++----
3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 13e3757..9ff0c5f 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -696,6 +696,9 @@ static void power_allocator_unbind(struct thermal_zone_device *tz)

dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);

+ tz->passive = 0;
+ allow_maximum_power(tz, true);
+
if (params->allocated_tzp) {
kfree(tz->tzp);
tz->tzp = NULL;
diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c
index 12acb12..2132c14 100644
--- a/drivers/thermal/gov_step_wise.c
+++ b/drivers/thermal/gov_step_wise.c
@@ -168,6 +168,31 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
}

/**
+ * step_wise_unbind() - unbind the step_wise governor from a thermal zone
+ * @tz: thermal zone to unbind it from
+ *
+ * Clear all previous throttling and reset passive counter.
+ *
+ */
+static void step_wise_unbind(struct thermal_zone_device *tz)
+{
+ struct thermal_instance *instance;
+
+ dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
+
+ mutex_lock(&tz->lock);
+ tz->passive = 0;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ instance->initialized = false;
+ instance->target = THERMAL_NO_TARGET;
+ mutex_lock(&instance->cdev->lock);
+ __thermal_cdev_update(instance->cdev);
+ mutex_unlock(&instance->cdev->lock);
+ }
+ mutex_unlock(&tz->lock);
+}
+
+/**
* step_wise_throttle - throttles devices associated with the given zone
* @tz: thermal_zone_device
* @trip: trip point index
@@ -196,6 +221,7 @@ static int step_wise_throttle(struct thermal_zone_device *tz, int trip)

static struct thermal_governor thermal_gov_step_wise = {
.name = "step_wise",
+ .unbind_from_tz = step_wise_unbind,
.throttle = step_wise_throttle,
};
THERMAL_GOVERNOR_DECLARE(thermal_gov_step_wise);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 1389174..9828eb3 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -274,6 +274,26 @@ static int __init thermal_register_governors(void)
return ret;
}

+static void thermal_governor_attach(struct thermal_zone_device *tz)
+{
+ mutex_lock(&thermal_governor_lock);
+ if (tz->governor && tz->governor->bind_to_tz) {
+ if (tz->governor->bind_to_tz(tz))
+ dev_err(&tz->device,
+ "governor %s failed to bind to thermal zone %s\n",
+ tz->governor->name, tz->type);
+ }
+ mutex_unlock(&thermal_governor_lock);
+}
+
+static void thermal_governor_detach(struct thermal_zone_device *tz)
+{
+ mutex_lock(&thermal_governor_lock);
+ if (tz->governor && tz->governor->unbind_from_tz)
+ tz->governor->unbind_from_tz(tz);
+ mutex_unlock(&thermal_governor_lock);
+}
+
/*
* Zone update section: main control loop applied to each zone while monitoring
*
@@ -447,12 +467,15 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,

mutex_unlock(&tz->lock);

- thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
-
- if (mode == THERMAL_DEVICE_ENABLED)
+ if (mode == THERMAL_DEVICE_ENABLED) {
+ thermal_governor_attach(tz);
+ thermal_zone_device_init(tz);
+ thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
thermal_notify_tz_enable(tz->id);
- else
+ } else {
+ thermal_governor_detach(tz);
thermal_notify_tz_disable(tz->id);
+ }

return ret;
}


2022-01-26 22:08:08

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v4] drivers: thermal: clear all mitigation when thermal zone is disabled

Hi Manaf,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on rafael-pm/thermal]
[also build test WARNING on v5.17-rc1 next-20220125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Manaf-Meethalavalappu-Pallikunhi/drivers-thermal-clear-all-mitigation-when-thermal-zone-is-disabled/20220126-004720
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git thermal
config: nios2-randconfig-m031-20220124 (https://download.01.org/0day-ci/archive/20220126/[email protected]/config)
compiler: nios2-linux-gcc (GCC) 11.2.0

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <[email protected]>

smatch warnings:
drivers/thermal/gov_step_wise.c:189 step_wise_unbind() warn: inconsistent indenting

vim +189 drivers/thermal/gov_step_wise.c

169
170 /**
171 * step_wise_unbind() - unbind the step_wise governor to a thermal zone
172 * @tz: thermal zone to unbind it to
173 *
174 * Clear all previous throttling and reset passive counter.
175 *
176 */
177 static void step_wise_unbind(struct thermal_zone_device *tz)
178 {
179 struct thermal_instance *instance;
180
181 dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
182
183 mutex_lock(&tz->lock);
184 tz->passive = 0;
185 list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
186 instance->initialized = false;
187 instance->target = THERMAL_NO_TARGET;
188 mutex_lock(&instance->cdev->lock);
> 189 __thermal_cdev_update(instance->cdev);
190 mutex_unlock(&instance->cdev->lock);
191 }
192 mutex_unlock(&tz->lock);
193 }
194

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]