Subject: [PATCH v5] drivers: thermal: clear all mitigation when thermal zone is disabled

Whenever a thermal zone is in trip violated state, there is a chance
that the same thermal zone mode can be disabled either via
thermal core API or via thermal zone sysfs. Once it is disabled,
the framework bails out any re-evaluation of thermal zone. It leads
to a case where if it is already in mitigation state, it will stay
the same state forever.

To avoid above mentioned issue, add support to bind/unbind
governor from thermal zone during thermal zone mode change request
and clear all existing throttling in governor unbind_from_tz()
callback.

Suggested-by: Daniel Lezcano <[email protected]>
Signed-off-by: Manaf Meethalavalappu Pallikunhi <[email protected]>
---
drivers/thermal/gov_power_allocator.c | 3 +++
drivers/thermal/gov_step_wise.c | 26 ++++++++++++++++++++++++++
drivers/thermal/thermal_core.c | 31 +++++++++++++++++++++++++++----
3 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 13e3757..9ff0c5f 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -696,6 +696,9 @@ static void power_allocator_unbind(struct thermal_zone_device *tz)

dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);

+ tz->passive = 0;
+ allow_maximum_power(tz, true);
+
if (params->allocated_tzp) {
kfree(tz->tzp);
tz->tzp = NULL;
diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c
index 12acb12..2d2186b 100644
--- a/drivers/thermal/gov_step_wise.c
+++ b/drivers/thermal/gov_step_wise.c
@@ -168,6 +168,31 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
}

/**
+ * step_wise_unbind() - unbind the step_wise governor to a thermal zone
+ * @tz: thermal zone to unbind it to
+ *
+ * Clear all previous throttling and reset passive counter.
+ *
+ */
+static void step_wise_unbind(struct thermal_zone_device *tz)
+{
+ struct thermal_instance *instance;
+
+ dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
+
+ mutex_lock(&tz->lock);
+ tz->passive = 0;
+ list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ instance->initialized = false;
+ instance->target = THERMAL_NO_TARGET;
+ mutex_lock(&instance->cdev->lock);
+ __thermal_cdev_update(instance->cdev);
+ mutex_unlock(&instance->cdev->lock);
+ }
+ mutex_unlock(&tz->lock);
+}
+
+/**
* step_wise_throttle - throttles devices associated with the given zone
* @tz: thermal_zone_device
* @trip: trip point index
@@ -196,6 +221,7 @@ static int step_wise_throttle(struct thermal_zone_device *tz, int trip)

static struct thermal_governor thermal_gov_step_wise = {
.name = "step_wise",
+ .unbind_from_tz = step_wise_unbind,
.throttle = step_wise_throttle,
};
THERMAL_GOVERNOR_DECLARE(thermal_gov_step_wise);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 82654dc..34726d3 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -274,6 +274,26 @@ static int __init thermal_register_governors(void)
return ret;
}

+static void thermal_governor_attach(struct thermal_zone_device *tz)
+{
+ mutex_lock(&thermal_governor_lock);
+ if (tz->governor && tz->governor->bind_to_tz) {
+ if (tz->governor->bind_to_tz(tz))
+ dev_err(&tz->device,
+ "governor %s failed to bind to thermal zone %s\n",
+ tz->governor->name, tz->type);
+ }
+ mutex_unlock(&thermal_governor_lock);
+}
+
+static void thermal_governor_detach(struct thermal_zone_device *tz)
+{
+ mutex_lock(&thermal_governor_lock);
+ if (tz->governor && tz->governor->unbind_from_tz)
+ tz->governor->unbind_from_tz(tz);
+ mutex_unlock(&thermal_governor_lock);
+}
+
/*
* Zone update section: main control loop applied to each zone while monitoring
*
@@ -449,12 +469,15 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,

mutex_unlock(&tz->lock);

- thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
-
- if (mode == THERMAL_DEVICE_ENABLED)
+ if (mode == THERMAL_DEVICE_ENABLED) {
+ thermal_governor_attach(tz);
+ thermal_zone_device_init(tz);
+ thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
thermal_notify_tz_enable(tz->id);
- else
+ } else {
+ thermal_governor_detach(tz);
thermal_notify_tz_disable(tz->id);
+ }

return ret;
}


2022-02-01 15:36:29

by Lukasz Luba

[permalink] [raw]
Subject: Re: [PATCH v5] drivers: thermal: clear all mitigation when thermal zone is disabled

Hi Manaf,

On 1/27/22 6:11 PM, Manaf Meethalavalappu Pallikunhi wrote:
> Whenever a thermal zone is in trip violated state, there is a chance
> that the same thermal zone mode can be disabled either via
> thermal core API or via thermal zone sysfs. Once it is disabled,
> the framework bails out any re-evaluation of thermal zone. It leads
> to a case where if it is already in mitigation state, it will stay
> the same state forever.
>
> To avoid above mentioned issue, add support to bind/unbind
> governor from thermal zone during thermal zone mode change request
> and clear all existing throttling in governor unbind_from_tz()
> callback.

I have one use case:
This would be a bit dangerous, e.g. to switch governors while there is a
high temperature. Although, sounds reasonable to left a 'default' state
for a next governor.

>
> Suggested-by: Daniel Lezcano <[email protected]>
> Signed-off-by: Manaf Meethalavalappu Pallikunhi <[email protected]>
> ---
> drivers/thermal/gov_power_allocator.c | 3 +++
> drivers/thermal/gov_step_wise.c | 26 ++++++++++++++++++++++++++
> drivers/thermal/thermal_core.c | 31 +++++++++++++++++++++++++++----
> 3 files changed, 56 insertions(+), 4 deletions(-)

Why only two governors need that change and not all?
Because they don't have 'bind/unbind' callbacks, then maybe we should
change that as well to make it consistent?

Regards,
Lukasz

2022-02-03 03:32:58

by kernel test robot

[permalink] [raw]
Subject: [drivers] 5c5cdc6f33: INFO:task_blocked_for_more_than#seconds



Greeting,

FYI, we noticed the following commit (built with gcc-9):

commit: 5c5cdc6f33a44efa58bfc229fd27009b6892b852 ("[PATCH v5] drivers: thermal: clear all mitigation when thermal zone is disabled")
url: https://github.com/0day-ci/linux/commits/Manaf-Meethalavalappu-Pallikunhi/drivers-thermal-clear-all-mitigation-when-thermal-zone-is-disabled/20220128-021438
base: https://git.kernel.org/cgit/linux/kernel/git/rafael/linux-pm.git thermal
patch link: https://lore.kernel.org/linux-pm/[email protected]

in testcase: pm-qa
version: pm-qa-x86_64-5ead848-1_20220129
with following parameters:

test: thermal
ucode: 0x28



on test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz with 8G memory

caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):



If you fix the issue, kindly add following tag
Reported-by: kernel test robot <[email protected]>


[ 989.983940][ T64] INFO: task thermal_02.sh:1314 blocked for more than 491 seconds.
[ 989.991701][ T64] Tainted: G I 5.16.0-05211-g5c5cdc6f33a4 #1
[ 989.999398][ T64] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 990.007953][ T64] task:thermal_02.sh state:D stack: 0 pid: 1314 ppid: 1313 flags:0x00004000
[ 990.017033][ T64] Call Trace:
[ 990.020208][ T64] <TASK>
[ 990.023013][ T64] __schedule (kernel/sched/core.c:4972 kernel/sched/core.c:6253)
[ 990.027354][ T64] ? io_schedule_timeout (kernel/sched/core.c:6132)
[ 990.032537][ T64] ? asm_sysvec_reschedule_ipi (arch/x86/include/asm/idtentry.h:643)
[ 990.038043][ T64] schedule (include/linux/instrumented.h:71 (discriminator 1) include/asm-generic/bitops/instrumented-non-atomic.h:134 (discriminator 1) include/linux/thread_info.h:118 (discriminator 1) include/linux/sched.h:2120 (discriminator 1) kernel/sched/core.c:6328 (discriminator 1))
[ 990.042042][ T64] schedule_preempt_disabled (arch/x86/include/asm/preempt.h:80 kernel/sched/core.c:6386)
[ 990.047401][ T64] __mutex_lock+0x5b6/0xc80
[ 990.052406][ T64] ? ww_mutex_unlock (kernel/locking/mutex.c:737)
[ 990.057038][ T64] ? entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)
[ 990.063041][ T64] ? pointer (lib/vsprintf.c:2736)
[ 990.067185][ T64] ? sysfs_kf_bin_read (fs/sysfs/file.c:129)
[ 990.072222][ T64] mutex_lock (kernel/locking/mutex.c:283)
[ 990.076346][ T64] ? __mutex_lock_slowpath (kernel/locking/mutex.c:279)
[ 990.081506][ T64] step_wise_unbind (drivers/thermal/gov_step_wise.c:184)
[ 990.086199][ T64] thermal_set_governor (drivers/thermal/thermal_core.c:103)
[ 990.091186][ T64] thermal_zone_device_set_policy (drivers/thermal/thermal_core.c:210)
[ 990.096996][ T64] policy_store (drivers/thermal/thermal_sysfs.c:222)
[ 990.101212][ T64] ? available_policies_show (drivers/thermal/thermal_sysfs.c:222)
[ 990.106533][ T64] ? mutex_lock (arch/x86/include/asm/atomic64_64.h:190 include/linux/atomic/atomic-long.h:443 include/linux/atomic/atomic-instrumented.h:1669 kernel/locking/mutex.c:168 kernel/locking/mutex.c:282)
[ 990.110836][ T64] ? __mutex_lock_slowpath (kernel/locking/mutex.c:279)
[ 990.116004][ T64] ? __check_object_size (mm/usercopy.c:241 mm/usercopy.c:287 mm/usercopy.c:257)
[ 990.121170][ T64] kernfs_fop_write_iter (fs/kernfs/file.c:300)
[ 990.126340][ T64] new_sync_write (fs/read_write.c:504 (discriminator 1))
[ 990.130905][ T64] ? __handle_mm_fault (mm/memory.c:4549 mm/memory.c:4686)
[ 990.136071][ T64] ? new_sync_read (fs/read_write.c:493)
[ 990.140735][ T64] vfs_write (fs/read_write.c:590)
[ 990.144881][ T64] ksys_write (fs/read_write.c:643)
[ 990.148982][ T64] ? __ia32_sys_read (fs/read_write.c:633)
[ 990.153620][ T64] ? fput_many (fs/file_table.c:336)
[ 990.157842][ T64] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
[ 990.162138][ T64] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)
[ 990.167893][ T64] RIP: 0033:0x7fd8aec02504
[ 990.172185][ T64] RSP: 002b:00007ffec45a5278 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 990.180487][ T64] RAX: ffffffffffffffda RBX: 0000559787f4ece0 RCX: 00007fd8aec02504
[ 990.188348][ T64] RDX: 000000000000000b RSI: 0000559787f4ece0 RDI: 0000000000000001
[ 990.196325][ T64] RBP: 000000000000000b R08: 0000000000000003 R09: 00007fd8aecd42c0
[ 990.204172][ T64] R10: 0000559787f35010 R11: 0000000000000246 R12: 0000000000000001
[ 990.212023][ T64] R13: 000000000000000b R14: 7fffffffffffffff R15: 0000000000000002
[ 990.219884][ T64] </TASK>
[ 1481.503934][ T64] INFO: task thermal_02.sh:1314 blocked for more than 983 seconds.
[ 1481.511693][ T64] Tainted: G I 5.16.0-05211-g5c5cdc6f33a4 #1
[ 1481.519382][ T64] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1481.527980][ T64] task:thermal_02.sh state:D stack: 0 pid: 1314 ppid: 1313 flags:0x00004000
[ 1481.537086][ T64] Call Trace:
[ 1481.540290][ T64] <TASK>
[ 1481.543111][ T64] __schedule (kernel/sched/core.c:4972 kernel/sched/core.c:6253)
[ 1481.547498][ T64] ? io_schedule_timeout (kernel/sched/core.c:6132)
[ 1481.552653][ T64] ? asm_sysvec_reschedule_ipi (arch/x86/include/asm/idtentry.h:643)
[ 1481.558244][ T64] schedule (include/linux/instrumented.h:71 (discriminator 1) include/asm-generic/bitops/instrumented-non-atomic.h:134 (discriminator 1) include/linux/thread_info.h:118 (discriminator 1) include/linux/sched.h:2120 (discriminator 1) kernel/sched/core.c:6328 (discriminator 1))
[ 1481.562177][ T64] schedule_preempt_disabled (arch/x86/include/asm/preempt.h:80 kernel/sched/core.c:6386)
[ 1481.567574][ T64] __mutex_lock+0x5b6/0xc80
[ 1481.572565][ T64] ? ww_mutex_unlock (kernel/locking/mutex.c:737)
[ 1481.577217][ T64] ? entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)
[ 1481.583168][ T64] ? pointer (lib/vsprintf.c:2736)
[ 1481.587303][ T64] ? sysfs_kf_bin_read (fs/sysfs/file.c:129)
[ 1481.592311][ T64] mutex_lock (kernel/locking/mutex.c:283)
[ 1481.596429][ T64] ? __mutex_lock_slowpath (kernel/locking/mutex.c:279)
[ 1481.601611][ T64] step_wise_unbind (drivers/thermal/gov_step_wise.c:184)
[ 1481.606267][ T64] thermal_set_governor (drivers/thermal/thermal_core.c:103)
[ 1481.611257][ T64] thermal_zone_device_set_policy (drivers/thermal/thermal_core.c:210)
[ 1481.617039][ T64] policy_store (drivers/thermal/thermal_sysfs.c:222)
[ 1481.621258][ T64] ? available_policies_show (drivers/thermal/thermal_sysfs.c:222)
[ 1481.626606][ T64] ? mutex_lock (arch/x86/include/asm/atomic64_64.h:190 include/linux/atomic/atomic-long.h:443 include/linux/atomic/atomic-instrumented.h:1669 kernel/locking/mutex.c:168 kernel/locking/mutex.c:282)
[ 1481.630922][ T64] ? __mutex_lock_slowpath (kernel/locking/mutex.c:279)
[ 1481.636087][ T64] ? __check_object_size (mm/usercopy.c:241 mm/usercopy.c:287 mm/usercopy.c:257)
[ 1481.641262][ T64] kernfs_fop_write_iter (fs/kernfs/file.c:300)
[ 1481.646435][ T64] new_sync_write (fs/read_write.c:504 (discriminator 1))
[ 1481.651001][ T64] ? __handle_mm_fault (mm/memory.c:4549 mm/memory.c:4686)
[ 1481.656176][ T64] ? new_sync_read (fs/read_write.c:493)
[ 1481.660822][ T64] vfs_write (fs/read_write.c:590)
[ 1481.664949][ T64] ksys_write (fs/read_write.c:643)
[ 1481.669081][ T64] ? __ia32_sys_read (fs/read_write.c:633)
[ 1481.673736][ T64] ? fput_many (fs/file_table.c:336)
[ 1481.677948][ T64] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
[ 1481.682255][ T64] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)
[ 1481.688035][ T64] RIP: 0033:0x7fd8aec02504
[ 1481.692343][ T64] RSP: 002b:00007ffec45a5278 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 1481.700644][ T64] RAX: ffffffffffffffda RBX: 0000559787f4ece0 RCX: 00007fd8aec02504
[ 1481.708514][ T64] RDX: 000000000000000b RSI: 0000559787f4ece0 RDI: 0000000000000001
[ 1481.716382][ T64] RBP: 000000000000000b R08: 0000000000000003 R09: 00007fd8aecd42c0
[ 1481.724236][ T64] R10: 0000559787f35010 R11: 0000000000000246 R12: 0000000000000001
[ 1481.732120][ T64] R13: 000000000000000b R14: 7fffffffffffffff R15: 0000000000000002
[ 1481.740001][ T64] </TASK>
[ 1973.023992][ T64] INFO: task thermal_02.sh:1314 blocked for more than 1474 seconds.
[ 1973.031833][ T64] Tainted: G I 5.16.0-05211-g5c5cdc6f33a4 #1
[ 1973.039492][ T64] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 1973.048057][ T64] task:thermal_02.sh state:D stack: 0 pid: 1314 ppid: 1313 flags:0x00004000
[ 1973.057149][ T64] Call Trace:
[ 1973.060294][ T64] <TASK>
[ 1973.063103][ T64] __schedule (kernel/sched/core.c:4972 kernel/sched/core.c:6253)
[ 1973.067411][ T64] ? io_schedule_timeout (kernel/sched/core.c:6132)
[ 1973.072583][ T64] ? asm_sysvec_reschedule_ipi (arch/x86/include/asm/idtentry.h:643)
[ 1973.078088][ T64] schedule (include/linux/instrumented.h:71 (discriminator 1) include/asm-generic/bitops/instrumented-non-atomic.h:134 (discriminator 1) include/linux/thread_info.h:118 (discriminator 1) include/linux/sched.h:2120 (discriminator 1) kernel/sched/core.c:6328 (discriminator 1))
[ 1973.082049][ T64] schedule_preempt_disabled (arch/x86/include/asm/preempt.h:80 kernel/sched/core.c:6386)
[ 1973.087388][ T64] __mutex_lock+0x5b6/0xc80
[ 1973.092371][ T64] ? ww_mutex_unlock (kernel/locking/mutex.c:737)
[ 1973.097005][ T64] ? entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)
[ 1973.102969][ T64] ? pointer (lib/vsprintf.c:2736)
[ 1973.107102][ T64] ? sysfs_kf_bin_read (fs/sysfs/file.c:129)
[ 1973.112094][ T64] mutex_lock (kernel/locking/mutex.c:283)
[ 1973.116195][ T64] ? __mutex_lock_slowpath (kernel/locking/mutex.c:279)
[ 1973.121369][ T64] step_wise_unbind (drivers/thermal/gov_step_wise.c:184)
[ 1973.126023][ T64] thermal_set_governor (drivers/thermal/thermal_core.c:103)
[ 1973.131023][ T64] thermal_zone_device_set_policy (drivers/thermal/thermal_core.c:210)
[ 1973.136804][ T64] policy_store (drivers/thermal/thermal_sysfs.c:222)
[ 1973.141024][ T64] ? available_policies_show (drivers/thermal/thermal_sysfs.c:222)
[ 1973.146363][ T64] ? mutex_lock (arch/x86/include/asm/atomic64_64.h:190 include/linux/atomic/atomic-long.h:443 include/linux/atomic/atomic-instrumented.h:1669 kernel/locking/mutex.c:168 kernel/locking/mutex.c:282)
[ 1973.150654][ T64] ? __mutex_lock_slowpath (kernel/locking/mutex.c:279)
[ 1973.155828][ T64] ? __check_object_size (mm/usercopy.c:241 mm/usercopy.c:287 mm/usercopy.c:257)
[ 1973.161021][ T64] kernfs_fop_write_iter (fs/kernfs/file.c:300)
[ 1973.166195][ T64] new_sync_write (fs/read_write.c:504 (discriminator 1))
[ 1973.170761][ T64] ? __handle_mm_fault (mm/memory.c:4549 mm/memory.c:4686)
[ 1973.175929][ T64] ? new_sync_read (fs/read_write.c:493)
[ 1973.180593][ T64] vfs_write (fs/read_write.c:590)
[ 1973.184714][ T64] ksys_write (fs/read_write.c:643)
[ 1973.188847][ T64] ? __ia32_sys_read (fs/read_write.c:633)
[ 1973.193494][ T64] ? fput_many (fs/file_table.c:336)
[ 1973.197716][ T64] do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
[ 1973.202021][ T64] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)
[ 1973.207804][ T64] RIP: 0033:0x7fd8aec02504
[ 1973.212111][ T64] RSP: 002b:00007ffec45a5278 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 1973.220413][ T64] RAX: ffffffffffffffda RBX: 0000559787f4ece0 RCX: 00007fd8aec02504
[ 1973.228282][ T64] RDX: 000000000000000b RSI: 0000559787f4ece0 RDI: 0000000000000001
[ 1973.236151][ T64] RBP: 000000000000000b R08: 0000000000000003 R09: 00007fd8aecd42c0
[ 1973.244021][ T64] R10: 0000559787f35010 R11: 0000000000000246 R12: 0000000000000001
[ 1973.251890][ T64] R13: 000000000000000b R14: 7fffffffffffffff R15: 0000000000000002
[ 1973.259760][ T64] </TASK>
[ 2191.943376][ T392] Tue Feb 1 04:21:49 UTC 2022 detected soft_timeout
[ 2191.943384][ T392]
[ 2191.981858][ T394] Terminated
[ 2191.981867][ T394]
[ 2191.982749][ T392] kill 1134 /usr/bin/time -v -o /tmp/lkp/pm-qa.time /lkp/lkp/src/tests/pm-qa
[ 2191.987190][ T392]
[ 2192.026709][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-jobfile-append-var?job_file=/lkp/jobs/scheduled/lkp-hsw-d04/pm-qa-thermal-ucode=0x28-debian-10.4-x86_64-20200603.cgz-5c5cdc6f33a44efa58bfc229fd27009b6892b852-20220201-47846-ansh2t-2.yaml&job_state=post_run -O /dev/null
[ 2192.026719][ T392]
[ 2192.638864][ T392] Tue Feb 1 04:21:50 UTC 2022 wait_job_finished --timeout 60
[ 2192.638873][ T392]
[ 2193.642239][ T392] kill 1074 vmstat --timestamp -n 10
[ 2193.642246][ T392]
[ 2193.650995][ T392] kill 1072 dmesg --follow --decode
[ 2193.651002][ T392]
[ 2193.662851][ T392] wait for background processes: 1081 1077 oom-killer meminfo
[ 2193.662858][ T392]
[ 2199.782625][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-jobfile-append-var?job_file=/lkp/jobs/scheduled/lkp-hsw-d04/pm-qa-thermal-ucode=0x28-debian-10.4-x86_64-20200603.cgz-5c5cdc6f33a44efa58bfc229fd27009b6892b852-20220201-47846-ansh2t-2.yaml&loadavg=1.00%201.00%200.97%201/198%208469&start_time=1643687149&end_time=1643689309&version=/lkp/lkp/.src-20220131-222107:85a4e74d:6386d7104& -O /dev/null
[ 2199.782638][ T392]
[ 2199.833583][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-jobfile-append-var?job_file=/lkp/jobs/scheduled/lkp-hsw-d04/pm-qa-thermal-ucode=0x28-debian-10.4-x86_64-20200603.cgz-5c5cdc6f33a44efa58bfc229fd27009b6892b852-20220201-47846-ansh2t-2.yaml&job_state=soft_timeout -O /dev/null
[ 2199.833590][ T392]
[ 2199.873122][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-post-run?job_file=/lkp/jobs/scheduled/lkp-hsw-d04/pm-qa-thermal-ucode=0x28-debian-10.4-x86_64-20200603.cgz-5c5cdc6f33a44efa58bfc229fd27009b6892b852-20220201-47846-ansh2t-2.yaml -O /dev/null
[ 2199.873128][ T392]
[ 2200.282075][ T392] getting new job...
[ 2200.282083][ T392]
[ 2200.329121][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/gpxelinux.cgi?hostname=lkp-hsw-d04&mac=74:d4:35:85:1d:0d&last_kernel=/pkg/linux/x86_64-rhel-8.3-func/gcc-9/5c5cdc6f33a44efa58bfc229fd27009b6892b852/vmlinuz-5.16.0-05211-g5c5cdc6f33a4&lkp_wtmp -O /tmp/next-job-lkp
[ 2200.329132][ T392]
[ 2271.732054][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp//lkp/jobs/scheduled/lkp-hsw-d04/pm-qa-thermal-ucode=0x28-debian-10.4-x86_64-20200603.cgz-5c5cdc6f33a44efa58bfc229fd27009b6892b852-20220201-47846-ansh2t-5.cgz -O /tmp/next-job.cgz
[ 2271.732064][ T392]
[ 2271.741809][ T394] 22 blocks
[ 2271.761153][ T394]
[ 2271.774837][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-wtmp?tbox_name=lkp-hsw-d04&tbox_state=kexec_to_next_job -O /dev/null
[ 2271.774845][ T392]
[ 2272.458476][ T392] downloading kernel image ...
[ 2272.458485][ T392]
[ 2272.499677][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-jobfile-append-var?job_file=/lkp/jobs/scheduled/lkp-hsw-d04/pm-qa-thermal-ucode=0x28-debian-10.4-x86_64-20200603.cgz-5c5cdc6f33a44efa58bfc229fd27009b6892b852-20220201-47846-ansh2t-5.yaml&job_state=wget_kernel -O /dev/null
[ 2272.499687][ T392]
[ 2273.147391][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/pkg/linux/x86_64-rhel-8.3-func/gcc-9/5c5cdc6f33a44efa58bfc229fd27009b6892b852/vmlinuz-5.16.0-05211-g5c5cdc6f33a4 -N -P /opt/rootfs/tmp/pkg/linux/x86_64-rhel-8.3-func/gcc-9/5c5cdc6f33a44efa58bfc229fd27009b6892b852
[ 2273.147400][ T392]
[ 2273.179427][ T392] downloading initrds ...
[ 2273.179434][ T392]
[ 2273.192811][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/cgi-bin/lkp-jobfile-append-var?job_file=/lkp/jobs/scheduled/lkp-hsw-d04/pm-qa-thermal-ucode=0x28-debian-10.4-x86_64-20200603.cgz-5c5cdc6f33a44efa58bfc229fd27009b6892b852-20220201-47846-ansh2t-5.yaml&job_state=wget_initrd -O /dev/null
[ 2273.192818][ T392]
[ 2273.792937][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/osimage/debian/debian-10.4-x86_64-20200603.cgz -N -P /opt/rootfs/tmp/osimage/debian
[ 2273.792947][ T392]
[ 2273.974736][ T392] /opt/rootfs/tmp/osimage/debian/debian-10.4-x86_64-20200603.cgz isn't modified
[ 2273.974746][ T392]
[ 2273.992780][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/lkp/jobs/scheduled/lkp-hsw-d04/pm-qa-thermal-ucode=0x28-debian-10.4-x86_64-20200603.cgz-5c5cdc6f33a44efa58bfc229fd27009b6892b852-20220201-47846-ansh2t-5.cgz -N -P /opt/rootfs/tmp/lkp/jobs/scheduled/lkp-hsw-d04
[ 2273.992789][ T392]
[ 2274.024249][ T394] 22 blocks
[ 2274.024260][ T394]
[ 2274.040106][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/osimage/user/lkp/lkp-x86_64.cgz -N -P /opt/rootfs/tmp/osimage/user/lkp
[ 2274.040114][ T392]
[ 2274.072422][ T392] /opt/rootfs/tmp/osimage/user/lkp/lkp-x86_64.cgz isn't modified
[ 2274.072431][ T392]
[ 2274.087145][ T392] /usr/bin/wget -q --timeout=1800 --tries=1 --local-encoding=UTF-8 http://internal-lkp-server:80/~lkp/osimage/deps/debian-10.4-x86_64-20200603.cgz/run-ipconfig_20200608.cgz -N -P /opt/rootfs/tmp/osimage/deps/debian-10.4-x86_64-20200603.cgz
[ 2274.087152][ T392]
[ 2274.118219][ T392] /opt/rootfs/tmp/osimage/deps/debian-10.4-x86_64-20200603.cgz/run-ipconfig_20200608.cgz isn't modified


To reproduce:

git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
sudo bin/lkp install job.yaml # job file is attached in this email
bin/lkp split-job --compatible job.yaml # generate the yaml file for lkp run
sudo bin/lkp run generated-yaml-file

# if come across any failure that blocks the test,
# please remove ~/.lkp and /lkp dir to run from a clean state.



---
0DAY/LKP+ Test Infrastructure Open Source Technology Center
https://lists.01.org/hyperkitty/list/[email protected] Intel Corporation

Thanks,
Oliver Sang


Attachments:
(No filename) (18.36 kB)
config-5.16.0-05211-g5c5cdc6f33a4 (180.56 kB)
job-script (5.43 kB)
dmesg.xz (7.40 kB)
pm-qa (5.77 kB)
job.yaml (4.24 kB)
reproduce (27.00 B)
Download all attachments
Subject: Re: [PATCH v5] drivers: thermal: clear all mitigation when thermal zone is disabled


On 1/31/2022 12:55 PM, Lukasz Luba wrote:
> Hi Manaf,
>
> On 1/27/22 6:11 PM, Manaf Meethalavalappu Pallikunhi wrote:
>> Whenever a thermal zone is in trip violated state, there is a chance
>> that the same thermal zone mode can be disabled either via
>> thermal core API or via thermal zone sysfs. Once it is disabled,
>> the framework bails out any re-evaluation of thermal zone. It leads
>> to a case where if it is already in mitigation state, it will stay
>> the same state forever.
>>
>> To avoid above mentioned issue, add support to bind/unbind
>> governor from thermal zone during thermal zone mode change request
>> and clear all existing throttling in governor unbind_from_tz()
>> callback.
>
> I have one use case:
> This would be a bit dangerous, e.g. to switch governors while there is a
> high temperature. Although, sounds reasonable to left a 'default' state
> for a next governor.
>
I believe only way to change the governror via userspace at runtime.

Just re-evaluate thermal zone  (thermal_zone_device_update) immediately 
after

thermal_zone_device_set_policy()  in same policy_store() context, isn't
it good enough ?

Not sure how a "default" state  can be reverted once governor change is
done.

Re-evaluating thermal zone doesn't guarantee that it will recover previous

set default state for all governors, right ?

>>
>> Suggested-by: Daniel Lezcano <[email protected]>
>> Signed-off-by: Manaf Meethalavalappu Pallikunhi
>> <[email protected]>
>> ---
>>   drivers/thermal/gov_power_allocator.c |  3 +++
>>   drivers/thermal/gov_step_wise.c       | 26 ++++++++++++++++++++++++++
>>   drivers/thermal/thermal_core.c        | 31
>> +++++++++++++++++++++++++++----
>>   3 files changed, 56 insertions(+), 4 deletions(-)
>
> Why only two governors need that change and not all?
> Because they don't have 'bind/unbind' callbacks, then maybe we should
> change that as well to make it consistent?
I will update other governors as well in v6
>
> Regards,
> Lukasz

2022-02-15 10:14:58

by Lukasz Luba

[permalink] [raw]
Subject: Re: [PATCH v5] drivers: thermal: clear all mitigation when thermal zone is disabled



On 2/14/22 8:00 PM, Manaf Meethalavalappu Pallikunhi wrote:
>
> On 1/31/2022 12:55 PM, Lukasz Luba wrote:
>> Hi Manaf,
>>
>> On 1/27/22 6:11 PM, Manaf Meethalavalappu Pallikunhi wrote:
>>> Whenever a thermal zone is in trip violated state, there is a chance
>>> that the same thermal zone mode can be disabled either via
>>> thermal core API or via thermal zone sysfs. Once it is disabled,
>>> the framework bails out any re-evaluation of thermal zone. It leads
>>> to a case where if it is already in mitigation state, it will stay
>>> the same state forever.
>>>
>>> To avoid above mentioned issue, add support to bind/unbind
>>> governor from thermal zone during thermal zone mode change request
>>> and clear all existing throttling in governor unbind_from_tz()
>>> callback.
>>
>> I have one use case:
>> This would be a bit dangerous, e.g. to switch governors while there is a
>> high temperature. Although, sounds reasonable to left a 'default' state
>> for a next governor.
>>
> I believe only way to change the governror via userspace at runtime.
>
> Just re-evaluate thermal zone  (thermal_zone_device_update) immediately
> after
>
> thermal_zone_device_set_policy()  in same policy_store() context, isn't
> it good enough ?

It depends. The code would switch the governors very fast, in the
meantime notifying about possible full speed of CPU (cooling state = 0).
If the task scheduler goes via schedutil (cpufreq governor) at that
moment and decides to set this max frequency, it will be set.
This is situation with your patch, since you added in IPA unbind
'allow_maximum_power()'.
Then the new governor is bind, evaluates the max cooling state, the
notification about reduced max freq is sent to schedutil (a workqueue
will call .sugov_limits() callback) and lower freq would be set.

Now there are things which are not greatly covered by these 4
involved sub-systems (thermal fwk, schedutil, scheduler, HW).
It takes time. It also depends when the actual HW freq is possible to be
set. It might take a few milli-seconds or even a dozes of milli-seconds
(depends on HW).

Without your change, we avoid such situation while switching the
thermal governors.

For your requirement, which is 'mode' enable/disable it OK to
un-throttle.

It's probably something to Rafael and Daniel to judge if we want to
pay that cost and introduce this racy time slot.

Maybe there is a way to implement your needed feature differently.
Unfortunately, I'm super busy with other stuff this month so I cannot
spent much time investigating this.


>
> Not sure how a "default" state  can be reverted once governor change is
> done.
>
> Re-evaluating thermal zone doesn't guarantee that it will recover previous
>
> set default state for all governors, right ?
>
>>>
>>> Suggested-by: Daniel Lezcano <[email protected]>
>>> Signed-off-by: Manaf Meethalavalappu Pallikunhi
>>> <[email protected]>
>>> ---
>>>   drivers/thermal/gov_power_allocator.c |  3 +++
>>>   drivers/thermal/gov_step_wise.c       | 26 ++++++++++++++++++++++++++
>>>   drivers/thermal/thermal_core.c        | 31
>>> +++++++++++++++++++++++++++----
>>>   3 files changed, 56 insertions(+), 4 deletions(-)
>>
>> Why only two governors need that change and not all?
>> Because they don't have 'bind/unbind' callbacks, then maybe we should
>> change that as well to make it consistent?
> I will update other governors as well in v6

Sounds reasonable based on your code (you've added the unbind_from_tz()
callback to step_wise, but not for others).

2022-02-16 19:50:26

by Daniel Lezcano

[permalink] [raw]
Subject: Re: [PATCH v5] drivers: thermal: clear all mitigation when thermal zone is disabled

On 15/02/2022 10:57, Lukasz Luba wrote:
>
>
> On 2/14/22 8:00 PM, Manaf Meethalavalappu Pallikunhi wrote:
>>
>> On 1/31/2022 12:55 PM, Lukasz Luba wrote:
>>> Hi Manaf,
>>>
>>> On 1/27/22 6:11 PM, Manaf Meethalavalappu Pallikunhi wrote:
>>>> Whenever a thermal zone is in trip violated state, there is a chance
>>>> that the same thermal zone mode can be disabled either via
>>>> thermal core API or via thermal zone sysfs. Once it is disabled,
>>>> the framework bails out any re-evaluation of thermal zone. It leads
>>>> to a case where if it is already in mitigation state, it will stay
>>>> the same state forever.
>>>>
>>>> To avoid above mentioned issue, add support to bind/unbind
>>>> governor from thermal zone during thermal zone mode change request
>>>> and clear all existing throttling in governor unbind_from_tz()
>>>> callback.
>>>
>>> I have one use case:
>>> This would be a bit dangerous, e.g. to switch governors while there is a
>>> high temperature. Although, sounds reasonable to left a 'default' state
>>> for a next governor.
>>>
>> I believe only way to change the governror via userspace at runtime.
>>
>> Just re-evaluate thermal zone  (thermal_zone_device_update)
>> immediately after
>>
>> thermal_zone_device_set_policy()  in same policy_store() context,
>> isn't it good enough ?
>
> It depends. The code would switch the governors very fast, in the
> meantime notifying about possible full speed of CPU (cooling state = 0).
> If the task scheduler goes via schedutil (cpufreq governor) at that
> moment and decides to set this max frequency, it will be set.
> This is situation with your patch, since you added in IPA unbind
> 'allow_maximum_power()'.
> Then the new governor is bind, evaluates the max cooling state, the
> notification about reduced max freq is sent to schedutil (a workqueue
> will call .sugov_limits() callback) and lower freq would be set.
>
> Now there are things which are not greatly covered by these 4
> involved sub-systems (thermal fwk, schedutil, scheduler, HW).
> It takes time. It also depends when the actual HW freq is possible to be
> set. It might take a few milli-seconds or even a dozes of milli-seconds
> (depends on HW).
>
> Without your change, we avoid such situation while switching the
> thermal governors.
>
> For your requirement, which is 'mode' enable/disable it OK to
> un-throttle.
>
> It's probably something to Rafael and Daniel to judge if we want to
> pay that cost and introduce this racy time slot.
>
> Maybe there is a way to implement your needed feature differently.
> Unfortunately, I'm super busy with other stuff this month so I cannot
> spent much time investigating this.

IMO, we should be able to disable a thermal zone, no mitigation will
happen but somehow we can keep the hot / critical trip points enabled,
so if the temperature crosses these that would trigger an action for safety.

However, for me it is still unclear what means "disabling a thermal
zone" exactly ?

Maybe we should clarify that before going further


--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog

2022-02-17 03:54:04

by Daniel Lezcano

[permalink] [raw]
Subject: Re: [PATCH v5] drivers: thermal: clear all mitigation when thermal zone is disabled

On 27/01/2022 19:11, Manaf Meethalavalappu Pallikunhi wrote:
> Whenever a thermal zone is in trip violated state, there is a chance
> that the same thermal zone mode can be disabled either via
> thermal core API or via thermal zone sysfs. Once it is disabled,
> the framework bails out any re-evaluation of thermal zone. It leads
> to a case where if it is already in mitigation state, it will stay
> the same state forever.
>
> To avoid above mentioned issue, add support to bind/unbind
> governor from thermal zone during thermal zone mode change request
> and clear all existing throttling in governor unbind_from_tz()
> callback.

Manaf,

can you clarify when / why is needed to disable a thermal zone at runtime?



> Suggested-by: Daniel Lezcano <[email protected]>
> Signed-off-by: Manaf Meethalavalappu Pallikunhi <[email protected]>
> ---
> drivers/thermal/gov_power_allocator.c | 3 +++
> drivers/thermal/gov_step_wise.c | 26 ++++++++++++++++++++++++++
> drivers/thermal/thermal_core.c | 31 +++++++++++++++++++++++++++----
> 3 files changed, 56 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
> index 13e3757..9ff0c5f 100644
> --- a/drivers/thermal/gov_power_allocator.c
> +++ b/drivers/thermal/gov_power_allocator.c
> @@ -696,6 +696,9 @@ static void power_allocator_unbind(struct thermal_zone_device *tz)
>
> dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
>
> + tz->passive = 0;
> + allow_maximum_power(tz, true);
> +
> if (params->allocated_tzp) {
> kfree(tz->tzp);
> tz->tzp = NULL;
> diff --git a/drivers/thermal/gov_step_wise.c b/drivers/thermal/gov_step_wise.c
> index 12acb12..2d2186b 100644
> --- a/drivers/thermal/gov_step_wise.c
> +++ b/drivers/thermal/gov_step_wise.c
> @@ -168,6 +168,31 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
> }
>
> /**
> + * step_wise_unbind() - unbind the step_wise governor to a thermal zone
> + * @tz: thermal zone to unbind it to
> + *
> + * Clear all previous throttling and reset passive counter.
> + *
> + */
> +static void step_wise_unbind(struct thermal_zone_device *tz)
> +{
> + struct thermal_instance *instance;
> +
> + dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id);
> +
> + mutex_lock(&tz->lock);
> + tz->passive = 0;
> + list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
> + instance->initialized = false;
> + instance->target = THERMAL_NO_TARGET;
> + mutex_lock(&instance->cdev->lock);
> + __thermal_cdev_update(instance->cdev);
> + mutex_unlock(&instance->cdev->lock);
> + }
> + mutex_unlock(&tz->lock);
> +}
> +
> +/**
> * step_wise_throttle - throttles devices associated with the given zone
> * @tz: thermal_zone_device
> * @trip: trip point index
> @@ -196,6 +221,7 @@ static int step_wise_throttle(struct thermal_zone_device *tz, int trip)
>
> static struct thermal_governor thermal_gov_step_wise = {
> .name = "step_wise",
> + .unbind_from_tz = step_wise_unbind,
> .throttle = step_wise_throttle,
> };
> THERMAL_GOVERNOR_DECLARE(thermal_gov_step_wise);
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 82654dc..34726d3 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -274,6 +274,26 @@ static int __init thermal_register_governors(void)
> return ret;
> }
>
> +static void thermal_governor_attach(struct thermal_zone_device *tz)
> +{
> + mutex_lock(&thermal_governor_lock);
> + if (tz->governor && tz->governor->bind_to_tz) {
> + if (tz->governor->bind_to_tz(tz))
> + dev_err(&tz->device,
> + "governor %s failed to bind to thermal zone %s\n",
> + tz->governor->name, tz->type);
> + }
> + mutex_unlock(&thermal_governor_lock);
> +}
> +
> +static void thermal_governor_detach(struct thermal_zone_device *tz)
> +{
> + mutex_lock(&thermal_governor_lock);
> + if (tz->governor && tz->governor->unbind_from_tz)
> + tz->governor->unbind_from_tz(tz);
> + mutex_unlock(&thermal_governor_lock);
> +}
> +
> /*
> * Zone update section: main control loop applied to each zone while monitoring
> *
> @@ -449,12 +469,15 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,
>
> mutex_unlock(&tz->lock);
>
> - thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> -
> - if (mode == THERMAL_DEVICE_ENABLED)
> + if (mode == THERMAL_DEVICE_ENABLED) {
> + thermal_governor_attach(tz);
> + thermal_zone_device_init(tz);
> + thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
> thermal_notify_tz_enable(tz->id);
> - else
> + } else {
> + thermal_governor_detach(tz);
> thermal_notify_tz_disable(tz->id);
> + }
>
> return ret;
> }


--
<http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro: <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog