It is dangerous to output warnings in task_call_func,
which may lead to the risk of deadlock. task_call_func
uses p->pi_lock, and the serial port output may call
try_to_wake_up to wake up the write buff, which also
uses p->pi_lock.
WARNING: possible circular locking dependency detected
6.7.0-rc4 #28 Not tainted
------------------------------------------------------
sh/475 is trying to acquire lock:
ffff800082b17f20 ((console_sem).lock){-.-.}-{2:2}, at: down_trylock+0x18/0x48
but task is already holding lock:
ffff0000c582dde0 (&p->pi_lock){-.-.}-{2:2}, at: task_call_func+0x40/0x124
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (&p->pi_lock){-.-.}-{2:2}:
_raw_spin_lock_irqsave+0x68/0xd0
try_to_wake_up+0x5c/0x820
wake_up_process+0x18/0x24
__up.isra.0+0x40/0x4c
up+0x5c/0x78
console_unlock+0x124/0x138
vt_move_to_console+0x48/0xb8
pm_restore_console+0x44/0x5c
pm_suspend+0x2f0/0x688
state_store+0x8c/0x118
kobj_attr_store+0x18/0x2c
sysfs_kf_write+0x4c/0x78
kernfs_fop_write_iter+0x120/0x1b4
vfs_write+0x3b4/0x558
ksys_write+0x6c/0xfc
__arm64_sys_write+0x1c/0x28
invoke_syscall+0x44/0x104
el0_svc_common.constprop.0+0xc0/0xe0
do_el0_svc+0x1c/0x28
el0_svc+0x50/0xec
el0t_64_sync_handler+0xc0/0xc4
el0t_64_sync+0x190/0x194
-> #0 ((console_sem).lock){-.-.}-{2:2}:
__lock_acquire+0x1248/0x1ab4
lock_acquire+0x120/0x308
_raw_spin_lock_irqsave+0x68/0xd0
down_trylock+0x18/0x48
__down_trylock_console_sem+0x38/0xc4
console_trylock+0x34/0x78
vprintk_emit+0x124/0x3a4
vprintk_default+0x38/0x44
vprintk+0xb0/0xc0
_printk+0x60/0x88
report_bug+0x208/0x270
bug_handler+0x24/0x6c
brk_handler+0x70/0xd4
do_debug_exception+0x9c/0x16c
el1_dbg+0x74/0x90
el1h_64_sync_handler+0xc8/0xe4
el1h_64_sync+0x64/0x68
__set_task_frozen+0x64/0xac
task_call_func+0xa0/0x124
freeze_task+0xb4/0x10c
try_to_freeze_tasks+0xd8/0x3fc
freeze_processes+0xd4/0xe4
pm_suspend+0x21c/0x688
state_store+0x8c/0x118
kobj_attr_store+0x18/0x2c
sysfs_kf_write+0x4c/0x78
kernfs_fop_write_iter+0x120/0x1b4
vfs_write+0x3b4/0x558
ksys_write+0x6c/0xfc
__arm64_sys_write+0x1c/0x28
invoke_syscall+0x44/0x104
el0_svc_common.constprop.0+0xc0/0xe0
do_el0_svc+0x1c/0x28
el0_svc+0x50/0xec
el0t_64_sync_handler+0xc0/0xc4
el0t_64_sync+0x190/0x194
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(&p->pi_lock);
lock((console_sem).lock);
lock(&p->pi_lock);
*** DEADLOCK ***
7 locks held by sh/475:
#0: ffff0000c7245420 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x6c/0xfc
#1: ffff0000c313a890 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xf0/0x1b4
#2: ffff0000c0d3e0b8 (kn->active#48){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xf8/0x1b4
#3: ffff800082b11530 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend+0xb4/0x688
#4: ffff800082ae6058 (tasklist_lock){.+.+}-{2:2}, at: try_to_freeze_tasks+0x88/0x3fc
#5: ffff800082b93f10 (freezer_lock){....}-{2:2}, at: freeze_task+0x3c/0x10c
#6: ffff0000c582dde0 (&p->pi_lock){-.-.}-{2:2}, at: task_call_func+0x40/0x124
Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic")
Signed-off-by: Xiaolei Wang <[email protected]>
---
include/linux/freezer.h | 9 +++++++++
kernel/freezer.c | 40 ++++++++++++++++++++++++----------------
2 files changed, 33 insertions(+), 16 deletions(-)
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index b303472255be..0f089bf6ff7e 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -16,6 +16,15 @@ DECLARE_STATIC_KEY_FALSE(freezer_active);
extern bool pm_freezing; /* PM freezing in effect */
extern bool pm_nosig_freezing; /* PM nosig freezing in effect */
+/*
+ * Check whether the status and locks are normal
+ * when the task is frozen
+ */
+struct task_freeze_check {
+ unsigned int state;
+ int lockdep_depth;
+};
+
/*
* Timeout for stopping processes
*/
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c450fa8b8b5e..263687e0b0a3 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -108,6 +108,7 @@ static void fake_signal_wake_up(struct task_struct *p)
static int __set_task_frozen(struct task_struct *p, void *arg)
{
unsigned int state = READ_ONCE(p->__state);
+ struct task_freeze_check *p_check = arg;
if (p->on_rq)
return 0;
@@ -118,30 +119,37 @@ static int __set_task_frozen(struct task_struct *p, void *arg)
if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
return 0;
- /*
- * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
- * can suffer spurious wakeups.
- */
- if (state & TASK_FREEZABLE)
- WARN_ON_ONCE(!(state & TASK_NORMAL));
-
-#ifdef CONFIG_LOCKDEP
- /*
- * It's dangerous to freeze with locks held; there be dragons there.
- */
- if (!(state & __TASK_FREEZABLE_UNSAFE))
- WARN_ON_ONCE(debug_locks && p->lockdep_depth);
-#endif
-
p->saved_state = p->__state;
WRITE_ONCE(p->__state, TASK_FROZEN);
+ p_check->state = p->__state;
+ p_check->lockdep_depth = p->lockdep_depth;
return TASK_FROZEN;
}
static bool __freeze_task(struct task_struct *p)
{
+ struct task_freeze_check p_check;
+ unsigned int ret;
/* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */
- return task_call_func(p, __set_task_frozen, NULL);
+ ret = task_call_func(p, __set_task_frozen, &p_check);
+ if (ret) {
+ /*
+ * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
+ * can suffer spurious wakeups.
+ */
+ if (p_check.state & TASK_FREEZABLE)
+ WARN_ON_ONCE(!(p_check.state & TASK_NORMAL));
+
+#ifdef CONFIG_LOCKDEP
+ /*
+ * It's dangerous to freeze with locks held; there be dragons there.
+ */
+ if (!(p_check.state & __TASK_FREEZABLE_UNSAFE))
+ WARN_ON_ONCE(debug_locks && p_check.lockdep_depth);
+#endif
+ }
+ return ret;
+
}
/**
--
2.25.1
On Thu, Dec 07, 2023 at 07:16:34PM +0800, Xiaolei Wang wrote:
> It is dangerous to output warnings in task_call_func,
> which may lead to the risk of deadlock. task_call_func
> uses p->pi_lock, and the serial port output may call
> try_to_wake_up to wake up the write buff, which also
> uses p->pi_lock.
No, we're not going to make the code ugly because printk is stupid. Fix
whatever that triggers the WARN and leave it at that.
Hi Xiaolei,
kernel test robot noticed the following build errors:
[auto build test ERROR on linus/master]
[also build test ERROR on v6.7-rc4 next-20231207]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xiaolei-Wang/freezer-sched-Move-state-information-judgment-outside-task_call_func/20231207-191924
base: linus/master
patch link: https://lore.kernel.org/r/20231207111634.667057-1-xiaolei.wang%40windriver.com
patch subject: [PATCH] freezer,sched: Move state information judgment outside task_call_func
config: arm-defconfig (https://download.01.org/0day-ci/archive/20231208/[email protected]/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231208/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
>> kernel/freezer.c:125:30: error: no member named 'lockdep_depth' in 'struct task_struct'
p_check->lockdep_depth = p->lockdep_depth;
~ ^
1 error generated.
vim +125 kernel/freezer.c
107
108 static int __set_task_frozen(struct task_struct *p, void *arg)
109 {
110 unsigned int state = READ_ONCE(p->__state);
111 struct task_freeze_check *p_check = arg;
112
113 if (p->on_rq)
114 return 0;
115
116 if (p != current && task_curr(p))
117 return 0;
118
119 if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
120 return 0;
121
122 p->saved_state = p->__state;
123 WRITE_ONCE(p->__state, TASK_FROZEN);
124 p_check->state = p->__state;
> 125 p_check->lockdep_depth = p->lockdep_depth;
126 return TASK_FROZEN;
127 }
128
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Xiaolei,
kernel test robot noticed the following build errors:
[auto build test ERROR on linus/master]
[also build test ERROR on v6.7-rc4 next-20231207]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xiaolei-Wang/freezer-sched-Move-state-information-judgment-outside-task_call_func/20231207-191924
base: linus/master
patch link: https://lore.kernel.org/r/20231207111634.667057-1-xiaolei.wang%40windriver.com
patch subject: [PATCH] freezer,sched: Move state information judgment outside task_call_func
config: arm64-defconfig (https://download.01.org/0day-ci/archive/20231208/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231208/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All errors (new ones prefixed by >>):
kernel/freezer.c: In function '__set_task_frozen':
>> kernel/freezer.c:125:35: error: 'struct task_struct' has no member named 'lockdep_depth'
125 | p_check->lockdep_depth = p->lockdep_depth;
| ^~
vim +125 kernel/freezer.c
107
108 static int __set_task_frozen(struct task_struct *p, void *arg)
109 {
110 unsigned int state = READ_ONCE(p->__state);
111 struct task_freeze_check *p_check = arg;
112
113 if (p->on_rq)
114 return 0;
115
116 if (p != current && task_curr(p))
117 return 0;
118
119 if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
120 return 0;
121
122 p->saved_state = p->__state;
123 WRITE_ONCE(p->__state, TASK_FROZEN);
124 p_check->state = p->__state;
> 125 p_check->lockdep_depth = p->lockdep_depth;
126 return TASK_FROZEN;
127 }
128
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki