2023-12-07 11:18:03

by Xiaolei Wang

[permalink] [raw]
Subject: [PATCH] freezer,sched: Move state information judgment outside task_call_func

It is dangerous to output warnings in task_call_func,
which may lead to the risk of deadlock. task_call_func
uses p->pi_lock, and the serial port output may call
try_to_wake_up to wake up the write buff, which also
uses p->pi_lock.

WARNING: possible circular locking dependency detected
6.7.0-rc4 #28 Not tainted
------------------------------------------------------
sh/475 is trying to acquire lock:
ffff800082b17f20 ((console_sem).lock){-.-.}-{2:2}, at: down_trylock+0x18/0x48

but task is already holding lock:
ffff0000c582dde0 (&p->pi_lock){-.-.}-{2:2}, at: task_call_func+0x40/0x124

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (&p->pi_lock){-.-.}-{2:2}:
_raw_spin_lock_irqsave+0x68/0xd0
try_to_wake_up+0x5c/0x820
wake_up_process+0x18/0x24
__up.isra.0+0x40/0x4c
up+0x5c/0x78
console_unlock+0x124/0x138
vt_move_to_console+0x48/0xb8
pm_restore_console+0x44/0x5c
pm_suspend+0x2f0/0x688
state_store+0x8c/0x118
kobj_attr_store+0x18/0x2c
sysfs_kf_write+0x4c/0x78
kernfs_fop_write_iter+0x120/0x1b4
vfs_write+0x3b4/0x558
ksys_write+0x6c/0xfc
__arm64_sys_write+0x1c/0x28
invoke_syscall+0x44/0x104
el0_svc_common.constprop.0+0xc0/0xe0
do_el0_svc+0x1c/0x28
el0_svc+0x50/0xec
el0t_64_sync_handler+0xc0/0xc4
el0t_64_sync+0x190/0x194

-> #0 ((console_sem).lock){-.-.}-{2:2}:
__lock_acquire+0x1248/0x1ab4
lock_acquire+0x120/0x308
_raw_spin_lock_irqsave+0x68/0xd0
down_trylock+0x18/0x48
__down_trylock_console_sem+0x38/0xc4
console_trylock+0x34/0x78
vprintk_emit+0x124/0x3a4
vprintk_default+0x38/0x44
vprintk+0xb0/0xc0
_printk+0x60/0x88
report_bug+0x208/0x270
bug_handler+0x24/0x6c
brk_handler+0x70/0xd4
do_debug_exception+0x9c/0x16c
el1_dbg+0x74/0x90
el1h_64_sync_handler+0xc8/0xe4
el1h_64_sync+0x64/0x68
__set_task_frozen+0x64/0xac
task_call_func+0xa0/0x124
freeze_task+0xb4/0x10c
try_to_freeze_tasks+0xd8/0x3fc
freeze_processes+0xd4/0xe4
pm_suspend+0x21c/0x688
state_store+0x8c/0x118
kobj_attr_store+0x18/0x2c
sysfs_kf_write+0x4c/0x78
kernfs_fop_write_iter+0x120/0x1b4
vfs_write+0x3b4/0x558
ksys_write+0x6c/0xfc
__arm64_sys_write+0x1c/0x28
invoke_syscall+0x44/0x104
el0_svc_common.constprop.0+0xc0/0xe0
do_el0_svc+0x1c/0x28
el0_svc+0x50/0xec
el0t_64_sync_handler+0xc0/0xc4
el0t_64_sync+0x190/0x194

other info that might help us debug this:

Possible unsafe locking scenario:

CPU0 CPU1
---- ----
lock(&p->pi_lock);
lock((console_sem).lock);
lock(&p->pi_lock);

*** DEADLOCK ***

7 locks held by sh/475:
#0: ffff0000c7245420 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x6c/0xfc
#1: ffff0000c313a890 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xf0/0x1b4
#2: ffff0000c0d3e0b8 (kn->active#48){.+.+}-{0:0}, at: kernfs_fop_write_iter+0xf8/0x1b4
#3: ffff800082b11530 (system_transition_mutex){+.+.}-{3:3}, at: pm_suspend+0xb4/0x688
#4: ffff800082ae6058 (tasklist_lock){.+.+}-{2:2}, at: try_to_freeze_tasks+0x88/0x3fc
#5: ffff800082b93f10 (freezer_lock){....}-{2:2}, at: freeze_task+0x3c/0x10c
#6: ffff0000c582dde0 (&p->pi_lock){-.-.}-{2:2}, at: task_call_func+0x40/0x124

Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic")
Signed-off-by: Xiaolei Wang <[email protected]>
---
include/linux/freezer.h | 9 +++++++++
kernel/freezer.c | 40 ++++++++++++++++++++++++----------------
2 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index b303472255be..0f089bf6ff7e 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -16,6 +16,15 @@ DECLARE_STATIC_KEY_FALSE(freezer_active);
extern bool pm_freezing; /* PM freezing in effect */
extern bool pm_nosig_freezing; /* PM nosig freezing in effect */

+/*
+ * Check whether the status and locks are normal
+ * when the task is frozen
+ */
+struct task_freeze_check {
+ unsigned int state;
+ int lockdep_depth;
+};
+
/*
* Timeout for stopping processes
*/
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c450fa8b8b5e..263687e0b0a3 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -108,6 +108,7 @@ static void fake_signal_wake_up(struct task_struct *p)
static int __set_task_frozen(struct task_struct *p, void *arg)
{
unsigned int state = READ_ONCE(p->__state);
+ struct task_freeze_check *p_check = arg;

if (p->on_rq)
return 0;
@@ -118,30 +119,37 @@ static int __set_task_frozen(struct task_struct *p, void *arg)
if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
return 0;

- /*
- * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
- * can suffer spurious wakeups.
- */
- if (state & TASK_FREEZABLE)
- WARN_ON_ONCE(!(state & TASK_NORMAL));
-
-#ifdef CONFIG_LOCKDEP
- /*
- * It's dangerous to freeze with locks held; there be dragons there.
- */
- if (!(state & __TASK_FREEZABLE_UNSAFE))
- WARN_ON_ONCE(debug_locks && p->lockdep_depth);
-#endif
-
p->saved_state = p->__state;
WRITE_ONCE(p->__state, TASK_FROZEN);
+ p_check->state = p->__state;
+ p_check->lockdep_depth = p->lockdep_depth;
return TASK_FROZEN;
}

static bool __freeze_task(struct task_struct *p)
{
+ struct task_freeze_check p_check;
+ unsigned int ret;
/* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */
- return task_call_func(p, __set_task_frozen, NULL);
+ ret = task_call_func(p, __set_task_frozen, &p_check);
+ if (ret) {
+ /*
+ * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they
+ * can suffer spurious wakeups.
+ */
+ if (p_check.state & TASK_FREEZABLE)
+ WARN_ON_ONCE(!(p_check.state & TASK_NORMAL));
+
+#ifdef CONFIG_LOCKDEP
+ /*
+ * It's dangerous to freeze with locks held; there be dragons there.
+ */
+ if (!(p_check.state & __TASK_FREEZABLE_UNSAFE))
+ WARN_ON_ONCE(debug_locks && p_check.lockdep_depth);
+#endif
+ }
+ return ret;
+
}

/**
--
2.25.1


2023-12-07 12:36:19

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] freezer,sched: Move state information judgment outside task_call_func

On Thu, Dec 07, 2023 at 07:16:34PM +0800, Xiaolei Wang wrote:
> It is dangerous to output warnings in task_call_func,
> which may lead to the risk of deadlock. task_call_func
> uses p->pi_lock, and the serial port output may call
> try_to_wake_up to wake up the write buff, which also
> uses p->pi_lock.

No, we're not going to make the code ugly because printk is stupid. Fix
whatever that triggers the WARN and leave it at that.

2023-12-07 19:31:09

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] freezer,sched: Move state information judgment outside task_call_func

Hi Xiaolei,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.7-rc4 next-20231207]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Xiaolei-Wang/freezer-sched-Move-state-information-judgment-outside-task_call_func/20231207-191924
base: linus/master
patch link: https://lore.kernel.org/r/20231207111634.667057-1-xiaolei.wang%40windriver.com
patch subject: [PATCH] freezer,sched: Move state information judgment outside task_call_func
config: arm-defconfig (https://download.01.org/0day-ci/archive/20231208/[email protected]/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231208/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

>> kernel/freezer.c:125:30: error: no member named 'lockdep_depth' in 'struct task_struct'
p_check->lockdep_depth = p->lockdep_depth;
~ ^
1 error generated.


vim +125 kernel/freezer.c

107
108 static int __set_task_frozen(struct task_struct *p, void *arg)
109 {
110 unsigned int state = READ_ONCE(p->__state);
111 struct task_freeze_check *p_check = arg;
112
113 if (p->on_rq)
114 return 0;
115
116 if (p != current && task_curr(p))
117 return 0;
118
119 if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
120 return 0;
121
122 p->saved_state = p->__state;
123 WRITE_ONCE(p->__state, TASK_FROZEN);
124 p_check->state = p->__state;
> 125 p_check->lockdep_depth = p->lockdep_depth;
126 return TASK_FROZEN;
127 }
128

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

2023-12-07 21:07:18

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] freezer,sched: Move state information judgment outside task_call_func

Hi Xiaolei,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.7-rc4 next-20231207]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Xiaolei-Wang/freezer-sched-Move-state-information-judgment-outside-task_call_func/20231207-191924
base: linus/master
patch link: https://lore.kernel.org/r/20231207111634.667057-1-xiaolei.wang%40windriver.com
patch subject: [PATCH] freezer,sched: Move state information judgment outside task_call_func
config: arm64-defconfig (https://download.01.org/0day-ci/archive/20231208/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231208/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All errors (new ones prefixed by >>):

kernel/freezer.c: In function '__set_task_frozen':
>> kernel/freezer.c:125:35: error: 'struct task_struct' has no member named 'lockdep_depth'
125 | p_check->lockdep_depth = p->lockdep_depth;
| ^~


vim +125 kernel/freezer.c

107
108 static int __set_task_frozen(struct task_struct *p, void *arg)
109 {
110 unsigned int state = READ_ONCE(p->__state);
111 struct task_freeze_check *p_check = arg;
112
113 if (p->on_rq)
114 return 0;
115
116 if (p != current && task_curr(p))
117 return 0;
118
119 if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED)))
120 return 0;
121
122 p->saved_state = p->__state;
123 WRITE_ONCE(p->__state, TASK_FROZEN);
124 p_check->state = p->__state;
> 125 p_check->lockdep_depth = p->lockdep_depth;
126 return TASK_FROZEN;
127 }
128

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki