2013-05-01 15:47:39

by Dave Jones

Subject: 3.9+ tty lockdep trace.

[ 6760.076083] ======================================================
[ 6760.076084] [ INFO: possible circular locking dependency detected ]
[ 6760.076086] 3.9.0+ #16 Not tainted
[ 6760.076086] -------------------------------------------------------
[ 6760.076087] (agetty)/26163 is trying to acquire lock:
[ 6760.076096] blocked: ((&buf->work)){+.+...}, instance: ffff88011c8b0020, at: [<ffffffff81062065>] flush_work+0x5/0x2e0
[ 6760.076096]
but task is already holding lock:
[ 6760.076102] blocked: (console_lock){+.+.+.}, instance: ffffffff81c2fde0, at: [<ffffffff813bc201>] vt_ioctl+0xb61/0x1230
[ 6760.076103]
which lock already depends on the new lock.

[ 6760.076103]
the existing dependency chain (in reverse order) is:
[ 6760.076106]
-> #1 (console_lock){+.+.+.}:
[ 6760.076111] [<ffffffff810b3f74>] lock_acquire+0xa4/0x210
[ 6760.076115] [<ffffffff810416c7>] console_lock+0x77/0x80
[ 6760.076118] [<ffffffff813c3dcd>] con_flush_chars+0x2d/0x50
[ 6760.076123] [<ffffffff813b32b2>] n_tty_receive_buf+0x122/0x14d0
[ 6760.076126] [<ffffffff813b7709>] flush_to_ldisc+0x119/0x170
[ 6760.076130] [<ffffffff81064381>] process_one_work+0x211/0x700
[ 6760.076133] [<ffffffff8106498b>] worker_thread+0x11b/0x3a0
[ 6760.076137] [<ffffffff8106ce5d>] kthread+0xed/0x100
[ 6760.076141] [<ffffffff81601cac>] ret_from_fork+0x7c/0xb0
[ 6760.076143]
-> #0 ((&buf->work)){+.+...}:
[ 6760.076146] [<ffffffff810b349a>] __lock_acquire+0x193a/0x1c00
[ 6760.076149] [<ffffffff810b3f74>] lock_acquire+0xa4/0x210
[ 6760.076151] [<ffffffff810620ae>] flush_work+0x4e/0x2e0
[ 6760.076153] [<ffffffff81065305>] __cancel_work_timer+0x95/0x130
[ 6760.076155] [<ffffffff810653b0>] cancel_work_sync+0x10/0x20
[ 6760.076157] [<ffffffff813b8212>] tty_port_destroy+0x12/0x20
[ 6760.076159] [<ffffffff813c65e8>] vc_deallocate+0xf8/0x110
[ 6760.076162] [<ffffffff813bc20c>] vt_ioctl+0xb6c/0x1230
[ 6760.076165] [<ffffffff813b01a5>] tty_ioctl+0x285/0xd50
[ 6760.076168] [<ffffffff811ba825>] do_vfs_ioctl+0x305/0x530
[ 6760.076170] [<ffffffff811baad1>] sys_ioctl+0x81/0xa0
[ 6760.076174] [<ffffffff81601d59>] system_call_fastpath+0x16/0x1b
[ 6760.076174]
other info that might help us debug this:

[ 6760.076175] Possible unsafe locking scenario:

[ 6760.076175]        CPU0                    CPU1
[ 6760.076176]        ----                    ----
[ 6760.076177]   lock(console_lock);
[ 6760.076179]                                lock((&buf->work));
[ 6760.076180]                                lock(console_lock);
[ 6760.076181]   lock((&buf->work));
[ 6760.076182]
*** DEADLOCK ***

[ 6760.076183] 1 lock on stack by (agetty)/26163:
[ 6760.076188] #0: blocked: (console_lock){+.+.+.}, instance: ffffffff81c2fde0, at: [<ffffffff813bc201>] vt_ioctl+0xb61/0x1230
[ 6760.076188]
stack backtrace:
[ 6760.076190] Pid: 26163, comm: (agetty) Not tainted 3.9.0+ #16
[ 6760.076191] Call Trace:
[ 6760.076196] [<ffffffff815edb14>] print_circular_bug+0x200/0x20e
[ 6760.076199] [<ffffffff810b349a>] __lock_acquire+0x193a/0x1c00
[ 6760.076203] [<ffffffff8100a269>] ? sched_clock+0x9/0x10
[ 6760.076206] [<ffffffff8100a269>] ? sched_clock+0x9/0x10
[ 6760.076209] [<ffffffff8100a200>] ? native_sched_clock+0x20/0x80
[ 6760.076213] [<ffffffff810b3f74>] lock_acquire+0xa4/0x210
[ 6760.076215] [<ffffffff81062065>] ? flush_work+0x5/0x2e0
[ 6760.076217] [<ffffffff810620ae>] flush_work+0x4e/0x2e0
[ 6760.076219] [<ffffffff81062065>] ? flush_work+0x5/0x2e0
[ 6760.076222] [<ffffffff810b15db>] ? mark_held_locks+0xbb/0x140
[ 6760.076226] [<ffffffff8113c8a3>] ? __free_pages_ok.part.57+0x93/0xc0
[ 6760.076229] [<ffffffff810b15db>] ? mark_held_locks+0xbb/0x140
[ 6760.076231] [<ffffffff810652f2>] ? __cancel_work_timer+0x82/0x130
[ 6760.076234] [<ffffffff81065305>] __cancel_work_timer+0x95/0x130
[ 6760.076236] [<ffffffff810653b0>] cancel_work_sync+0x10/0x20
[ 6760.076238] [<ffffffff813b8212>] tty_port_destroy+0x12/0x20
[ 6760.076240] [<ffffffff813c65e8>] vc_deallocate+0xf8/0x110
[ 6760.076243] [<ffffffff813bc20c>] vt_ioctl+0xb6c/0x1230
[ 6760.076246] [<ffffffff810aec41>] ? lock_release_holdtime.part.30+0xa1/0x170
[ 6760.076250] [<ffffffff813b01a5>] tty_ioctl+0x285/0xd50
[ 6760.076254] [<ffffffff812b00f6>] ? inode_has_perm.isra.46.constprop.61+0x56/0x80
[ 6760.076257] [<ffffffff811ba825>] do_vfs_ioctl+0x305/0x530
[ 6760.076260] [<ffffffff812b04db>] ? selinux_file_ioctl+0x5b/0x110
[ 6760.076263] [<ffffffff811baad1>] sys_ioctl+0x81/0xa0
[ 6760.076266] [<ffffffff81601d59>] system_call_fastpath+0x16/0x1b


2013-05-06 18:24:07

by Dave Jones

Subject: Re: 3.9+ tty lockdep trace.

On Wed, May 01, 2013 at 11:47:23AM -0400, Dave Jones wrote:
> <lockdep trace snipped; identical to the report above>

Just hit this again, by switching ttys during bootup.
The previous time was switching from X to console iirc, so that seems to be
the key to triggering this.

Dave

2013-05-06 18:41:03

by Greg Kroah-Hartman

Subject: Re: 3.9+ tty lockdep trace.

On Mon, May 06, 2013 at 02:00:20PM -0400, Dave Jones wrote:
> On Wed, May 01, 2013 at 11:47:23AM -0400, Dave Jones wrote:
> > <lockdep trace snipped; identical to the report above>
>
> Just hit this again, by switching ttys during bootup.
> The previous time was switching from X to console iirc, so that seems to be
> the key to triggering this.

Sorry for not getting back to this sooner, merge window fun...

Anyway, Peter, could this be something that your ldisc patches are
triggering? That's all I can think of at the moment.

greg k-h

2013-05-07 14:14:17

by Peter Hurley

Subject: Re: 3.9+ tty lockdep trace.

On 05/06/2013 02:40 PM, Greg Kroah-Hartman wrote:
> On Mon, May 06, 2013 at 02:00:20PM -0400, Dave Jones wrote:
>> On Wed, May 01, 2013 at 11:47:23AM -0400, Dave Jones wrote:
>> > <lockdep trace snipped; identical to the report above>
>>
>> Just hit this again, by switching ttys during bootup.
>> The previous time was switching from X to console iirc, so that seems to be
>> the key to triggering this.
>
> Sorry for not getting back to this sooner, merge window fun...
>
> Anyway, Peter, could this be something that your ldisc patches are
> triggering?

Yes, although early analysis also suggests the lockdep splat is possible
regardless, just much less likely.

I looked at this over the weekend, but I wasn't able to determine (in
the limited time I had available) if the splat was valid; i.e., if the
CPU states could actually be concurrent and result in deadlock.
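
To make the two states concrete, the suspect interleaving looks like
this minimal userspace sketch (a pthread mutex stands in for the
console lock, and a joined thread stands in for the flip-buffer work
that cancel_work_sync() waits on; all names here are invented for
illustration, this is not kernel code):

#include <pthread.h>

static pthread_mutex_t console_lock = PTHREAD_MUTEX_INITIALIZER;

/* Analogue of flush_to_ldisc(): the queued work takes the console
 * lock (as con_flush_chars() does) to echo input to the screen. */
static void *flip_buffer_work(void *unused)
{
        pthread_mutex_lock(&console_lock);
        /* ... write characters to the console ... */
        pthread_mutex_unlock(&console_lock);
        return NULL;
}

int main(void)
{
        pthread_t work;

        pthread_create(&work, NULL, flip_buffer_work, NULL);

        /* Analogue of the VT_DISALLOCATE path: wait for the work to
         * finish (tty_port_destroy() -> cancel_work_sync()) while
         * still holding the console lock.  If the work is queued but
         * hasn't taken the lock yet, neither side can proceed. */
        pthread_mutex_lock(&console_lock);
        pthread_join(work, NULL);       /* may never return */
        pthread_mutex_unlock(&console_lock);
        return 0;
}

Whether both states are actually reachable at the same time in the
kernel is exactly what I haven't verified yet.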

I need to study the VT_DISALLOCATE ioctl in more detail which I plan
to do this week.

Regards,
Peter Hurley

2013-05-07 15:04:34

by Dave Jones

Subject: Re: 3.9+ tty lockdep trace.

On Tue, May 07, 2013 at 10:14:09AM -0400, Peter Hurley wrote:

> >> Just hit this again, by switching ttys during bootup.
> >> The previous time was switching from X to console iirc, so that seems to be
> >> the key to triggering this.
> >
> > Sorry for not getting back to this sooner, merge window fun...
> >
> > Anyway, Peter, could this be something that your ldisc patches are
> > triggering?
>
> Yes, although early analysis also suggests the lockdep splat is possible
> regardless, just much less likely.
>
> I looked at this over the weekend, but I wasn't able to determine (in
> the limited time I had available) if the splat was valid; i.e., if the
> CPU states could actually be concurrent and result in deadlock.
>
> I need to study the VT_DISALLOCATE ioctl in more detail which I plan
> to do this week.

I don't know if it proves/disproves anything, but shortly before the dump
gets printed, everything is kind of locked up. I can switch ttys, but
the gettys on them are dead to the world until after the dump.

ISTR a pause for a while when I hit this switching from X too.

Dave

2013-05-07 17:35:40

by Peter Hurley

Subject: Re: 3.9+ tty lockdep trace.

On 05/07/2013 11:04 AM, Dave Jones wrote:
> On Tue, May 07, 2013 at 10:14:09AM -0400, Peter Hurley wrote:
>
> > >> Just hit this again, by switching ttys during bootup.
> > >> The previous time was switching from X to console iirc, so that seems to be
> > >> the key to triggering this.
> > >
> > > Sorry for not getting back to this sooner, merge window fun...
> > >
> > > Anyway, Peter, could this be something that your ldisc patches are
> > > triggering?
> >
> > Yes, although early analysis also suggests the lockdep splat is possible
> > regardless, just much less likely.
> >
> > I looked at this over the weekend, but I wasn't able to determine (in
> > the limited time I had available) if the splat was valid; ie., if the
> > cpu states could actually be concurrent and result in deadlock.
> >
> > I need to study the VT_DISALLOCATE ioctl in more detail which I plan
> > to do this week.
>
> I don't know if it proves/disproves anything, but shortly before the dump
> gets printed, everything is kind of locked up. I can switch ttys, but
> the gettys on them are dead to the world until after the dump.
>
> ISTR a pause for a while when I hit this switching from X too.

It would help to know the steps to reproduce. For example, you mention
unresponsive gettys but the VT_DISALLOCATE ioctl doesn't work (and isn't
designed to) on open ttys.

Regards,
Peter Hurley

2013-05-07 18:36:43

by Dave Jones

Subject: Re: 3.9+ tty lockdep trace.

On Tue, May 07, 2013 at 01:35:36PM -0400, Peter Hurley wrote:
> On 05/07/2013 11:04 AM, Dave Jones wrote:
> > On Tue, May 07, 2013 at 10:14:09AM -0400, Peter Hurley wrote:
> >
> > > >> Just hit this again, by switching ttys during bootup.
> > > >> The previous time was switching from X to console iirc, so that seems to be
> > > >> the key to triggering this.
> > > >
> > > > Sorry for not getting back to this sooner, merge window fun...
> > > >
> > > > Anyway, Peter, could this be something that your ldisc patches are
> > > > triggering?
> > >
> > > Yes, although early analysis also suggests the lockdep splat is possible
> > > regardless, just much less likely.
> > >
> > > I looked at this over the weekend, but I wasn't able to determine (in
> > > the limited time I had available) if the splat was valid; i.e., if the
> > > CPU states could actually be concurrent and result in deadlock.
> > >
> > > I need to study the VT_DISALLOCATE ioctl in more detail which I plan
> > > to do this week.
> >
> > I don't know if it proves/disproves anything, but shortly before the dump
> > gets printed, everything is kind of locked up. I can switch ttys, but
> > the gettys on them are dead to the world until after the dump.
> >
> > ISTR a pause for a while when I hit this switching from X too.
>
> It would help to know the steps to reproduce. For example, you mention
> unresponsive gettys but the VT_DISALLOCATE ioctl doesn't work (and isn't
> designed to) on open ttys.

Hmm, dunno. I booted up, and tried to log in on tty1 (no X on that box), and after
I entered my password everything just hung for a while. I flipped to tty2
to try and log in as root, but after changing tty, I just got a blinking cursor.
10 seconds or so later, lockdep spew.

Dave

2013-05-08 21:18:06

by Peter Hurley

Subject: Re: 3.9+ tty lockdep trace.

On 05/07/2013 02:00 PM, Dave Jones wrote:
> On Tue, May 07, 2013 at 01:35:36PM -0400, Peter Hurley wrote:
> > On 05/07/2013 11:04 AM, Dave Jones wrote:
> > > On Tue, May 07, 2013 at 10:14:09AM -0400, Peter Hurley wrote:
> > >
> > > > >> Just hit this again, by switching ttys during bootup.
> > > > >> The previous time was switching from X to console iirc, so that seems to be
> > > > >> the key to triggering this.
> > > > >
> > > > > Sorry for not getting back to this sooner, merge window fun...
> > > > >
> > > > > Anyway, Peter, could this be something that your ldisc patches are
> > > > > triggering?
> > > >
> > > > Yes, although early analysis also suggests the lockdep splat is possible
> > > > regardless, just much less likely.
> > > >
> > > > I looked at this over the weekend, but I wasn't able to determine (in
> > > > the limited time I had available) if the splat was valid; i.e., if the
> > > > CPU states could actually be concurrent and result in deadlock.
> > > >
> > > > I need to study the VT_DISALLOCATE ioctl in more detail which I plan
> > > > to do this week.
> > >
> > > I don't know if it proves/disproves anything, but shortly before the dump
> > > gets printed, everything is kind of locked up. I can switch ttys, but
> > > the gettys on them are dead to the world until after the dump.
> > >
> > > ISTR a pause for a while when I hit this switching from X too.
> >
> > It would help to know the steps to reproduce. For example, you mention
> > unresponsive gettys but the VT_DISALLOCATE ioctl doesn't work (and isn't
> > designed to) on open ttys.
>
> Hmm, dunno. I booted up, and tried to log in on tty1 (no X on that box), and after
> I entered my password everything just hung for a while. I flipped to tty2
> to try and log in as root, but after changing tty, I just got a blinking cursor.
> 10 seconds or so later, lockdep spew.

Ok, Dave, thanks for the extra information; that makes sense now.

systemd optionally uses the VT_DISALLOCATE ioctl to free a VT. So when
the getty on tty2 died because of some error, systemd exercised the
VT_DISALLOCATE ioctl, which generated the lockdep splat.
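
For reference, the user-space side of that is just an ioctl against a
console device; a minimal sketch (the device node, VT number, and
error handling here are illustrative, and it needs CAP_SYS_TTY_CONFIG
or ownership of the tty):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vt.h>

int main(void)
{
        int fd = open("/dev/tty0", O_RDWR);     /* any VT device works */
        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* The argument is the VT number to free; 0 means "free every
         * unused VT".  The kernel refuses with EBUSY while the VT is
         * still open or is the foreground console. */
        if (ioctl(fd, VT_DISALLOCATE, 2) < 0)
                perror("VT_DISALLOCATE");
        close(fd);
        return 0;
}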

I'm still trying to figure out if the states can actually overlap; they're
not supposed to, but the VT_DISALLOCATE ioctl is not particularly
defensive and I suspect that overlap is possible.

It's somewhat unfortunate that systemd chose to use a really antiquated
and fundamentally unsafe ioctl like VT_DISALLOCATE. Even when I fix this
lockdep splat, bad things may still be possible.

I'm certain that the lockdep warning has nothing to do with your
login delay; if you want to send me sysrq+t output when that happens again,
I'd be willing to look over it for the underlying problem.

Regards,
Peter Hurley

2013-05-17 16:41:47

by Peter Hurley

Subject: [PATCH] tty/vt: Fix vc_deallocate() lock order

Now that the tty port owns the flip buffers and I/O is allowed
from the driver even when no tty is attached, the destruction
of the tty port (and the flip buffers) must ensure that no
outstanding work is pending.

Unfortunately, this creates a lock order problem with the
console_lock (see attached lockdep report [1] below).

For single console deallocation, drop the console_lock prior
to port destruction. When deallocating multiple consoles, defer
port destruction until the consoles have been deallocated.
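
For the single-console case, the result boils down to this shape
(simplified from the diff below):

        console_lock();
        if (VT_BUSY(vc_num))
                ret = -EBUSY;
        else
                vc = vc_deallocate(vc_num);     /* unhook under the lock */
        console_unlock();

        /* tty_port_destroy() -- and the cancel_work_sync() inside
         * it -- now runs with console_lock dropped, breaking the
         * lock cycle reported below. */
        if (vc && vc_num >= MIN_NR_CONSOLES) {
                tty_port_destroy(&vc->port);
                kfree(vc);
        }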

tty_port_destroy() is not required if the port has not
been used; remove it from the vc_allocate() failure path.

[1] lockdep report from Dave Jones <[email protected]>

======================================================
[ INFO: possible circular locking dependency detected ]
3.9.0+ #16 Not tainted
-------------------------------------------------------
(agetty)/26163 is trying to acquire lock:
blocked: ((&buf->work)){+.+...}, instance: ffff88011c8b0020, at: [<ffffffff81062065>] flush_work+0x5/0x2e0

but task is already holding lock:
blocked: (console_lock){+.+.+.}, instance: ffffffff81c2fde0, at: [<ffffffff813bc201>] vt_ioctl+0xb61/0x1230

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (console_lock){+.+.+.}:
[<ffffffff810b3f74>] lock_acquire+0xa4/0x210
[<ffffffff810416c7>] console_lock+0x77/0x80
[<ffffffff813c3dcd>] con_flush_chars+0x2d/0x50
[<ffffffff813b32b2>] n_tty_receive_buf+0x122/0x14d0
[<ffffffff813b7709>] flush_to_ldisc+0x119/0x170
[<ffffffff81064381>] process_one_work+0x211/0x700
[<ffffffff8106498b>] worker_thread+0x11b/0x3a0
[<ffffffff8106ce5d>] kthread+0xed/0x100
[<ffffffff81601cac>] ret_from_fork+0x7c/0xb0

-> #0 ((&buf->work)){+.+...}:
[<ffffffff810b349a>] __lock_acquire+0x193a/0x1c00
[<ffffffff810b3f74>] lock_acquire+0xa4/0x210
[<ffffffff810620ae>] flush_work+0x4e/0x2e0
[<ffffffff81065305>] __cancel_work_timer+0x95/0x130
[<ffffffff810653b0>] cancel_work_sync+0x10/0x20
[<ffffffff813b8212>] tty_port_destroy+0x12/0x20
[<ffffffff813c65e8>] vc_deallocate+0xf8/0x110
[<ffffffff813bc20c>] vt_ioctl+0xb6c/0x1230
[<ffffffff813b01a5>] tty_ioctl+0x285/0xd50
[<ffffffff811ba825>] do_vfs_ioctl+0x305/0x530
[<ffffffff811baad1>] sys_ioctl+0x81/0xa0
[<ffffffff81601d59>] system_call_fastpath+0x16/0x1b

other info that might help us debug this:

Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(console_lock);
                               lock((&buf->work));
                               lock(console_lock);
  lock((&buf->work));

*** DEADLOCK ***

1 lock on stack by (agetty)/26163:
#0: blocked: (console_lock){+.+.+.}, instance: ffffffff81c2fde0, at: [<ffffffff813bc201>] vt_ioctl+0xb61/0x1230
stack backtrace:
Pid: 26163, comm: (agetty) Not tainted 3.9.0+ #16
Call Trace:
[<ffffffff815edb14>] print_circular_bug+0x200/0x20e
[<ffffffff810b349a>] __lock_acquire+0x193a/0x1c00
[<ffffffff8100a269>] ? sched_clock+0x9/0x10
[<ffffffff8100a269>] ? sched_clock+0x9/0x10
[<ffffffff8100a200>] ? native_sched_clock+0x20/0x80
[<ffffffff810b3f74>] lock_acquire+0xa4/0x210
[<ffffffff81062065>] ? flush_work+0x5/0x2e0
[<ffffffff810620ae>] flush_work+0x4e/0x2e0
[<ffffffff81062065>] ? flush_work+0x5/0x2e0
[<ffffffff810b15db>] ? mark_held_locks+0xbb/0x140
[<ffffffff8113c8a3>] ? __free_pages_ok.part.57+0x93/0xc0
[<ffffffff810b15db>] ? mark_held_locks+0xbb/0x140
[<ffffffff810652f2>] ? __cancel_work_timer+0x82/0x130
[<ffffffff81065305>] __cancel_work_timer+0x95/0x130
[<ffffffff810653b0>] cancel_work_sync+0x10/0x20
[<ffffffff813b8212>] tty_port_destroy+0x12/0x20
[<ffffffff813c65e8>] vc_deallocate+0xf8/0x110
[<ffffffff813bc20c>] vt_ioctl+0xb6c/0x1230
[<ffffffff810aec41>] ? lock_release_holdtime.part.30+0xa1/0x170
[<ffffffff813b01a5>] tty_ioctl+0x285/0xd50
[<ffffffff812b00f6>] ? inode_has_perm.isra.46.constprop.61+0x56/0x80
[<ffffffff811ba825>] do_vfs_ioctl+0x305/0x530
[<ffffffff812b04db>] ? selinux_file_ioctl+0x5b/0x110
[<ffffffff811baad1>] sys_ioctl+0x81/0xa0
[<ffffffff81601d59>] system_call_fastpath+0x16/0x1b

Cc: Dave Jones <[email protected]>
Signed-off-by: Peter Hurley <[email protected]>
---
 drivers/tty/vt/vt.c       | 14 +++++-----
 drivers/tty/vt/vt_ioctl.c | 67 ++++++++++++++++++++++++++++++++++-------------
 include/linux/vt_kern.h   |  2 +-
 3 files changed, 56 insertions(+), 27 deletions(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 0829c02..374797b 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -779,7 +779,6 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */
                 con_set_default_unimap(vc);
                 vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL);
                 if (!vc->vc_screenbuf) {
-                        tty_port_destroy(&vc->port);
                         kfree(vc);
                         vc_cons[currcons].d = NULL;
                         return -ENOMEM;
@@ -986,26 +985,25 @@ static int vt_resize(struct tty_struct *tty, struct winsize *ws)
         return ret;
 }
 
-void vc_deallocate(unsigned int currcons)
+struct vc_data *vc_deallocate(unsigned int currcons)
 {
+        struct vc_data *vc = NULL;
+
         WARN_CONSOLE_UNLOCKED();
 
         if (vc_cons_allocated(currcons)) {
-                struct vc_data *vc = vc_cons[currcons].d;
-                struct vt_notifier_param param = { .vc = vc };
+                struct vt_notifier_param param;
 
+                param.vc = vc = vc_cons[currcons].d;
                 atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param);
                 vcs_remove_sysfs(currcons);
                 vc->vc_sw->con_deinit(vc);
                 put_pid(vc->vt_pid);
                 module_put(vc->vc_sw->owner);
                 kfree(vc->vc_screenbuf);
-                if (currcons >= MIN_NR_CONSOLES) {
-                        tty_port_destroy(&vc->port);
-                        kfree(vc);
-                }
                 vc_cons[currcons].d = NULL;
         }
+        return vc;
 }
 
 /*
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 98ff173..fc2c06c 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -283,6 +283,51 @@ do_unimap_ioctl(int cmd, struct unimapdesc __user *user_ud, int perm, struct vc_
         return 0;
 }
 
+/* deallocate a single console, if possible (leave 0) */
+static int vt_disallocate(unsigned int vc_num)
+{
+        struct vc_data *vc = NULL;
+        int ret = 0;
+
+        if (!vc_num)
+                return 0;
+
+        console_lock();
+        if (VT_BUSY(vc_num))
+                ret = -EBUSY;
+        else
+                vc = vc_deallocate(vc_num);
+        console_unlock();
+
+        if (vc && vc_num >= MIN_NR_CONSOLES) {
+                tty_port_destroy(&vc->port);
+                kfree(vc);
+        }
+
+        return ret;
+}
+
+/* deallocate all unused consoles, but leave 0 */
+static void vt_disallocate_all(void)
+{
+        struct vc_data *vc[MAX_NR_CONSOLES];
+        int i;
+
+        console_lock();
+        for (i = 1; i < MAX_NR_CONSOLES; i++)
+                if (!VT_BUSY(i))
+                        vc[i] = vc_deallocate(i);
+                else
+                        vc[i] = NULL;
+        console_unlock();
+
+        for (i = 1; i < MAX_NR_CONSOLES; i++) {
+                if (vc[i] && i >= MIN_NR_CONSOLES) {
+                        tty_port_destroy(&vc[i]->port);
+                        kfree(vc[i]);
+                }
+        }
+}
 
 
 /*
@@ -769,24 +814,10 @@ int vt_ioctl(struct tty_struct *tty,
                         ret = -ENXIO;
                         break;
                 }
-                if (arg == 0) {
-                        /* deallocate all unused consoles, but leave 0 */
-                        console_lock();
-                        for (i=1; i<MAX_NR_CONSOLES; i++)
-                                if (! VT_BUSY(i))
-                                        vc_deallocate(i);
-                        console_unlock();
-                } else {
-                        /* deallocate a single console, if possible */
-                        arg--;
-                        if (VT_BUSY(arg))
-                                ret = -EBUSY;
-                        else if (arg) { /* leave 0 */
-                                console_lock();
-                                vc_deallocate(arg);
-                                console_unlock();
-                        }
-                }
+                if (arg == 0)
+                        vt_disallocate_all();
+                else
+                        ret = vt_disallocate(--arg);
                 break;
 
         case VT_RESIZE:
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index e8d6571..0d33fca 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -36,7 +36,7 @@ extern int fg_console, last_console, want_console;
 int vc_allocate(unsigned int console);
 int vc_cons_allocated(unsigned int console);
 int vc_resize(struct vc_data *vc, unsigned int cols, unsigned int lines);
-void vc_deallocate(unsigned int console);
+struct vc_data *vc_deallocate(unsigned int console);
 void reset_palette(struct vc_data *vc);
 void do_blank_screen(int entering_gfx);
 void do_unblank_screen(int leaving_gfx);
--
1.8.1.2