Can we safely revert your 8b37e1bef5a6 ("leds: convert blink timer to
workqueue"), or have there been other changes which now depend upon it?
Your commit comment says:
This patch converts the blink timer from led-core to workqueue which is
more suitable for this kind of non-priority operations. Moreover, timer
may lead to errors when a LED setting function use a scheduling function
such as pinctrl which is using mutex.
Which sounds like a good change, except led_blink_set() itself may now
sleep, and at least one established user calls it while holding a lock.
I have CONFIG_DEBUG_ATOMIC_SLEEP=y, plus lockdep: once wireless comes up,
I get the stream of messages below. Reverting 8b37e1bef5a6 works for me,
but perhaps something else would need to be reverted too?
Hugh
BUG: sleeping function called from invalid context at kernel/workqueue.c:2650
in_atomic(): 1, irqs_disabled(): 0, pid: 332, name: wpa_supplicant
7 locks held by wpa_supplicant/332:
#0: (cb_lock){++++++}, at: [<ffffffff814a32b3>] genl_rcv+0x14/0x32
#1: (genl_mutex){+.+.+.}, at: [<ffffffff814a2820>] genl_lock+0x12/0x14
#2: (rtnl_mutex){+.+.+.}, at: [<ffffffff81491a7e>] rtnl_lock+0x12/0x14
#3: (&wdev->mtx){+.+.+.}, at: [<ffffffff81559faf>] nl80211_authenticate+0x20f/0x2ad
#4: (&local->mtx){+.+.+.}, at: [<ffffffff815a0bc5>] ieee80211_prep_connection+0x37a/0xbe9
#5: (&local->chanctx_mtx){+.+.+.}, at: [<ffffffff8159ea88>] ieee80211_vif_use_channel+0x6c/0x21e
#6: (&trig->leddev_list_lock){.+.+..}, at: [<ffffffff815a8653>] tpt_trig_timer+0xd0/0x11b
Preemption disabled at:[<ffffffff815a8653>] tpt_trig_timer+0xd0/0x11b
CPU: 3 PID: 332 Comm: wpa_supplicant Not tainted 3.17.0-rc1 #2
Hardware name: LENOVO 4174EH1/4174EH1, BIOS 8CET51WW (1.31 ) 11/29/2011
0000000000000000 ffff8800b359b5e8 ffffffff815b4eb1 0000000000000000
ffff8800b359b610 ffffffff810a2f46 ffff8800b34051b0 ffff8800b34051d0
0000000ffffffff1 ffff8800b359b6d8 ffffffff8109966f ffffffff81099610
Call Trace:
[<ffffffff815b4eb1>] dump_stack+0x4e/0x7a
[<ffffffff810a2f46>] __might_sleep+0x1fa/0x201
[<ffffffff8109966f>] flush_work+0x5f/0x213
[<ffffffff81099610>] ? mod_delayed_work_on+0x75/0x75
[<ffffffff810bda17>] ? __lock_acquire+0x10ec/0x17e8
[<ffffffff810bc4ec>] ? mark_held_locks+0x50/0x6e
[<ffffffff8109a5b7>] ? __cancel_work_timer+0x9d/0xec
[<ffffffff810bc64c>] ? trace_hardirqs_on_caller+0x142/0x19e
[<ffffffff8109a5c3>] __cancel_work_timer+0xa9/0xec
[<ffffffff8109a621>] cancel_delayed_work_sync+0xe/0x10
[<ffffffff8145a9fb>] led_blink_set+0x1d/0x39
[<ffffffff815a866f>] tpt_trig_timer+0xec/0x11b
[<ffffffff815a8b17>] ieee80211_mod_tpt_led_trig+0x103/0x130
[<ffffffff8158125b>] __ieee80211_recalc_idle+0xcf/0x122
[<ffffffff815814e9>] ieee80211_idle_off+0xe/0x10
[<ffffffff8159c296>] ieee80211_add_chanctx+0x65/0x110
[<ffffffff8159d20c>] ieee80211_new_chanctx+0x6c/0xcb
[<ffffffff8159eb79>] ieee80211_vif_use_channel+0x15d/0x21e
[<ffffffff815a0bd3>] ieee80211_prep_connection+0x388/0xbe9
[<ffffffff815a5c7c>] ieee80211_mgd_auth+0x1db/0x266
[<ffffffff815519b9>] ? cfg80211_get_bss+0x196/0x1b2
[<ffffffff81587868>] ieee80211_auth+0x13/0x15
[<ffffffff81566b61>] cfg80211_mlme_auth+0x123/0x171
[<ffffffff8155a00f>] nl80211_authenticate+0x26f/0x2ad
[<ffffffff814a34c4>] genl_family_rcv_msg+0x1f3/0x254
[<ffffffff814a3560>] genl_rcv_msg+0x3b/0x5c
[<ffffffff814a3525>] ? genl_family_rcv_msg+0x254/0x254
[<ffffffff814a3525>] ? genl_family_rcv_msg+0x254/0x254
[<ffffffff814a25fc>] netlink_rcv_skb+0x3c/0x88
[<ffffffff814a32c2>] genl_rcv+0x23/0x32
[<ffffffff814a203f>] netlink_unicast+0xf4/0x19c
[<ffffffff814a248b>] netlink_sendmsg+0x325/0x37b
[<ffffffff8146cc82>] sock_sendmsg+0x69/0x7a
[<ffffffff81125a20>] ? might_fault+0x9c/0xa1
[<ffffffff811259d7>] ? might_fault+0x53/0xa1
[<ffffffff8147a5e6>] ? verify_iovec+0x64/0xb6
[<ffffffff8146db03>] ___sys_sendmsg+0x1f4/0x272
[<ffffffff81272a7e>] ? debug_smp_processor_id+0x17/0x19
[<ffffffff81177ad9>] ? __fget_light+0xb3/0xda
[<ffffffff8146fa65>] __sys_sendmsg+0x3d/0x5e
[<ffffffff8146fa93>] SyS_sendmsg+0xd/0x19
[<ffffffff815bef52>] system_call_fastpath+0x16/0x1b
wlp3s0: send auth to c0:3f:0e:ad:ff:ee (try 1/3)
wlp3s0: authenticated
wlp3s0: associate with c0:3f:0e:ad:ff:ee (try 1/3)
wlp3s0: RX AssocResp from c0:3f:0e:ad:ff:ee (capab=0x411 status=0 aid=4)
wlp3s0: associated
IPv6: ADDRCONF(NETDEV_CHANGE): wlp3s0: link becomes ready
=================================
[ INFO: inconsistent lock state ]
3.17.0-rc1 #2 Not tainted
---------------------------------
inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
swapper/3/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
((&(&led_cdev->blink_work)->work)){+.?...}, at: [<ffffffff81099610>] flush_work+0x0/0x213
{SOFTIRQ-ON-W} state was registered at:
[<ffffffff810bcf41>] __lock_acquire+0x616/0x17e8
[<ffffffff810be752>] lock_acquire+0x61/0x78
[<ffffffff81099648>] flush_work+0x38/0x213
[<ffffffff8109a5c3>] __cancel_work_timer+0xa9/0xec
[<ffffffff8109a621>] cancel_delayed_work_sync+0xe/0x10
[<ffffffff8145a9fb>] led_blink_set+0x1d/0x39
[<ffffffff815a866f>] tpt_trig_timer+0xec/0x11b
[<ffffffff815a8b17>] ieee80211_mod_tpt_led_trig+0x103/0x130
[<ffffffff8158125b>] __ieee80211_recalc_idle+0xcf/0x122
[<ffffffff815814e9>] ieee80211_idle_off+0xe/0x10
[<ffffffff8159c296>] ieee80211_add_chanctx+0x65/0x110
[<ffffffff8159d20c>] ieee80211_new_chanctx+0x6c/0xcb
[<ffffffff8159eb79>] ieee80211_vif_use_channel+0x15d/0x21e
[<ffffffff815a0bd3>] ieee80211_prep_connection+0x388/0xbe9
[<ffffffff815a5c7c>] ieee80211_mgd_auth+0x1db/0x266
[<ffffffff81587868>] ieee80211_auth+0x13/0x15
[<ffffffff81566b61>] cfg80211_mlme_auth+0x123/0x171
[<ffffffff8155a00f>] nl80211_authenticate+0x26f/0x2ad
[<ffffffff814a34c4>] genl_family_rcv_msg+0x1f3/0x254
[<ffffffff814a3560>] genl_rcv_msg+0x3b/0x5c
[<ffffffff814a25fc>] netlink_rcv_skb+0x3c/0x88
[<ffffffff814a32c2>] genl_rcv+0x23/0x32
[<ffffffff814a203f>] netlink_unicast+0xf4/0x19c
[<ffffffff814a248b>] netlink_sendmsg+0x325/0x37b
[<ffffffff8146cc82>] sock_sendmsg+0x69/0x7a
[<ffffffff8146db03>] ___sys_sendmsg+0x1f4/0x272
[<ffffffff8146fa65>] __sys_sendmsg+0x3d/0x5e
[<ffffffff8146fa93>] SyS_sendmsg+0xd/0x19
[<ffffffff815bef52>] system_call_fastpath+0x16/0x1b
irq event stamp: 45440
hardirqs last enabled at (45440): [<ffffffff8109a5b7>] __cancel_work_timer+0x9d/0xec
hardirqs last disabled at (45439): [<ffffffff810991eb>] try_to_grab_pending+0x21/0x14d
softirqs last enabled at (45432): [<ffffffff81087d06>] _local_bh_enable+0x3e/0x40
softirqs last disabled at (45433): [<ffffffff810886c3>] irq_exit+0x3d/0x92
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock((&(&led_cdev->blink_work)->work));
<Interrupt>
lock((&(&led_cdev->blink_work)->work));
*** DEADLOCK ***
2 locks held by swapper/3/0:
#0: (((&tpt_trig->timer))){+.-...}, at: [<ffffffff810d5c55>] call_timer_fn+0x0/0xd4
#1: (&trig->leddev_list_lock){.+.?..}, at: [<ffffffff815a8653>] tpt_trig_timer+0xd0/0x11b
stack backtrace:
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 3.17.0-rc1 #2
Hardware name: LENOVO 4174EH1/4174EH1, BIOS 8CET51WW (1.31 ) 11/29/2011
0000000000000000 ffff88023e383b98 ffffffff815b4eb1 ffff8802339ac310
ffff88023e383be8 ffffffff815b1351 0000000000000001 0000000000000001
ffff880200000000 ffff8802339acb28 0000000000000004 0000000000000006
Call Trace:
<IRQ> [<ffffffff815b4eb1>] dump_stack+0x4e/0x7a
[<ffffffff815b1351>] print_usage_bug+0x2ac/0x2bd
[<ffffffff810bb609>] ? print_irq_inversion_bug+0x1cc/0x1cc
[<ffffffff810bc256>] mark_lock+0x348/0x58e
[<ffffffff810bceca>] __lock_acquire+0x59f/0x17e8
[<ffffffff810bb6b4>] ? check_usage_forwards+0xab/0xe7
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff810be752>] lock_acquire+0x61/0x78
[<ffffffff81099610>] ? mod_delayed_work_on+0x75/0x75
[<ffffffff81099648>] flush_work+0x38/0x213
[<ffffffff81099610>] ? mod_delayed_work_on+0x75/0x75
[<ffffffff810bda17>] ? __lock_acquire+0x10ec/0x17e8
[<ffffffff810bc161>] ? mark_lock+0x253/0x58e
[<ffffffff810bc4ec>] ? mark_held_locks+0x50/0x6e
[<ffffffff8109a5b7>] ? __cancel_work_timer+0x9d/0xec
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff810bc699>] ? trace_hardirqs_on_caller+0x18f/0x19e
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff8109a5c3>] __cancel_work_timer+0xa9/0xec
[<ffffffff8109a621>] cancel_delayed_work_sync+0xe/0x10
[<ffffffff8145a9fb>] led_blink_set+0x1d/0x39
[<ffffffff815a866f>] tpt_trig_timer+0xec/0x11b
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff810d5cbc>] call_timer_fn+0x67/0xd4
[<ffffffff810d5c55>] ? process_timeout+0xb/0xb
[<ffffffff810d6819>] run_timer_softirq+0x1aa/0x1f2
[<ffffffff810883a9>] __do_softirq+0xfc/0x21f
[<ffffffff810886c3>] irq_exit+0x3d/0x92
[<ffffffff81052fe4>] smp_apic_timer_interrupt+0x3f/0x4b
[<ffffffff815bfe1c>] apic_timer_interrupt+0x6c/0x80
<EOI> [<ffffffff81444829>] ? cpuidle_enter_state+0x44/0xa0
[<ffffffff81444835>] ? cpuidle_enter_state+0x50/0xa0
[<ffffffff81444926>] cpuidle_enter+0x12/0x14
[<ffffffff810b633c>] cpu_startup_entry+0x183/0x23f
[<ffffffff81051860>] start_secondary+0x1b0/0x1b5
BUG: sleeping function called from invalid context at kernel/workqueue.c:2650
in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/3
INFO: lockdep is turned off.
Preemption disabled at:[<ffffffff810b63e8>] cpu_startup_entry+0x22f/0x23f
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 3.17.0-rc1 #2
Hardware name: LENOVO 4174EH1/4174EH1, BIOS 8CET51WW (1.31 ) 11/29/2011
0000000000000000 ffff88023e383cf8 ffffffff815b4eb1 0000000000000000
ffff88023e383d20 ffffffff810a2f46 ffff8800b34051b0 ffff8800b34051d0
0000000ffffffff1 ffff88023e383de8 ffffffff8109966f ffffffff81099610
Call Trace:
<IRQ> [<ffffffff815b4eb1>] dump_stack+0x4e/0x7a
[<ffffffff810a2f46>] __might_sleep+0x1fa/0x201
[<ffffffff8109966f>] flush_work+0x5f/0x213
[<ffffffff81099610>] ? mod_delayed_work_on+0x75/0x75
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff81099206>] ? try_to_grab_pending+0x3c/0x14d
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff8109a5c3>] __cancel_work_timer+0xa9/0xec
[<ffffffff8109a621>] cancel_delayed_work_sync+0xe/0x10
[<ffffffff8145a9fb>] led_blink_set+0x1d/0x39
[<ffffffff815a866f>] tpt_trig_timer+0xec/0x11b
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff810d5cbc>] call_timer_fn+0x67/0xd4
[<ffffffff810d5c55>] ? process_timeout+0xb/0xb
[<ffffffff810d6819>] run_timer_softirq+0x1aa/0x1f2
[<ffffffff810883a9>] __do_softirq+0xfc/0x21f
[<ffffffff810886c3>] irq_exit+0x3d/0x92
[<ffffffff81052fe4>] smp_apic_timer_interrupt+0x3f/0x4b
[<ffffffff815bfe1c>] apic_timer_interrupt+0x6c/0x80
<EOI> [<ffffffff81444829>] ? cpuidle_enter_state+0x44/0xa0
[<ffffffff81444831>] ? cpuidle_enter_state+0x4c/0xa0
[<ffffffff81444835>] ? cpuidle_enter_state+0x50/0xa0
[<ffffffff81444926>] cpuidle_enter+0x12/0x14
[<ffffffff810b633c>] cpu_startup_entry+0x183/0x23f
[<ffffffff81051860>] start_secondary+0x1b0/0x1b5
BUG: sleeping function called from invalid context at kernel/workqueue.c:2650
in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/3
INFO: lockdep is turned off.
Preemption disabled at:[<ffffffff810b63e8>] cpu_startup_entry+0x22f/0x23f
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 3.17.0-rc1 #2
Hardware name: LENOVO 4174EH1/4174EH1, BIOS 8CET51WW (1.31 ) 11/29/2011
0000000000000000 ffff88023e383cf8 ffffffff815b4eb1 0000000000000000
ffff88023e383d20 ffffffff810a2f46 ffff8800b34051b0 ffff8800b34051d0
0000000ffffffff1 ffff88023e383de8 ffffffff8109966f ffffffff81099610
Call Trace:
<IRQ> [<ffffffff815b4eb1>] dump_stack+0x4e/0x7a
[<ffffffff810a2f46>] __might_sleep+0x1fa/0x201
[<ffffffff8109966f>] flush_work+0x5f/0x213
[<ffffffff81099610>] ? mod_delayed_work_on+0x75/0x75
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff81099206>] ? try_to_grab_pending+0x3c/0x14d
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff8109a5c3>] __cancel_work_timer+0xa9/0xec
[<ffffffff8109a621>] cancel_delayed_work_sync+0xe/0x10
[<ffffffff8145a9fb>] led_blink_set+0x1d/0x39
[<ffffffff815a866f>] tpt_trig_timer+0xec/0x11b
[<ffffffff815a8583>] ? __ieee80211_create_tpt_led_trigger+0xf3/0xf3
[<ffffffff810d5cbc>] call_timer_fn+0x67/0xd4
[<ffffffff810d5c55>] ? process_timeout+0xb/0xb
[<ffffffff810d6819>] run_timer_softirq+0x1aa/0x1f2
[<ffffffff810883a9>] __do_softirq+0xfc/0x21f
[<ffffffff810886c3>] irq_exit+0x3d/0x92
[<ffffffff81052fe4>] smp_apic_timer_interrupt+0x3f/0x4b
[<ffffffff815bfe1c>] apic_timer_interrupt+0x6c/0x80
<EOI> [<ffffffff81444829>] ? cpuidle_enter_state+0x44/0xa0
[<ffffffff81444831>] ? cpuidle_enter_state+0x4c/0xa0
[<ffffffff81444835>] ? cpuidle_enter_state+0x50/0xa0
[<ffffffff81444926>] cpuidle_enter+0x12/0x14
[<ffffffff810b633c>] cpu_startup_entry+0x183/0x23f
[<ffffffff81051860>] start_secondary+0x1b0/0x1b5
and another such message every second.
Hello,
Sabrina Dubroca, le Mon 25 Aug 2014 23:13:40 +0200, a ?crit :
> 2014-08-19, 13:06:07 -0400, [email protected] wrote:
> > On Sat, 16 Aug 2014 20:27:01 -0700, Hugh Dickins said:
> > > Can we safely revert your 8b37e1bef5a6 ("leds: convert blink timer to
> > > workqueue"), or have there been other changes which now depend upon it?
> >
> > I suspect there's something else busted. I hand-reverted that patch, and I *still*
> > see the following lockdep whine that looks related (as it talks about
> > leddev_list_lock). next-0811 was OK, looks like next-0815 and -0818 had this....
>
> I had a look at the code, led_trigger_event calls vt_led_set, which
> calls led_trigger_event again.
Yes, that is expected: the vt::* leds actually generate the
corresponding vt-* trigger events, which are used by the various
input*::* leds. We could indeed have a loop if the user was making the
VT::* leds use the vt-* trigger, but otherwise it's safe since it's a
different trigger. We can add code to prevent the user from building
loops, but otherwise it's a false positive.
Samuel
Samuel Thibault, le Mon 25 Aug 2014 23:23:24 +0200, a ?crit :
> We could indeed have a loop if the user was making the VT::* leds use
> the vt-* trigger,
Actually, while there can be a loop, it wouldn't be possible to inject
events in it: a VT::* led only makes the corresponding vt-* trigger if
it got an event from its trigger, etc. So it's really a false positive,
the lock detector just can not know that it can not happen.
Samuel
Hugh,
Here's a patch which must fix your problem. It allows to call led_blink_set()
from on IRQ handler by adding a work to take care of the scheduling function
cancel_delayed_work_sync().
Regards,
Vincent.
Vincent Donnefort (1):
leds: make led_blink_set IRQ safe
drivers/leds/led-class.c | 19 +++++++++++++++++++
drivers/leds/led-core.c | 16 +---------------
include/linux/leds.h | 1 +
3 files changed, 21 insertions(+), 15 deletions(-)
--
1.9.1
On Sat, Aug 23, 2014 at 01:24:56PM -0400, Tejun Heo wrote:
> Hello,
>
> On Fri, Aug 22, 2014 at 05:21:30PM -0700, Bryan Wu wrote:
> > On Tue, Aug 19, 2014 at 6:51 PM, Hugh Dickins <[email protected]> wrote:
> > > On Tue, 19 Aug 2014, Vincent Donnefort wrote:
> > >
> > >> This patch introduces a work which take care of reseting the blink workqueue and
> > >> avoid calling the cancel_delayed_work_sync function which may sleep, from an IRQ
> > >> context.
> > >>
> >
> > Vincent, I'm just wandering can we use cancel_delayed_work() instead
> > of sync version here.
> > cancel_delayed_work() can be called from IRQ context.
>
> But it doesn't wait for the currently running one to finish. It
> should wait, right?
>
It should indeed wait. Using the cancel_delayed_work() function was my first
thought, however as described into the function header, when calling
cancel_delayed_work(), the work may be running and since it re-arms
itself, the next work may still be queued.
> > > May I (most ungratefully!) say that your patch doesn't fill me with
> > > confidence that it's the right solution: adding yet another work_struct
> > > to get around the issue seemed dubious to me, I wonder if it might expose
> > > new races.
> >
> > I agree with Hugh about this new cancel work_struct. But if we revert
> > it back, I saw led_blink_set() will call del_timer_sync() which might
> > also sleep and can't be used in IRQ context. Looks like we can't call
> > led_blink_set() in any IRQ/atomic context.
>
> del_timer_sync() doesn't block but it does run from bh. It naturally
> can't be waited from an IRQ context. It may be sitting on top of a
> running instance.
>
> > > But rest assured that I know nothing about this, and I'm not at all
> > > qualified to review your patch: I hope Bryan and others will do so.
> >
> > Let me invite Tejun to give some advice on how to solve this problem.
> >
> > Tejun, Vincent's commit 8b37e1bef5a6b60e949e28a4db3006e4b00bd758
> > convert a timer into work_struct, but Hugh found it will cause
> > sleeping BUGs [1]. Could you give some opinion about that?
>
> Not knowing the code base, I'm not sure how helpful I can be but if
> something wants to synchronize against another thing which can block,
> that something needs to be able to block too. There's no way around
> it and it holds the same for timers. As long as all the work items
> are properly shut down at the end, there's nothing wrong with using
> multiple of them even when they form a dependency chain.
>
> Heh, I think I need more specific questions to be actually useful. :)
>
> Thanks.
>
> --
> tejun
Hello,
On Fri, Aug 22, 2014 at 05:21:30PM -0700, Bryan Wu wrote:
> On Tue, Aug 19, 2014 at 6:51 PM, Hugh Dickins <[email protected]> wrote:
> > On Tue, 19 Aug 2014, Vincent Donnefort wrote:
> >
> >> This patch introduces a work which take care of reseting the blink workqueue and
> >> avoid calling the cancel_delayed_work_sync function which may sleep, from an IRQ
> >> context.
> >>
>
> Vincent, I'm just wandering can we use cancel_delayed_work() instead
> of sync version here.
> cancel_delayed_work() can be called from IRQ context.
But it doesn't wait for the currently running one to finish. It
should wait, right?
> > May I (most ungratefully!) say that your patch doesn't fill me with
> > confidence that it's the right solution: adding yet another work_struct
> > to get around the issue seemed dubious to me, I wonder if it might expose
> > new races.
>
> I agree with Hugh about this new cancel work_struct. But if we revert
> it back, I saw led_blink_set() will call del_timer_sync() which might
> also sleep and can't be used in IRQ context. Looks like we can't call
> led_blink_set() in any IRQ/atomic context.
del_timer_sync() doesn't block but it does run from bh. It naturally
can't be waited from an IRQ context. It may be sitting on top of a
running instance.
> > But rest assured that I know nothing about this, and I'm not at all
> > qualified to review your patch: I hope Bryan and others will do so.
>
> Let me invite Tejun to give some advice on how to solve this problem.
>
> Tejun, Vincent's commit 8b37e1bef5a6b60e949e28a4db3006e4b00bd758
> convert a timer into work_struct, but Hugh found it will cause
> sleeping BUGs [1]. Could you give some opinion about that?
Not knowing the code base, I'm not sure how helpful I can be but if
something wants to synchronize against another thing which can block,
that something needs to be able to block too. There's no way around
it and it holds the same for timers. As long as all the work items
are properly shut down at the end, there's nothing wrong with using
multiple of them even when they form a dependency chain.
Heh, I think I need more specific questions to be actually useful. :)
Thanks.
--
tejun
On Mon, 25 Aug 2014, Samuel Thibault wrote:
> Samuel Thibault, le Mon 25 Aug 2014 23:23:24 +0200, a ?crit :
> > We could indeed have a loop if the user was making the VT::* leds use
> > the vt-* trigger,
>
> Actually, while there can be a loop, it wouldn't be possible to inject
> events in it: a VT::* led only makes the corresponding vt-* trigger if
> it got an event from its trigger, etc. So it's really a false positive,
> the lock detector just can not know that it can not happen.
I'm not suffering from this lockdep warning myself; but, false positive
or not, it does need to be fixed (or annotated). Because once lockdep
reports one issue, it turns itself off. So any developer who hits this
warning is then unable test their own changes with lockdep afterwards.
Hugh
Hugh Dickins, le Mon 25 Aug 2014 15:00:44 -0700, a ?crit :
> On Mon, 25 Aug 2014, Samuel Thibault wrote:
> > Samuel Thibault, le Mon 25 Aug 2014 23:23:24 +0200, a ?crit :
> > > We could indeed have a loop if the user was making the VT::* leds use
> > > the vt-* trigger,
> >
> > Actually, while there can be a loop, it wouldn't be possible to inject
> > events in it: a VT::* led only makes the corresponding vt-* trigger if
> > it got an event from its trigger, etc. So it's really a false positive,
> > the lock detector just can not know that it can not happen.
>
> I'm not suffering from this lockdep warning myself; but, false positive
> or not, it does need to be fixed (or annotated). Because once lockdep
> reports one issue, it turns itself off. So any developer who hits this
> warning is then unable test their own changes with lockdep afterwards.
Ew. I'll have a look.
Samuel
On Sat, 16 Aug 2014 20:27:01 -0700, Hugh Dickins said:
> Can we safely revert your 8b37e1bef5a6 ("leds: convert blink timer to
> workqueue"), or have there been other changes which now depend upon it?
I suspect there's something else busted. I hand-reverted that patch, and I *still*
see the following lockdep whine that looks related (as it talks about
leddev_list_lock). next-0811 was OK, looks like next-0815 and -0818 had this....
[ 2.473044] iTCO_vendor_support: vendor-support=0
[ 2.473122] device-mapper: uevent: version 1.0.3
[ 2.473145] =============================================
[ 2.473177] [ INFO: possible recursive locking detected ]
[ 2.473204] 3.17.0-rc1-next-20140818-dirty #274 Not tainted
[ 2.473231] ---------------------------------------------
[ 2.473258] kworker/3:0/25 is trying to acquire lock:
[ 2.473283] (&trig->leddev_list_lock){.+.?..}, at: [<ffffffff9344da1b>] led_trigger_event+0x26/0x69
[ 2.473341]
but task is already holding lock:
[ 2.473370] (&trig->leddev_list_lock){.+.?..}, at: [<ffffffff9344da1b>] led_trigger_event+0x26/0x69
[ 2.473425]
other info that might help us debug this:
[ 2.473456] Possible unsafe locking scenario:
[ 2.473485] CPU0
[ 2.473498] ----
[ 2.473511] lock(&trig->leddev_list_lock);
[ 2.473538] lock(&trig->leddev_list_lock);
[ 2.473565]
*** DEADLOCK ***
[ 2.473594] May be due to missing lock nesting notation
[ 2.473626] 11 locks held by kworker/3:0/25:
[ 2.473648] #0: ("events_long"){.+.+.+}, at: [<ffffffff93054594>] process_one_work+0x1e9/0x4ad
[ 2.473704] #1: (serio_event_work){+.+.+.}, at: [<ffffffff93054594>] process_one_work+0x1e9/0x4ad
[ 2.473760] #2: (serio_mutex){+.+.+.}, at: [<ffffffff933dd7f0>] serio_handle_event+0x19/0x19c
[ 2.473816] #3: (&dev->mutex){......}, at: [<ffffffff9332cec3>] __driver_attach+0x2f/0x7e
[ 2.473869] #4: (&dev->mutex){......}, at: [<ffffffff9332cedb>] __driver_attach+0x47/0x7e
[ 2.473922] #5: (&serio->drv_mutex){+.+.+.}, at: [<ffffffff933dca77>] serio_connect_driver+0x21/0x42
[ 2.473980] #6: (input_mutex){+.+.+.}, at: [<ffffffff933e183a>] input_register_device+0x2a9/0x381
[ 2.474036] #7: (vt_led_registered_lock){+.+.+.}, at: [<ffffffff933e3846>] input_led_connect+0x49/0x1cd
[ 2.474095] #8: (triggers_list_lock){++++.+}, at: [<ffffffff9344dd91>] led_trigger_set_default+0x27/0x87
[ 2.474154] #9: (&led_cdev->trigger_lock){+.+.+.}, at: [<ffffffff9344dd99>] led_trigger_set_default+0x2f/0x87
[ 2.474214] #10: (&trig->leddev_list_lock){.+.?..}, at: [<ffffffff9344da1b>] led_trigger_event+0x26/0x69
[ 2.474274]
stack backtrace:
[ 2.474297] CPU: 3 PID: 25 Comm: kworker/3:0 Not tainted 3.17.0-rc1-next-20140818-dirty #274
[ 2.474338] Hardware name: Dell Inc. Latitude E6530/07Y85M, BIOS A15 06/20/2014
[ 2.474374] Workqueue: events_long serio_handle_event
[ 2.474401] 0000000000000000 ffff880224ceb960 ffffffff9368ba02 ffffffff945ff130
[ 2.474447] ffffffff945ff130 ffff880224ceba20 ffffffff9307b730 0000000a00000246
[ 2.474492] 0000000000000000 ffffffff945ff130 ffffffff00000003 23605381dcae248d
[ 2.474538] Call Trace:
[ 2.474554] [<ffffffff9368ba02>] dump_stack+0x51/0xaa
[ 2.474581] [<ffffffff9307b730>] __lock_acquire+0xd22/0xed1
[ 2.474611] [<ffffffff9307b09a>] ? __lock_acquire+0x68c/0xed1
[ 2.474641] [<ffffffff9344da1b>] ? led_trigger_event+0x26/0x69
[ 2.474671] [<ffffffff9307bc64>] lock_acquire+0xdd/0x16a
[ 2.474699] [<ffffffff9307bc64>] ? lock_acquire+0xdd/0x16a
[ 2.474727] [<ffffffff9344da1b>] ? led_trigger_event+0x26/0x69
[ 2.474758] [<ffffffff93696391>] _raw_read_lock+0x30/0x3f
[ 2.474786] [<ffffffff9344da1b>] ? led_trigger_event+0x26/0x69
[ 2.474816] [<ffffffff9344da1b>] led_trigger_event+0x26/0x69
[ 2.474846] [<ffffffff933e3751>] vt_led_set+0x32/0x34
[ 2.474873] [<ffffffff9344d312>] __led_set_brightness+0x1b/0x1d
[ 2.474904] [<ffffffff9344d4a8>] led_set_brightness+0x37/0x39
[ 2.474934] [<ffffffff9344da3d>] led_trigger_event+0x48/0x69
[ 2.474965] [<ffffffff9330b7fb>] kbd_ledstate_trigger_activate+0x47/0x4f
[ 2.474999] [<ffffffff9344dbda>] led_trigger_set+0xfd/0x139
[ 2.475028] [<ffffffff9344ddcc>] led_trigger_set_default+0x62/0x87
[ 2.475061] [<ffffffff9344d87e>] led_classdev_register+0x139/0x144
[ 2.475093] [<ffffffff933e3887>] input_led_connect+0x8a/0x1cd
[ 2.475123] [<ffffffff933e1901>] input_register_device+0x370/0x381
[ 2.475154] [<ffffffff933e848f>] atkbd_connect+0x216/0x269
[ 2.475182] [<ffffffff933dca82>] serio_connect_driver+0x2c/0x42
[ 2.475213] [<ffffffff933dcab3>] serio_driver_probe+0x1b/0x1d
[ 2.476508] [<ffffffff9332cd33>] driver_probe_device+0xda/0x203
[ 2.477797] [<ffffffff9332cef0>] __driver_attach+0x5c/0x7e
[ 2.479081] [<ffffffff9332ce94>] ? __device_attach+0x38/0x38
[ 2.480328] [<ffffffff9332b3bb>] bus_for_each_dev+0x6a/0x82
[ 2.481604] [<ffffffff9332c87d>] driver_attach+0x19/0x1b
[ 2.482874] [<ffffffff933dd92d>] serio_handle_event+0x156/0x19c
[ 2.483387] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
[ 2.485373] [<ffffffff93054636>] process_one_work+0x28b/0x4ad
[ 2.485976] ata1.00: ACPI cmd ef/10:06:00:00:00:00 (SET FEATURES) succeeded
[ 2.485979] ata1.00: ACPI cmd f5/00:00:00:00:00:00 (SECURITY FREEZE LOCK) filtered out
[ 2.485982] ata1.00: ACPI cmd b1/c1:00:00:00:00:00 (DEVICE CONFIGURATION OVERLAY) filtered out
[ 2.486523] ata1.00: ACPI cmd 00/00:00:00:00:00:a0 (NOP) rejected by device (Stat=0x51 Err=0x04)
[ 2.489121] ata1.00: ATA-8: ST500LX003-1AC15G, DEM4, max UDMA/133
[ 2.489123] ata1.00: 976773168 sectors, multi 16: LBA48 NCQ (depth 31/32)
[ 2.492676] ata1.00: ACPI cmd ef/10:06:00:00:00:00 (SET FEATURES) succeeded
[ 2.492678] ata1.00: ACPI cmd f5/00:00:00:00:00:00 (SECURITY FREEZE LOCK) filtered out
[ 2.492681] ata1.00: ACPI cmd b1/c1:00:00:00:00:00 (DEVICE CONFIGURATION OVERLAY) filtered out
[ 2.493213] ata1.00: ACPI cmd 00/00:00:00:00:00:a0 (NOP) rejected by device (Stat=0x51 Err=0x04)
[ 2.495797] ata1.00: configured for UDMA/133
[ 2.497541] scsi 0:0:0:0: Direct-Access ATA ST500LX003-1AC15 DEM4 PQ: 0 ANSI: 5
[ 2.498023] sd 0:0:0:0: [sda] 976773168 512-byte logical blocks: (500 GB/465 GiB)
[ 2.498026] sd 0:0:0:0: [sda] 4096-byte physical blocks
[ 2.498169] sd 0:0:0:0: [sda] Write Protect is off
[ 2.498172] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
[ 2.498231] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
[ 2.505326] sda: sda1 sda2 sda3
[ 2.506172] sd 0:0:0:0: [sda] Attached SCSI disk
[ 2.511495] [<ffffffff93054bd3>] worker_thread+0x351/0x46a
[ 2.512855] [<ffffffff93054882>] ? process_scheduled_works+0x2a/0x2a
[ 2.514195] [<ffffffff9305967e>] kthread+0xd6/0xde
[ 2.515521] [<ffffffff930595a8>] ? __kthread_parkme+0x62/0x62
[ 2.516814] [<ffffffff93696dac>] ret_from_fork+0x7c/0xb0
[ 2.518095] [<ffffffff930595a8>] ? __kthread_parkme+0x62/0x62
[ 2.519527] device-mapper: ioctl: 4.27.0-ioctl (2013-10-30) initialised: [email protected]
[ 2.520836] Intel P-state driver initializing.
(Looks like the lockdep output got intermixed with ata spinning up my disk)
This patch introduces a work which take care of reseting the blink workqueue and
avoid calling the cancel_delayed_work_sync function which may sleep, from an IRQ
context.
Signed-off-by: Vincent Donnefort <[email protected]>
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 129729d..0971554 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -148,6 +148,24 @@ static void led_work_function(struct work_struct *ws)
msecs_to_jiffies(delay));
}
+static void reset_blink_work_delayed(struct work_struct *ws)
+{
+ struct led_classdev *led_cdev =
+ container_of(ws, struct led_classdev, reset_blink_work);
+
+ cancel_delayed_work_sync(&led_cdev->blink_work);
+
+ if (!led_cdev->blink_delay_on) {
+ __led_set_brightness(led_cdev, LED_OFF);
+ return;
+ } else if (!led_cdev->blink_delay_off) {
+ __led_set_brightness(led_cdev, led_cdev->blink_brightness);
+ return;
+ }
+
+ queue_delayed_work(system_wq, &led_cdev->blink_work, 1);
+}
+
static void set_brightness_delayed(struct work_struct *ws)
{
struct led_classdev *led_cdev =
@@ -234,6 +252,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed);
INIT_DELAYED_WORK(&led_cdev->blink_work, led_work_function);
+ INIT_WORK(&led_cdev->reset_blink_work, reset_blink_work_delayed);
#ifdef CONFIG_LEDS_TRIGGERS
led_trigger_set_default(led_cdev);
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 4bb1168..959510a 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -40,19 +40,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
led_cdev->blink_delay_on = delay_on;
led_cdev->blink_delay_off = delay_off;
- /* never on - just set to off */
- if (!delay_on) {
- __led_set_brightness(led_cdev, LED_OFF);
- return;
- }
-
- /* never off - just set to brightness */
- if (!delay_off) {
- __led_set_brightness(led_cdev, led_cdev->blink_brightness);
- return;
- }
-
- queue_delayed_work(system_wq, &led_cdev->blink_work, 1);
+ schedule_work(&led_cdev->reset_blink_work);
}
@@ -76,8 +64,6 @@ void led_blink_set(struct led_classdev *led_cdev,
unsigned long *delay_on,
unsigned long *delay_off)
{
- cancel_delayed_work_sync(&led_cdev->blink_work);
-
led_cdev->flags &= ~LED_BLINK_ONESHOT;
led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6a599dc..6e5523d 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -69,6 +69,7 @@ struct led_classdev {
unsigned long blink_delay_on, blink_delay_off;
struct delayed_work blink_work;
+ struct work_struct reset_blink_work;
int blink_brightness;
struct work_struct set_brightness_work;
--
1.9.1
2014-08-19, 13:06:07 -0400, [email protected] wrote:
> On Sat, 16 Aug 2014 20:27:01 -0700, Hugh Dickins said:
> > Can we safely revert your 8b37e1bef5a6 ("leds: convert blink timer to
> > workqueue"), or have there been other changes which now depend upon it?
>
> I suspect there's something else busted. I hand-reverted that patch, and I *still*
> see the following lockdep whine that looks related (as it talks about
> leddev_list_lock). next-0811 was OK, looks like next-0815 and -0818 had this....
I've seen this one in linux-next too. It seems to be caused by
946a4abbcfac ("input: route kbd LEDs through the generic LEDs layer")
I had a look at the code, led_trigger_event calls vt_led_set, which
calls led_trigger_event again.
CC'ing Samuel Thibault, the author of the commit.
>
> [ 2.473044] iTCO_vendor_support: vendor-support=0
> [ 2.473122] device-mapper: uevent: version 1.0.3
>
> [ 2.473145] =============================================
> [ 2.473177] [ INFO: possible recursive locking detected ]
> [ 2.473204] 3.17.0-rc1-next-20140818-dirty #274 Not tainted
> [ 2.473231] ---------------------------------------------
> [ 2.473258] kworker/3:0/25 is trying to acquire lock:
> [ 2.473283] (&trig->leddev_list_lock){.+.?..}, at: [<ffffffff9344da1b>] led_trigger_event+0x26/0x69
> [ 2.473341]
> but task is already holding lock:
> [ 2.473370] (&trig->leddev_list_lock){.+.?..}, at: [<ffffffff9344da1b>] led_trigger_event+0x26/0x69
> [ 2.473425]
> other info that might help us debug this:
> [ 2.473456] Possible unsafe locking scenario:
>
> [ 2.473485] CPU0
> [ 2.473498] ----
> [ 2.473511] lock(&trig->leddev_list_lock);
> [ 2.473538] lock(&trig->leddev_list_lock);
> [ 2.473565]
> *** DEADLOCK ***
>
> [ 2.473594] May be due to missing lock nesting notation
>
> [ 2.473626] 11 locks held by kworker/3:0/25:
> [ 2.473648] #0: ("events_long"){.+.+.+}, at: [<ffffffff93054594>] process_one_work+0x1e9/0x4ad
> [ 2.473704] #1: (serio_event_work){+.+.+.}, at: [<ffffffff93054594>] process_one_work+0x1e9/0x4ad
> [ 2.473760] #2: (serio_mutex){+.+.+.}, at: [<ffffffff933dd7f0>] serio_handle_event+0x19/0x19c
> [ 2.473816] #3: (&dev->mutex){......}, at: [<ffffffff9332cec3>] __driver_attach+0x2f/0x7e
> [ 2.473869] #4: (&dev->mutex){......}, at: [<ffffffff9332cedb>] __driver_attach+0x47/0x7e
> [ 2.473922] #5: (&serio->drv_mutex){+.+.+.}, at: [<ffffffff933dca77>] serio_connect_driver+0x21/0x42
> [ 2.473980] #6: (input_mutex){+.+.+.}, at: [<ffffffff933e183a>] input_register_device+0x2a9/0x381
> [ 2.474036] #7: (vt_led_registered_lock){+.+.+.}, at: [<ffffffff933e3846>] input_led_connect+0x49/0x1cd
> [ 2.474095] #8: (triggers_list_lock){++++.+}, at: [<ffffffff9344dd91>] led_trigger_set_default+0x27/0x87
> [ 2.474154] #9: (&led_cdev->trigger_lock){+.+.+.}, at: [<ffffffff9344dd99>] led_trigger_set_default+0x2f/0x87
> [ 2.474214] #10: (&trig->leddev_list_lock){.+.?..}, at: [<ffffffff9344da1b>] led_trigger_event+0x26/0x69
> [ 2.474274]
> stack backtrace:
> [ 2.474297] CPU: 3 PID: 25 Comm: kworker/3:0 Not tainted 3.17.0-rc1-next-20140818-dirty #274
> [ 2.474338] Hardware name: Dell Inc. Latitude E6530/07Y85M, BIOS A15 06/20/2014
> [ 2.474374] Workqueue: events_long serio_handle_event
> [ 2.474401] 0000000000000000 ffff880224ceb960 ffffffff9368ba02 ffffffff945ff130
> [ 2.474447] ffffffff945ff130 ffff880224ceba20 ffffffff9307b730 0000000a00000246
> [ 2.474492] 0000000000000000 ffffffff945ff130 ffffffff00000003 23605381dcae248d
> [ 2.474538] Call Trace:
> [ 2.474554] [<ffffffff9368ba02>] dump_stack+0x51/0xaa
> [ 2.474581] [<ffffffff9307b730>] __lock_acquire+0xd22/0xed1
> [ 2.474611] [<ffffffff9307b09a>] ? __lock_acquire+0x68c/0xed1
> [ 2.474641] [<ffffffff9344da1b>] ? led_trigger_event+0x26/0x69
> [ 2.474671] [<ffffffff9307bc64>] lock_acquire+0xdd/0x16a
> [ 2.474699] [<ffffffff9307bc64>] ? lock_acquire+0xdd/0x16a
> [ 2.474727] [<ffffffff9344da1b>] ? led_trigger_event+0x26/0x69
> [ 2.474758] [<ffffffff93696391>] _raw_read_lock+0x30/0x3f
> [ 2.474786] [<ffffffff9344da1b>] ? led_trigger_event+0x26/0x69
> [ 2.474816] [<ffffffff9344da1b>] led_trigger_event+0x26/0x69
> [ 2.474846] [<ffffffff933e3751>] vt_led_set+0x32/0x34
> [ 2.474873] [<ffffffff9344d312>] __led_set_brightness+0x1b/0x1d
> [ 2.474904] [<ffffffff9344d4a8>] led_set_brightness+0x37/0x39
> [ 2.474934] [<ffffffff9344da3d>] led_trigger_event+0x48/0x69
> [ 2.474965] [<ffffffff9330b7fb>] kbd_ledstate_trigger_activate+0x47/0x4f
> [ 2.474999] [<ffffffff9344dbda>] led_trigger_set+0xfd/0x139
> [ 2.475028] [<ffffffff9344ddcc>] led_trigger_set_default+0x62/0x87
> [ 2.475061] [<ffffffff9344d87e>] led_classdev_register+0x139/0x144
> [ 2.475093] [<ffffffff933e3887>] input_led_connect+0x8a/0x1cd
> [ 2.475123] [<ffffffff933e1901>] input_register_device+0x370/0x381
> [ 2.475154] [<ffffffff933e848f>] atkbd_connect+0x216/0x269
> [ 2.475182] [<ffffffff933dca82>] serio_connect_driver+0x2c/0x42
> [ 2.475213] [<ffffffff933dcab3>] serio_driver_probe+0x1b/0x1d
> [ 2.476508] [<ffffffff9332cd33>] driver_probe_device+0xda/0x203
> [ 2.477797] [<ffffffff9332cef0>] __driver_attach+0x5c/0x7e
> [ 2.479081] [<ffffffff9332ce94>] ? __device_attach+0x38/0x38
> [ 2.480328] [<ffffffff9332b3bb>] bus_for_each_dev+0x6a/0x82
> [ 2.481604] [<ffffffff9332c87d>] driver_attach+0x19/0x1b
> [ 2.482874] [<ffffffff933dd92d>] serio_handle_event+0x156/0x19c
> [ 2.483387] ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
> [ 2.485373] [<ffffffff93054636>] process_one_work+0x28b/0x4ad
> [ 2.485976] ata1.00: ACPI cmd ef/10:06:00:00:00:00 (SET FEATURES) succeeded
> [ 2.485979] ata1.00: ACPI cmd f5/00:00:00:00:00:00 (SECURITY FREEZE LOCK) filtered out
> [ 2.485982] ata1.00: ACPI cmd b1/c1:00:00:00:00:00 (DEVICE CONFIGURATION OVERLAY) filtered out
> [ 2.486523] ata1.00: ACPI cmd 00/00:00:00:00:00:a0 (NOP) rejected by device (Stat=0x51 Err=0x04)
> [ 2.489121] ata1.00: ATA-8: ST500LX003-1AC15G, DEM4, max UDMA/133
> [ 2.489123] ata1.00: 976773168 sectors, multi 16: LBA48 NCQ (depth 31/32)
> [ 2.492676] ata1.00: ACPI cmd ef/10:06:00:00:00:00 (SET FEATURES) succeeded
> [ 2.492678] ata1.00: ACPI cmd f5/00:00:00:00:00:00 (SECURITY FREEZE LOCK) filtered out
> [ 2.492681] ata1.00: ACPI cmd b1/c1:00:00:00:00:00 (DEVICE CONFIGURATION OVERLAY) filtered out
> [ 2.493213] ata1.00: ACPI cmd 00/00:00:00:00:00:a0 (NOP) rejected by device (Stat=0x51 Err=0x04)
> [ 2.495797] ata1.00: configured for UDMA/133
> [ 2.497541] scsi 0:0:0:0: Direct-Access ATA ST500LX003-1AC15 DEM4 PQ: 0 ANSI: 5
> [ 2.498023] sd 0:0:0:0: [sda] 976773168 512-byte logical blocks: (500 GB/465 GiB)
> [ 2.498026] sd 0:0:0:0: [sda] 4096-byte physical blocks
> [ 2.498169] sd 0:0:0:0: [sda] Write Protect is off
> [ 2.498172] sd 0:0:0:0: [sda] Mode Sense: 00 3a 00 00
> [ 2.498231] sd 0:0:0:0: [sda] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
> [ 2.505326] sda: sda1 sda2 sda3
> [ 2.506172] sd 0:0:0:0: [sda] Attached SCSI disk
> [ 2.511495] [<ffffffff93054bd3>] worker_thread+0x351/0x46a
> [ 2.512855] [<ffffffff93054882>] ? process_scheduled_works+0x2a/0x2a
> [ 2.514195] [<ffffffff9305967e>] kthread+0xd6/0xde
> [ 2.515521] [<ffffffff930595a8>] ? __kthread_parkme+0x62/0x62
> [ 2.516814] [<ffffffff93696dac>] ret_from_fork+0x7c/0xb0
> [ 2.518095] [<ffffffff930595a8>] ? __kthread_parkme+0x62/0x62
> [ 2.519527] device-mapper: ioctl: 4.27.0-ioctl (2013-10-30) initialised: [email protected]
> [ 2.520836] Intel P-state driver initializing.
>
> (Looks like the lockdep output got intermixed with ata spinning up my disk)
--
Sabrina
On Tue, Aug 19, 2014 at 6:51 PM, Hugh Dickins <[email protected]> wrote:
> On Tue, 19 Aug 2014, Vincent Donnefort wrote:
>
>> This patch introduces a work which take care of reseting the blink workqueue and
>> avoid calling the cancel_delayed_work_sync function which may sleep, from an IRQ
>> context.
>>
Vincent, I'm just wandering can we use cancel_delayed_work() instead
of sync version here.
cancel_delayed_work() can be called from IRQ context.
>> Signed-off-by: Vincent Donnefort <[email protected]>
>
> Thanks. It does work for me. Though the problem was more general than
> stated above: not just a problem in IRQ context, but in any atomic context.
>
> I don't suppose it has any effect on Valdis's lockdep issue, which I
> didn't get to see myself; but we should let Valdis report back on that.
>
> May I (most ungratefully!) say that your patch doesn't fill me with
> confidence that it's the right solution: adding yet another work_struct
> to get around the issue seemed dubious to me, I wonder if it might expose
> new races.
>
I agree with Hugh about this new cancel work_struct. But if we revert
it back, I saw led_blink_set() will call del_timer_sync() which might
also sleep and can't be used in IRQ context. Looks like we can't call
led_blink_set() in any IRQ/atomic context.
> But rest assured that I know nothing about this, and I'm not at all
> qualified to review your patch: I hope Bryan and others will do so.
>
Let me invite Tejun to give some advice on how to solve this problem.
Tejun, Vincent's commit 8b37e1bef5a6b60e949e28a4db3006e4b00bd758
convert a timer into work_struct, but Hugh found it will cause
sleeping BUGs [1]. Could you give some opinion about that?
Thanks,
-Bryan
[1]: https://lkml.org/lkml/2014/8/16/128
>
>>
>> diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
>> index 129729d..0971554 100644
>> --- a/drivers/leds/led-class.c
>> +++ b/drivers/leds/led-class.c
>> @@ -148,6 +148,24 @@ static void led_work_function(struct work_struct *ws)
>> msecs_to_jiffies(delay));
>> }
>>
>> +static void reset_blink_work_delayed(struct work_struct *ws)
>> +{
>> + struct led_classdev *led_cdev =
>> + container_of(ws, struct led_classdev, reset_blink_work);
>> +
>> + cancel_delayed_work_sync(&led_cdev->blink_work);
>> +
>> + if (!led_cdev->blink_delay_on) {
>> + __led_set_brightness(led_cdev, LED_OFF);
>> + return;
>> + } else if (!led_cdev->blink_delay_off) {
>> + __led_set_brightness(led_cdev, led_cdev->blink_brightness);
>> + return;
>> + }
>> +
>> + queue_delayed_work(system_wq, &led_cdev->blink_work, 1);
>> +}
>> +
>> static void set_brightness_delayed(struct work_struct *ws)
>> {
>> struct led_classdev *led_cdev =
>> @@ -234,6 +252,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
>> INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed);
>>
>> INIT_DELAYED_WORK(&led_cdev->blink_work, led_work_function);
>> + INIT_WORK(&led_cdev->reset_blink_work, reset_blink_work_delayed);
>>
>> #ifdef CONFIG_LEDS_TRIGGERS
>> led_trigger_set_default(led_cdev);
>> diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
>> index 4bb1168..959510a 100644
>> --- a/drivers/leds/led-core.c
>> +++ b/drivers/leds/led-core.c
>> @@ -40,19 +40,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
>> led_cdev->blink_delay_on = delay_on;
>> led_cdev->blink_delay_off = delay_off;
>>
>> - /* never on - just set to off */
>> - if (!delay_on) {
>> - __led_set_brightness(led_cdev, LED_OFF);
>> - return;
>> - }
>> -
>> - /* never off - just set to brightness */
>> - if (!delay_off) {
>> - __led_set_brightness(led_cdev, led_cdev->blink_brightness);
>> - return;
>> - }
>> -
>> - queue_delayed_work(system_wq, &led_cdev->blink_work, 1);
>> + schedule_work(&led_cdev->reset_blink_work);
>> }
>>
>>
>> @@ -76,8 +64,6 @@ void led_blink_set(struct led_classdev *led_cdev,
>> unsigned long *delay_on,
>> unsigned long *delay_off)
>> {
>> - cancel_delayed_work_sync(&led_cdev->blink_work);
>> -
>> led_cdev->flags &= ~LED_BLINK_ONESHOT;
>> led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
>>
>> diff --git a/include/linux/leds.h b/include/linux/leds.h
>> index 6a599dc..6e5523d 100644
>> --- a/include/linux/leds.h
>> +++ b/include/linux/leds.h
>> @@ -69,6 +69,7 @@ struct led_classdev {
>>
>> unsigned long blink_delay_on, blink_delay_off;
>> struct delayed_work blink_work;
>> + struct work_struct reset_blink_work;
>> int blink_brightness;
>>
>> struct work_struct set_brightness_work;
>> --
>> 1.9.1
>>
>>
On Tue, 19 Aug 2014, Vincent Donnefort wrote:
> This patch introduces a work which take care of reseting the blink workqueue and
> avoid calling the cancel_delayed_work_sync function which may sleep, from an IRQ
> context.
>
> Signed-off-by: Vincent Donnefort <[email protected]>
Thanks. It does work for me. Though the problem was more general than
stated above: not just a problem in IRQ context, but in any atomic context.
I don't suppose it has any effect on Valdis's lockdep issue, which I
didn't get to see myself; but we should let Valdis report back on that.
May I (most ungratefully!) say that your patch doesn't fill me with
confidence that it's the right solution: adding yet another work_struct
to get around the issue seemed dubious to me, I wonder if it might expose
new races.
But rest assured that I know nothing about this, and I'm not at all
qualified to review your patch: I hope Bryan and others will do so.
Thanks,
Hugh
>
> diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
> index 129729d..0971554 100644
> --- a/drivers/leds/led-class.c
> +++ b/drivers/leds/led-class.c
> @@ -148,6 +148,24 @@ static void led_work_function(struct work_struct *ws)
> msecs_to_jiffies(delay));
> }
>
> +static void reset_blink_work_delayed(struct work_struct *ws)
> +{
> + struct led_classdev *led_cdev =
> + container_of(ws, struct led_classdev, reset_blink_work);
> +
> + cancel_delayed_work_sync(&led_cdev->blink_work);
> +
> + if (!led_cdev->blink_delay_on) {
> + __led_set_brightness(led_cdev, LED_OFF);
> + return;
> + } else if (!led_cdev->blink_delay_off) {
> + __led_set_brightness(led_cdev, led_cdev->blink_brightness);
> + return;
> + }
> +
> + queue_delayed_work(system_wq, &led_cdev->blink_work, 1);
> +}
> +
> static void set_brightness_delayed(struct work_struct *ws)
> {
> struct led_classdev *led_cdev =
> @@ -234,6 +252,7 @@ int led_classdev_register(struct device *parent, struct led_classdev *led_cdev)
> INIT_WORK(&led_cdev->set_brightness_work, set_brightness_delayed);
>
> INIT_DELAYED_WORK(&led_cdev->blink_work, led_work_function);
> + INIT_WORK(&led_cdev->reset_blink_work, reset_blink_work_delayed);
>
> #ifdef CONFIG_LEDS_TRIGGERS
> led_trigger_set_default(led_cdev);
> diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
> index 4bb1168..959510a 100644
> --- a/drivers/leds/led-core.c
> +++ b/drivers/leds/led-core.c
> @@ -40,19 +40,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
> led_cdev->blink_delay_on = delay_on;
> led_cdev->blink_delay_off = delay_off;
>
> - /* never on - just set to off */
> - if (!delay_on) {
> - __led_set_brightness(led_cdev, LED_OFF);
> - return;
> - }
> -
> - /* never off - just set to brightness */
> - if (!delay_off) {
> - __led_set_brightness(led_cdev, led_cdev->blink_brightness);
> - return;
> - }
> -
> - queue_delayed_work(system_wq, &led_cdev->blink_work, 1);
> + schedule_work(&led_cdev->reset_blink_work);
> }
>
>
> @@ -76,8 +64,6 @@ void led_blink_set(struct led_classdev *led_cdev,
> unsigned long *delay_on,
> unsigned long *delay_off)
> {
> - cancel_delayed_work_sync(&led_cdev->blink_work);
> -
> led_cdev->flags &= ~LED_BLINK_ONESHOT;
> led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
>
> diff --git a/include/linux/leds.h b/include/linux/leds.h
> index 6a599dc..6e5523d 100644
> --- a/include/linux/leds.h
> +++ b/include/linux/leds.h
> @@ -69,6 +69,7 @@ struct led_classdev {
>
> unsigned long blink_delay_on, blink_delay_off;
> struct delayed_work blink_work;
> + struct work_struct reset_blink_work;
> int blink_brightness;
>
> struct work_struct set_brightness_work;
> --
> 1.9.1
>
>
Ok I missed that and it did not show up with my searches. I also tested your patch on top of next-20140922 and it seems to work.
-Kari
Kari Suvanto, le Mon 22 Sep 2014 10:30:32 +0300, a ?crit :
> >Ew. I'll have a look.
>
> Any update on this one?
I have already sent it, yes, on Tue, 26 Aug 2014 11:17:25 +0200, here it
is again.
Samuel
Subject: [PATCHv2][input-led] Defer input led work to workqueue
When the kbd changes its led state (e.g. caps lock), this triggers
(led_trigger_event) the kbd-capsl trigger, which is by default
used by the vt::capsl LED, which triggers (led_trigger_event) the
vt-capsl trigger. These two nested led_trigger_event calls take a
trig->leddev_list_lock lock and thus lockdep complains.
Actually the user can make the vt::capsl LED use its own vt-capsl
trigger and thus build a loop. This produces an immediate oops.
This changeset defers the second led_trigger_event call into a
workqueue, which avoids the nested locking altogether. This does
not prevent the user from shooting himself in the foot by creating a
vt::capsl <-> vt-capsl loop, but the only consequence is the workqueue
threads eating some CPU until the user breaks the loop, which is not too
bad.
Signed-off-by: Samuel Thibault <[email protected]>
---
Difference with v1: uses schedule_work instead of its own workqueue.
--- a/drivers/input/leds.c
+++ b/drivers/input/leds.c
@@ -100,13 +100,24 @@ static unsigned long vt_led_registered[B
/* Number of input devices having each LED */
static int vt_led_references[LED_CNT];
+static int vt_led_state[LED_CNT];
+static struct work_struct vt_led_work[LED_CNT];
+
+static void vt_led_cb(struct work_struct *work)
+{
+ int led = work - vt_led_work;
+
+ led_trigger_event(&vt_led_triggers[led], vt_led_state[led]);
+}
+
/* VT LED state change, tell the VT trigger. */
static void vt_led_set(struct led_classdev *cdev,
enum led_brightness brightness)
{
int led = cdev - vt_leds;
- led_trigger_event(&vt_led_triggers[led], !!brightness);
+ vt_led_state[led] = !!brightness;
+ schedule_work(&vt_led_work[led]);
}
/* LED state change for some keyboard, notify that keyboard. */
@@ -244,6 +255,22 @@ void input_led_disconnect(struct input_d
mutex_unlock(&vt_led_registered_lock);
}
+static int __init input_led_init(void)
+{
+ unsigned i;
+
+ for (i = 0; i < LED_CNT; i++)
+ INIT_WORK(&vt_led_work[i], vt_led_cb);
+
+ return 0;
+}
+
+static void __exit input_led_exit(void)
+{
+}
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("User LED support for input layer");
MODULE_AUTHOR("Samuel Thibault <[email protected]>");
+module_init(input_led_init);
+module_exit(input_led_exit);
>Ew. I'll have a look.
sorry, sending again, first one had html in it..
Any update on this one?
I'm seeing this in every boot. I patched this by changing led_trigger_register and led_trigger_register_simple as macros which creates a static lock_class_key like this:
-extern int led_trigger_register(struct led_trigger *trigger);
+
+extern int __led_trigger_register_key(struct led_trigger *trigger,
+ struct lock_class_key *key);
+#ifdef CONFIG_LOCKDEP
+#define led_trigger_register(trigger) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ __led_trigger_register_key(trigger, &__key); \
+})
+#else
+#define led_trigger_register(trigger) \
+ __led_trigger_register_key(trigger, NULL)
+#endif
+
But should every trigger has own key? With my patch all the triggers from vt/keyboard.c are using the same key. Is that a problem? I have not seen any problems with that patch and after that I can see this https://lkml.org/lkml/2014/9/5/462.
-Kari