2007-11-27 16:04:00

by Larry Finger

[permalink] [raw]
Subject: [RFC/T] b43: Fix Radio On/Off LED action

Since addition of the rfkill callback, the LED associated with the off
switch on the radio has not worked because essential data in the rfkill
structure was missing. This hack adds the necessary data and places direct
calls to turn the leds on/off.

Signed-off-by: Larry Finger <[email protected]>
---

Index: wireless-2.6/drivers/net/wireless/b43/rfkill.c
===================================================================
--- wireless-2.6.orig/drivers/net/wireless/b43/rfkill.c
+++ wireless-2.6/drivers/net/wireless/b43/rfkill.c
@@ -23,6 +23,7 @@
*/

#include "rfkill.h"
+#include "leds.h"
#include "b43.h"


@@ -57,6 +58,10 @@ static void b43_rfkill_poll(struct input
report_change = 1;
b43info(wl, "Radio hardware status changed to %s\n",
enabled ? "ENABLED" : "DISABLED");
+ if (enabled)
+ b43_led_turn_on(dev, 1, 1);
+ else
+ b43_led_turn_off(dev, 1, 1);
}
mutex_unlock(&wl->mutex);

@@ -70,11 +75,13 @@ static int b43_rfkill_soft_toggle(void *
struct b43_wldev *dev = data;
struct b43_wl *wl = dev->wl;
int err = 0;
+ int lock = mutex_is_locked(&wl->mutex);

if (!wl->rfkill.registered)
return 0;

- mutex_lock(&wl->mutex);
+ if (!lock)
+ mutex_lock(&wl->mutex);
B43_WARN_ON(b43_status(dev) < B43_STAT_INITIALIZED);
switch (state) {
case RFKILL_STATE_ON:
@@ -93,7 +100,8 @@ static int b43_rfkill_soft_toggle(void *
break;
}
out_unlock:
- mutex_unlock(&wl->mutex);
+ if (!lock)
+ mutex_unlock(&wl->mutex);

return err;
}
@@ -133,6 +141,12 @@ void b43_rfkill_init(struct b43_wldev *d
rfk->poll_dev->poll = b43_rfkill_poll;
rfk->poll_dev->poll_interval = 1000; /* msecs */

+ rfk->poll_dev->input->name = rfk->name;
+ rfk->poll_dev->input->id.bustype = BUS_HOST;
+ rfk->poll_dev->input->id.vendor = dev->dev->bus->boardinfo.vendor;
+ rfk->poll_dev->input->evbit[0] = BIT(EV_KEY);
+ set_bit(KEY_WLAN, rfk->poll_dev->input->keybit);
+
err = rfkill_register(rfk->rfkill);
if (err)
goto err_free_polldev;
Index: wireless-2.6/drivers/net/wireless/b43/leds.c
===================================================================
--- wireless-2.6.orig/drivers/net/wireless/b43/leds.c
+++ wireless-2.6/drivers/net/wireless/b43/leds.c
@@ -30,7 +30,7 @@
#include "leds.h"


-static void b43_led_turn_on(struct b43_wldev *dev, u8 led_index,
+void b43_led_turn_on(struct b43_wldev *dev, u8 led_index,
bool activelow)
{
struct b43_wl *wl = dev->wl;
@@ -47,7 +47,7 @@ static void b43_led_turn_on(struct b43_w
spin_unlock_irqrestore(&wl->leds_lock, flags);
}

-static void b43_led_turn_off(struct b43_wldev *dev, u8 led_index,
+void b43_led_turn_off(struct b43_wldev *dev, u8 led_index,
bool activelow)
{
struct b43_wl *wl = dev->wl;
Index: wireless-2.6/drivers/net/wireless/b43/leds.h
===================================================================
--- wireless-2.6.orig/drivers/net/wireless/b43/leds.h
+++ wireless-2.6/drivers/net/wireless/b43/leds.h
@@ -44,7 +44,10 @@ enum b43_led_behaviour {

void b43_leds_init(struct b43_wldev *dev);
void b43_leds_exit(struct b43_wldev *dev);
-
+void b43_led_turn_on(struct b43_wldev *dev, u8 led_index,
+ bool activelow);
+void b43_led_turn_off(struct b43_wldev *dev, u8 led_index,
+ bool activelow);

#else /* CONFIG_B43_LEDS */
/* LED support disabled */
Index: wireless-2.6/drivers/net/wireless/b43/main.c
===================================================================
--- wireless-2.6.orig/drivers/net/wireless/b43/main.c
+++ wireless-2.6/drivers/net/wireless/b43/main.c
@@ -2799,6 +2799,8 @@ static int b43_op_config(struct ieee8021
b43_interrupt_enable(dev, savedirqs);
mmiowb();
spin_unlock_irqrestore(&wl->irq_lock, flags);
+ if (dev->radio_hw_enable)
+ b43_led_turn_on(dev, 1, 1);
out_unlock_mutex:
mutex_unlock(&wl->mutex);



2007-11-27 18:29:16

by Ehud Gavron

[permalink] [raw]
Subject: Re: [RFC/T] b43: Fix Radio On/Off LED action



Michael Buesch wrote:
> On Tuesday 27 November 2007 17:28:33 Larry Finger wrote:
>
>> Michael Buesch wrote:
>>
>>> On Tuesday 27 November 2007 17:03:57 Larry Finger wrote:
>>> This is not how led triggers work.
>>> You are shortcutting the whole thing here. So you could as well
>>> remove the whole rfkill and LEDs code.
>>>
>> It just plain doesn't work now. What I'm trying to do is get something to the users that will
>> restore the behavior they want while we work out the details of the rfkill and LEDs code.
>>
>
> Well, ok. But we don't apply this to mainline. As
> a temporary patch for users it's OK.
>
Yes, it is! :) Works great!
$ uname -a
Linux egdell.wetwork.net 2.6.24-rc3-LF27NOV2007 #2 SMP Tue Nov 27
09:19:11 MST 2007 x86_64 x86_64 x86_64 GNU/Linux

E


>
>>> Please properly register the LED in the leds code and
>>> add a default LED trigger for the rfkill trigger.
>>> This has several advantages to the user, among the possiblility to
>>> reassign a LED to a different trigger.
>>>
>> How do I do that?
>>
>
> Well, what you basically have to do it restore the old
> mapping in b43_map_led().
> Look at the "case B43_LED_RADIO_ALL" (and below) statement.
> It maps these LEDs to the rfkill trigger.
> So you have to find out which behaviour value your LED has and
> map that to the rfkill trigger in this function.
>
> So when the rfkill LED trigger triggers, it will enable/disable this LED.
> That's all done behind the scenes.
>
>
>>>> @@ -70,11 +75,13 @@ static int b43_rfkill_soft_toggle(void *
>>>> struct b43_wldev *dev = data;
>>>> struct b43_wl *wl = dev->wl;
>>>> int err = 0;
>>>> + int lock = mutex_is_locked(&wl->mutex);
>>>>
>>>> if (!wl->rfkill.registered)
>>>> return 0;
>>>>
>>>> - mutex_lock(&wl->mutex);
>>>> + if (!lock)
>>>> + mutex_lock(&wl->mutex);
>>>>
>>> Nah, it shouldn't be locked by "current" in the first place, here.
>>> (I guess that's what you are trying to check here).
>>> That's what the !registered check above is for.
>>> This !lock check is racy.
>>>
>> If you recall my message from yesterday, I got a locking error. That is what I'm trying to prevent.
>> I know it is racy, but I don't know the correct way to do it.
>>
>
> I think RFkill has a bad design regarding this.
> It does synchronously call back into the driver from a call made by
> the driver. That is broken by design. Maybe it's best to fix this
> in rfkill and let it asynchronously call back on rfkill_init.
> Synchronous callbacks from calls made by drivers are broken by design
> and will lead to recursive lockings. We can not fix this in the driver,
> nor work around it in a sane way. We can hack around it, though, which
> is what the !registered flag tries to do. Though, it seems it doesn't
> work. :)
>
>

2007-11-28 14:12:46

by Michael Büsch

[permalink] [raw]
Subject: Re: [RFC/T V2] b43: Fix Radio On/Off LED action

On Tuesday 27 November 2007 22:22:22 Larry Finger wrote:
> > I'm wondering who causes this deadlock. "registered" should be false if
> > we are called back from rfkill_initialize, so it should return early before
> > the lock.
>
> The following code has the competing lock:
>
> static int rfkill_toggle_radio(struct rfkill *rfkill,
> enum rfkill_state state)
> {
> int retval;
>
> retval = mutex_lock_interruptible(&rfkill->mutex);
> if (retval)
> return retval;
>
> if (state != rfkill->state) {
> retval = rfkill->toggle_radio(rfkill->data, state);
> if (!retval) {
> rfkill->state = state;
> rfkill_led_trigger(rfkill, state);
> }
> }
>
> mutex_unlock(&rfkill->mutex);
> return retval;
> }

So it's a lock dependency between rfkill->mutex and wl->mutex?
So, now comes the question that really matters. Who is the caller
of rfkill_toggle_radio, in the case where it crashes?

--
Greetings Michael.

2007-11-27 21:22:28

by Larry Finger

[permalink] [raw]
Subject: Re: [RFC/T V2] b43: Fix Radio On/Off LED action

Michael Buesch wrote:
> On Tuesday 27 November 2007 21:02:47 Larry Finger wrote:
> Is the switch properly polled and is the status change properly reported upstream?
> If yes, you might want to check (add printk to rfkill code) if the LED
> is properly triggered.

I know that b43_rfkill_poll() is being called as I can see the ENABLED/DISABLED messages in the log.
I'll have to work my way through the things it calls.

> I'm wondering who causes this deadlock. "registered" should be false if
> we are called back from rfkill_initialize, so it should return early before
> the lock.

The following code has the competing lock:

static int rfkill_toggle_radio(struct rfkill *rfkill,
enum rfkill_state state)
{
int retval;

retval = mutex_lock_interruptible(&rfkill->mutex);
if (retval)
return retval;

if (state != rfkill->state) {
retval = rfkill->toggle_radio(rfkill->data, state);
if (!retval) {
rfkill->state = state;
rfkill_led_trigger(rfkill, state);
}
}

mutex_unlock(&rfkill->mutex);
return retval;
}

Larry

2007-11-27 20:22:32

by Michael Büsch

[permalink] [raw]
Subject: Re: [RFC/T V2] b43: Fix Radio On/Off LED action

On Tuesday 27 November 2007 21:02:47 Larry Finger wrote:
> Michael,
>
> I'm getting a little closer. The problem I discovered now is that b43_leds_init() was being called
> before b43_rfkill_init(), which prevented registration of the "radio" LED. Now, the LED is flashed
> on for about 1 second, then it goes off. Changing the switch does nothing.

Ok, good catch.
Is the switch properly polled and is the status change properly reported upstream?
If yes, you might want to check (add printk to rfkill code) if the LED
is properly triggered.

> This version uses mutex_trylock() to see if the mutex is already locked. It should do until rfkill
> is fixed.

No it doesn't. This still introduces a race.

I'm wondering who causes this deadlock. "registered" should be false if
we are called back from rfkill_initialize, so it should return early before
the lock.

--
Greetings Michael.

2007-11-28 17:08:19

by Larry Finger

[permalink] [raw]
Subject: Re: [RFC/T V2] b43: Fix Radio On/Off LED action

Michael Buesch wrote:
> On Wednesday 28 November 2007 17:41:42 Larry Finger wrote:
>> Michael Buesch wrote:
>>> I think it's a different bug. The backtrace seems corrupted.
>>>
>>> Can you try this patch? There is some circular locking in rfkill.
>> I still get circular locking. The dump is
>
> Ok.
>
> b43 init:
> mutex_lock(wl->mutex)
> rfkill_init()
> mutex_lock(rfkill->mutex)
>
> in operation:
> rfkill poll (no locks held)
> calls into rfkill
> mutex_lock(rfkill->mutex)
> b43_rfkill_soft_toggle()
> mutex_lock(wl->mutex)
>
> As you can see the lock ordering of the two mutexes
> is different. The problem is that we cannot easily drop
> the wl->mutex on b43 init, as that would introduce some
> race conditions. This is specific to b43 and rt2x00 most
> likely doesn't have this requirement.
> I'm not sure how to properly fix this.

Your analysis is correct. I wrapped the call to b43_rfkill_init() with an unlock/lock pair of calls
(races be damned) and the circular locking message went away. BTW, that kind of wrapping is already
done for b43_rfkill_exit(). Curious.

Larry

2007-11-28 16:41:54

by Larry Finger

[permalink] [raw]
Subject: Re: [RFC/T V2] b43: Fix Radio On/Off LED action

Michael Buesch wrote:
>
> I think it's a different bug. The backtrace seems corrupted.
>
> Can you try this patch? There is some circular locking in rfkill.

I still get circular locking. The dump is

b43-phy0: Radio hardware status changed to DISABLED
b43-phy0: Radio hardware status changed to ENABLED

=======================================================
[ INFO: possible circular locking dependency detected ]
2.6.24-rc3-L2.6-g65d438bf-dirty #25
-------------------------------------------------------
events/0/9 is trying to acquire lock:
(&wl->mutex){--..}, at: [<ffffffff882081d9>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]

but task is already holding lock:
(rfkill_mutex){--..}, at: [<ffffffff8040be7c>] rfkill_switch_all+0x1c/0x78

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (rfkill_mutex){--..}:
[<ffffffff8025951c>] __lock_acquire+0xb34/0xd47
[<ffffffff8040bd58>] rfkill_register+0x87/0x107
[<ffffffff802597b4>] lock_acquire+0x85/0xa9
[<ffffffff8040bd58>] rfkill_register+0x87/0x107
[<ffffffff8040bd58>] rfkill_register+0x87/0x107
[<ffffffff8040e78a>] mutex_lock_nested+0x10e/0x2b6
[<ffffffff8040bd58>] rfkill_register+0x87/0x107
[<ffffffff88208043>] b43_rfkill_init+0x141/0x1a3 [b43]
[<ffffffff881f8ac4>] b43_wireless_core_init+0x682/0x78c [b43]
[<ffffffff881f989e>] b43_op_start+0x33/0x74 [b43]
[<ffffffff881c7a76>] ieee80211_open+0x1c7/0x3dd [mac80211]
[<ffffffff803b2fda>] dev_open+0x4e/0x88
[<ffffffff803b18a2>] dev_change_flags+0xaf/0x16b
[<ffffffff803b9f9d>] do_setlink+0x27a/0x346
[<ffffffff8040fa9c>] _read_unlock+0x26/0x2b
[<ffffffff803bb233>] rtnl_setlink+0xf9/0x11c
[<ffffffff803bb0d3>] rtnetlink_rcv_msg+0x1b6/0x1d5
[<ffffffff803baf1d>] rtnetlink_rcv_msg+0x0/0x1d5
[<ffffffff803c36a5>] netlink_rcv_skb+0x3e/0xaa
[<ffffffff803baf14>] rtnetlink_rcv+0x20/0x29
[<ffffffff803c344c>] netlink_unicast+0x1d9/0x23a
[<ffffffff803ac19e>] __alloc_skb+0x8a/0x138
[<ffffffff803c3c79>] netlink_sendmsg+0x2aa/0x2bd
[<ffffffff803a6249>] sock_sendmsg+0xdf/0xf8
[<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
[<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
[<ffffffff8025970e>] __lock_acquire+0xd26/0xd47
[<ffffffff803a6b7f>] move_addr_to_kernel+0x40/0x49
[<ffffffff803ad5b9>] verify_iovec+0x4f/0x8e
[<ffffffff803a6443>] sys_sendmsg+0x1e1/0x253
[<ffffffff80250916>] up_read+0x26/0x2a
[<ffffffff8022581f>] do_page_fault+0x3bf/0x764
[<ffffffff803a72c4>] sys_getsockname+0x66/0x8c
[<ffffffff80258540>] trace_hardirqs_on+0x11c/0x147
[<ffffffff8040f5b8>] trace_hardirqs_on_thunk+0x35/0x3a
[<ffffffff8020c0de>] system_call+0x7e/0x83
[<ffffffffffffffff>] 0xffffffffffffffff

-> #0 (&wl->mutex){--..}:
[<ffffffff8025941e>] __lock_acquire+0xa36/0xd47
[<ffffffff882081d9>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
[<ffffffff802597b4>] lock_acquire+0x85/0xa9
[<ffffffff882081d9>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
[<ffffffff882081d9>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
[<ffffffff8040e78a>] mutex_lock_nested+0x10e/0x2b6
[<ffffffff80255dd2>] debug_mutex_free_waiter+0x5b/0x5f
[<ffffffff8040bf77>] rfkill_task_handler+0x0/0x54
[<ffffffff882081d9>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
[<ffffffff8040b930>] rfkill_toggle_radio+0x28/0x60
[<ffffffff8040be9e>] rfkill_switch_all+0x3e/0x78
[<ffffffff8040bfb3>] rfkill_task_handler+0x3c/0x54
[<ffffffff80249914>] run_workqueue+0xeb/0x200
[<ffffffff8024a500>] worker_thread+0xed/0xfe
[<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
[<ffffffff8024a413>] worker_thread+0x0/0xfe
[<ffffffff8024da6e>] kthread+0x49/0x77
[<ffffffff8020d018>] child_rip+0xa/0x12
[<ffffffff8020c72f>] restore_args+0x0/0x30
[<ffffffff8024da25>] kthread+0x0/0x77
[<ffffffff8020d00e>] child_rip+0x0/0x12
[<ffffffffffffffff>] 0xffffffffffffffff

other info that might help us debug this:

4 locks held by events/0/9:
#0: (events){--..}, at: [<ffffffff802498c9>] run_workqueue+0xa0/0x200
#1: (rfkill_wlan.work){--..}, at: [<ffffffff802498c9>] run_workqueue+0xa0/0x200
#2: (rfkill_wlan.mutex){--..}, at: [<ffffffff8040bf95>] rfkill_task_handler+0x1e/0x54
#3: (rfkill_mutex){--..}, at: [<ffffffff8040be7c>] rfkill_switch_all+0x1c/0x78

stack backtrace:

Call Trace:
[<ffffffff8025779c>] print_circular_bug_tail+0x70/0x7b
[<ffffffff8025941e>] __lock_acquire+0xa36/0xd47
[<ffffffff882081d9>] :b43:b43_rfkill_soft_toggle+0x33/0xb2
[<ffffffff802597b4>] lock_acquire+0x85/0xa9
[<ffffffff882081d9>] :b43:b43_rfkill_soft_toggle+0x33/0xb2
[<ffffffff882081d9>] :b43:b43_rfkill_soft_toggle+0x33/0xb2
[<ffffffff8040e78a>] mutex_lock_nested+0x10e/0x2b6
[<ffffffff80255dd2>] debug_mutex_free_waiter+0x5b/0x5f
[<ffffffff8040bf77>] rfkill_task_handler+0x0/0x54
[<ffffffff882081d9>] :b43:b43_rfkill_soft_toggle+0x33/0xb2
[<ffffffff8040b930>] rfkill_toggle_radio+0x28/0x60
[<ffffffff8040be9e>] rfkill_switch_all+0x3e/0x78
[<ffffffff8040bfb3>] rfkill_task_handler+0x3c/0x54
[<ffffffff80249914>] run_workqueue+0xeb/0x200
[<ffffffff8024a500>] worker_thread+0xed/0xfe
[<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
[<ffffffff8024a413>] worker_thread+0x0/0xfe
[<ffffffff8024da6e>] kthread+0x49/0x77
[<ffffffff8020d018>] child_rip+0xa/0x12
[<ffffffff8020c72f>] restore_args+0x0/0x30
[<ffffffff8024da25>] kthread+0x0/0x77
[<ffffffff8020d00e>] child_rip+0x0/0x12

b43-phy0: Radio hardware status changed to DISABLED

2007-11-28 16:48:12

by Michael Büsch

[permalink] [raw]
Subject: Re: [RFC/T V2] b43: Fix Radio On/Off LED action

On Wednesday 28 November 2007 17:41:42 Larry Finger wrote:
> Michael Buesch wrote:
> >
> > I think it's a different bug. The backtrace seems corrupted.
> >
> > Can you try this patch? There is some circular locking in rfkill.
>
> I still get circular locking. The dump is

Ok.

b43 init:
mutex_lock(wl->mutex)
rfkill_init()
mutex_lock(rfkill->mutex)

in operation:
rfkill poll (no locks held)
calls into rfkill
mutex_lock(rfkill->mutex)
b43_rfkill_soft_toggle()
mutex_lock(wl->mutex)

As you can see the lock ordering of the two mutexes
is different. The problem is that we cannot easily drop
the wl->mutex on b43 init, as that would introduce some
race conditions. This is specific to b43 and rt2x00 most
likely doesn't have this requirement.
I'm not sure how to properly fix this.

--
Greetings Michael.

2007-11-28 16:14:48

by Michael Büsch

[permalink] [raw]
Subject: Re: [RFC/T V2] b43: Fix Radio On/Off LED action

On Wednesday 28 November 2007 16:05:35 Larry Finger wrote:
> Michael Buesch wrote:
> >
> > So it's a lock dependency between rfkill->mutex and wl->mutex?
> > So, now comes the question that really matters. Who is the caller
> > of rfkill_toggle_radio, in the case where it crashes?
> >
>
> Here is the full dump. It looks to me as if b43_rfkill_soft_toggle() calls rfkill_handler(), which
> calls rfkill_toggle_radio() and it is calling b43_rfkill_soft_toggle(). I'd call that a loop.

I think it's a different bug. The backtrace seems corrupted.

Can you try this patch? There is some circular locking in rfkill.

Index: wireless-2.6/net/rfkill/rfkill.c
===================================================================
--- wireless-2.6.orig/net/rfkill/rfkill.c 2007-11-20 19:09:35.000000000 +0100
+++ wireless-2.6/net/rfkill/rfkill.c 2007-11-28 17:09:55.000000000 +0100
@@ -60,11 +60,7 @@ static void rfkill_led_trigger(struct rf
static int rfkill_toggle_radio(struct rfkill *rfkill,
enum rfkill_state state)
{
- int retval;
-
- retval = mutex_lock_interruptible(&rfkill->mutex);
- if (retval)
- return retval;
+ int retval = 0;

if (state != rfkill->state) {
retval = rfkill->toggle_radio(rfkill->data, state);
@@ -74,7 +70,6 @@ static int rfkill_toggle_radio(struct rf
}
}

- mutex_unlock(&rfkill->mutex);
return retval;
}

@@ -158,12 +153,13 @@ static ssize_t rfkill_state_store(struct
if (!capable(CAP_NET_ADMIN))
return -EPERM;

+ if (mutex_lock_interruptible(&rfkill->mutex))
+ return -ERESTARTSYS;
error = rfkill_toggle_radio(rfkill,
state ? RFKILL_STATE_ON : RFKILL_STATE_OFF);
- if (error)
- return error;
+ mutex_unlock(&rfkill->mutex);

- return count;
+ return error ? error : count;
}

static ssize_t rfkill_claim_show(struct device *dev,

--
Greetings Michael.

2007-11-27 16:28:45

by Larry Finger

[permalink] [raw]
Subject: Re: [RFC/T] b43: Fix Radio On/Off LED action

Michael Buesch wrote:
> On Tuesday 27 November 2007 17:03:57 Larry Finger wrote:
> This is not how led triggers work.
> You are shortcutting the whole thing here. So you could as well
> remove the whole rfkill and LEDs code.

It just plain doesn't work now. What I'm trying to do is get something to the users that will
restore the behavior they want while we work out the details of the rfkill and LEDs code.

> Please properly register the LED in the leds code and
> add a default LED trigger for the rfkill trigger.
> This has several advantages to the user, among the possiblility to
> reassign a LED to a different trigger.

How do I do that?

>> @@ -70,11 +75,13 @@ static int b43_rfkill_soft_toggle(void *
>> struct b43_wldev *dev = data;
>> struct b43_wl *wl = dev->wl;
>> int err = 0;
>> + int lock = mutex_is_locked(&wl->mutex);
>>
>> if (!wl->rfkill.registered)
>> return 0;
>>
>> - mutex_lock(&wl->mutex);
>> + if (!lock)
>> + mutex_lock(&wl->mutex);
>
> Nah, it shouldn't be locked by "current" in the first place, here.
> (I guess that's what you are trying to check here).
> That's what the !registered check above is for.
> This !lock check is racy.

If you recall my message from yesterday, I got a locking error. That is what I'm trying to prevent.
I know it is racy, but I don't know the correct way to do it.

Larry


2007-11-27 17:06:42

by Michael Büsch

[permalink] [raw]
Subject: Re: [RFC/T] b43: Fix Radio On/Off LED action

On Tuesday 27 November 2007 17:28:33 Larry Finger wrote:
> Michael Buesch wrote:
> > On Tuesday 27 November 2007 17:03:57 Larry Finger wrote:
> > This is not how led triggers work.
> > You are shortcutting the whole thing here. So you could as well
> > remove the whole rfkill and LEDs code.
>
> It just plain doesn't work now. What I'm trying to do is get something to the users that will
> restore the behavior they want while we work out the details of the rfkill and LEDs code.

Well, ok. But we don't apply this to mainline. As
a temporary patch for users it's OK.

> > Please properly register the LED in the leds code and
> > add a default LED trigger for the rfkill trigger.
> > This has several advantages to the user, among the possiblility to
> > reassign a LED to a different trigger.
>
> How do I do that?

Well, what you basically have to do it restore the old
mapping in b43_map_led().
Look at the "case B43_LED_RADIO_ALL" (and below) statement.
It maps these LEDs to the rfkill trigger.
So you have to find out which behaviour value your LED has and
map that to the rfkill trigger in this function.

So when the rfkill LED trigger triggers, it will enable/disable this LED.
That's all done behind the scenes.

> >> @@ -70,11 +75,13 @@ static int b43_rfkill_soft_toggle(void *
> >> struct b43_wldev *dev = data;
> >> struct b43_wl *wl = dev->wl;
> >> int err = 0;
> >> + int lock = mutex_is_locked(&wl->mutex);
> >>
> >> if (!wl->rfkill.registered)
> >> return 0;
> >>
> >> - mutex_lock(&wl->mutex);
> >> + if (!lock)
> >> + mutex_lock(&wl->mutex);
> >
> > Nah, it shouldn't be locked by "current" in the first place, here.
> > (I guess that's what you are trying to check here).
> > That's what the !registered check above is for.
> > This !lock check is racy.
>
> If you recall my message from yesterday, I got a locking error. That is what I'm trying to prevent.
> I know it is racy, but I don't know the correct way to do it.

I think RFkill has a bad design regarding this.
It does synchronously call back into the driver from a call made by
the driver. That is broken by design. Maybe it's best to fix this
in rfkill and let it asynchronously call back on rfkill_init.
Synchronous callbacks from calls made by drivers are broken by design
and will lead to recursive lockings. We can not fix this in the driver,
nor work around it in a sane way. We can hack around it, though, which
is what the !registered flag tries to do. Though, it seems it doesn't
work. :)

--
Greetings Michael.

2007-11-28 15:05:35

by Larry Finger

[permalink] [raw]
Subject: Re: [RFC/T V2] b43: Fix Radio On/Off LED action

Michael Buesch wrote:
>
> So it's a lock dependency between rfkill->mutex and wl->mutex?
> So, now comes the question that really matters. Who is the caller
> of rfkill_toggle_radio, in the case where it crashes?
>

Here is the full dump. It looks to me as if b43_rfkill_soft_toggle() calls rfkill_handler(), which
calls rfkill_toggle_radio() and it is calling b43_rfkill_soft_toggle(). I'd call that a loop.

kernel:
kernel: =======================================================
kernel: [ INFO: possible circular locking dependency detected ]
kernel: 2.6.24-rc3-L2.6-g65d438bf-dirty #24
kernel: -------------------------------------------------------
kernel: events/0/9 is trying to acquire lock:
kernel: (&wl->mutex){--..}, at: [<ffffffff882a31e1>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
kernel:
kernel: but task is already holding lock:
kernel: (&rfkill->mutex){--..}, at: [<ffffffff8040b927>] rfkill_toggle_radio+0x1f/0x7d
kernel:
kernel: which lock already depends on the new lock.
kernel:
kernel:
kernel: the existing dependency chain (in reverse order) is:
kernel:
kernel: -> #2 (&rfkill->mutex){--..}:
kernel: [<ffffffff8025951c>] __lock_acquire+0xb34/0xd47
kernel: [<ffffffff8040b927>] rfkill_toggle_radio+0x1f/0x7d
kernel: [<ffffffff802597b4>] lock_acquire+0x85/0xa9
kernel: [<ffffffff8040b927>] rfkill_toggle_radio+0x1f/0x7d
kernel: [<ffffffff8040e32e>] mutex_lock_interruptible_nested+0x113/0x31e
kernel: [<ffffffff8040b927>] rfkill_toggle_radio+0x1f/0x7d
kernel: [<ffffffff8040b927>] rfkill_toggle_radio+0x1f/0x7d
kernel: [<ffffffff8040bd64>] rfkill_register+0x9b/0x107
kernel: [<ffffffff882a3040>] b43_rfkill_init+0x15e/0x1cb [b43]
kernel: [<ffffffff88293ad4>] b43_wireless_core_init+0x682/0x784 [b43]
kernel: [<ffffffff882948a6>] b43_op_start+0x33/0x74 [b43]
kernel: [<ffffffff881f2a76>] ieee80211_open+0x1c7/0x3dd [mac80211]
kernel: [<ffffffff803b2fda>] dev_open+0x4e/0x88
kernel: [<ffffffff803b18a2>] dev_change_flags+0xaf/0x16b
kernel: [<ffffffff803b9f9d>] do_setlink+0x27a/0x346
kernel: [<ffffffff8040fa9c>] _read_unlock+0x26/0x2b
kernel: [<ffffffff803bb233>] rtnl_setlink+0xf9/0x11c
kernel: [<ffffffff803bb0d3>] rtnetlink_rcv_msg+0x1b6/0x1d5
kernel: [<ffffffff803baf1d>] rtnetlink_rcv_msg+0x0/0x1d5
kernel: [<ffffffff803c36a5>] netlink_rcv_skb+0x3e/0xaa
kernel: [<ffffffff803baf14>] rtnetlink_rcv+0x20/0x29
kernel: [<ffffffff803c344c>] netlink_unicast+0x1d9/0x23a
kernel: [<ffffffff803ac19e>] __alloc_skb+0x8a/0x138
kernel: [<ffffffff803c3c79>] netlink_sendmsg+0x2aa/0x2bd
kernel: [<ffffffff803a6249>] sock_sendmsg+0xdf/0xf8
kernel: [<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
kernel: [<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
kernel: [<ffffffff8025970e>] __lock_acquire+0xd26/0xd47
kernel: [<ffffffff803a6b7f>] move_addr_to_kernel+0x40/0x49
kernel: [<ffffffff803ad5b9>] verify_iovec+0x4f/0x8e
kernel: [<ffffffff803a6443>] sys_sendmsg+0x1e1/0x253
kernel: [<ffffffff80250916>] up_read+0x26/0x2a
kernel: [<ffffffff8022581f>] do_page_fault+0x3bf/0x764
kernel: [<ffffffff803a72c4>] sys_getsockname+0x66/0x8c
kernel: [<ffffffff80258540>] trace_hardirqs_on+0x11c/0x147
kernel: [<ffffffff8040f5b8>] trace_hardirqs_on_thunk+0x35/0x3a
kernel: [<ffffffff8020c0de>] system_call+0x7e/0x83
kernel: [<ffffffffffffffff>] 0xffffffffffffffff
kernel:
kernel: -> #1 (rfkill_mutex){--..}:
kernel: [<ffffffff8025951c>] __lock_acquire+0xb34/0xd47
kernel: [<ffffffff8040bd50>] rfkill_register+0x87/0x107
kernel: [<ffffffff802597b4>] lock_acquire+0x85/0xa9
kernel: [<ffffffff8040bd50>] rfkill_register+0x87/0x107
kernel: [<ffffffff8040bd50>] rfkill_register+0x87/0x107
kernel: [<ffffffff8040e78a>] mutex_lock_nested+0x10e/0x2b6
kernel: [<ffffffff8040bd50>] rfkill_register+0x87/0x107
kernel: [<ffffffff882a3040>] b43_rfkill_init+0x15e/0x1cb [b43]
kernel: [<ffffffff88293ad4>] b43_wireless_core_init+0x682/0x784 [b43]
kernel: [<ffffffff882948a6>] b43_op_start+0x33/0x74 [b43]
kernel: [<ffffffff881f2a76>] ieee80211_open+0x1c7/0x3dd [mac80211]
kernel: [<ffffffff803b2fda>] dev_open+0x4e/0x88
kernel: [<ffffffff803b18a2>] dev_change_flags+0xaf/0x16b
kernel: [<ffffffff803b9f9d>] do_setlink+0x27a/0x346
kernel: [<ffffffff8040fa9c>] _read_unlock+0x26/0x2b
kernel: [<ffffffff803bb233>] rtnl_setlink+0xf9/0x11c
kernel: [<ffffffff803bb0d3>] rtnetlink_rcv_msg+0x1b6/0x1d5
kernel: [<ffffffff803baf1d>] rtnetlink_rcv_msg+0x0/0x1d5
kernel: [<ffffffff803c36a5>] netlink_rcv_skb+0x3e/0xaa
kernel: [<ffffffff803baf14>] rtnetlink_rcv+0x20/0x29
kernel: [<ffffffff803c344c>] netlink_unicast+0x1d9/0x23a
kernel: [<ffffffff803ac19e>] __alloc_skb+0x8a/0x138
kernel: [<ffffffff803c3c79>] netlink_sendmsg+0x2aa/0x2bd
kernel: [<ffffffff803a6249>] sock_sendmsg+0xdf/0xf8
kernel: [<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
kernel: [<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
kernel: [<ffffffff8025970e>] __lock_acquire+0xd26/0xd47
kernel: [<ffffffff803a6b7f>] move_addr_to_kernel+0x40/0x49
kernel: [<ffffffff803ad5b9>] verify_iovec+0x4f/0x8e
kernel: [<ffffffff803a6443>] sys_sendmsg+0x1e1/0x253
kernel: [<ffffffff80250916>] up_read+0x26/0x2a
kernel: [<ffffffff8022581f>] do_page_fault+0x3bf/0x764
kernel: [<ffffffff803a72c4>] sys_getsockname+0x66/0x8c
kernel: [<ffffffff80258540>] trace_hardirqs_on+0x11c/0x147
kernel: [<ffffffff8040f5b8>] trace_hardirqs_on_thunk+0x35/0x3a
kernel: [<ffffffff8020c0de>] system_call+0x7e/0x83
kernel: [<ffffffffffffffff>] 0xffffffffffffffff
kernel:
kernel: -> #0 (&wl->mutex){--..}:
kernel: [<ffffffff80256dd7>] print_circular_bug_entry+0x48/0x4f
kernel: [<ffffffff8025941e>] __lock_acquire+0xa36/0xd47
kernel: [<ffffffff882a31e1>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
kernel: [<ffffffff802597b4>] lock_acquire+0x85/0xa9
kernel: [<ffffffff882a31e1>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
kernel: [<ffffffff882a31e1>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
kernel: [<ffffffff8040e78a>] mutex_lock_nested+0x10e/0x2b6
kernel: [<ffffffff8040b927>] rfkill_toggle_radio+0x1f/0x7d
kernel: [<ffffffff8040bf6f>] rfkill_task_handler+0x0/0x54
kernel: [<ffffffff882a31e1>] b43_rfkill_soft_toggle+0x33/0xb2 [b43]
kernel: [<ffffffff8040b944>] rfkill_toggle_radio+0x3c/0x7d
kernel: [<ffffffff8040be96>] rfkill_switch_all+0x3e/0x78
kernel: [<ffffffff8040bfab>] rfkill_task_handler+0x3c/0x54
kernel: [<ffffffff80249914>] run_workqueue+0xeb/0x200
kernel: [<ffffffff8024a500>] worker_thread+0xed/0xfe
kernel: [<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
kernel: [<ffffffff8024a413>] worker_thread+0x0/0xfe
kernel: [<ffffffff8024da6e>] kthread+0x49/0x77
kernel: [<ffffffff8020d018>] child_rip+0xa/0x12
kernel: [<ffffffff8020c72f>] restore_args+0x0/0x30
kernel: [<ffffffff8024da25>] kthread+0x0/0x77
kernel: [<ffffffff8020d00e>] child_rip+0x0/0x12
kernel: [<ffffffffffffffff>] 0xffffffffffffffff
kernel:
kernel: other info that might help us debug this:
kernel:
kernel: 5 locks held by events/0/9:
kernel: #0: (events){--..}, at: [<ffffffff802498c9>] run_workqueue+0xa0/0x200
kernel: #1: (rfkill_wlan.work){--..}, at: [<ffffffff802498c9>] run_workqueue+0xa0/0x200
kernel: #2: (rfkill_wlan.mutex){--..}, at: [<ffffffff8040bf8d>] rfkill_task_handler+0x1e/0x54
kernel: #3: (rfkill_mutex){--..}, at: [<ffffffff8040be74>] rfkill_switch_all+0x1c/0x78
kernel: #4: (&rfkill->mutex){--..}, at: [<ffffffff8040b927>] rfkill_toggle_radio+0x1f/0x7d
kernel:
kernel: stack backtrace:
kernel:
kernel: Call Trace:
kernel: [<ffffffff8025779c>] print_circular_bug_tail+0x70/0x7b
kernel: [<ffffffff80256dd7>] print_circular_bug_entry+0x48/0x4f
kernel: [<ffffffff8025941e>] __lock_acquire+0xa36/0xd47
kernel: [<ffffffff882a31e1>] :b43:b43_rfkill_soft_toggle+0x33/0xb2
kernel: [<ffffffff802597b4>] lock_acquire+0x85/0xa9
kernel: [<ffffffff882a31e1>] :b43:b43_rfkill_soft_toggle+0x33/0xb2
kernel: [<ffffffff882a31e1>] :b43:b43_rfkill_soft_toggle+0x33/0xb2
kernel: [<ffffffff8040e78a>] mutex_lock_nested+0x10e/0x2b6
kernel: [<ffffffff8040b927>] rfkill_toggle_radio+0x1f/0x7d
kernel: [<ffffffff8040bf6f>] rfkill_task_handler+0x0/0x54
kernel: [<ffffffff882a31e1>] :b43:b43_rfkill_soft_toggle+0x33/0xb2
kernel: [<ffffffff8040b944>] rfkill_toggle_radio+0x3c/0x7d
kernel: [<ffffffff8040be96>] rfkill_switch_all+0x3e/0x78
kernel: [<ffffffff8040bfab>] rfkill_task_handler+0x3c/0x54
kernel: [<ffffffff80249914>] run_workqueue+0xeb/0x200
kernel: [<ffffffff8024a500>] worker_thread+0xed/0xfe
kernel: [<ffffffff8024db89>] autoremove_wake_function+0x0/0x38
kernel: [<ffffffff8024a413>] worker_thread+0x0/0xfe
kernel: [<ffffffff8024da6e>] kthread+0x49/0x77
kernel: [<ffffffff8020d018>] child_rip+0xa/0x12
kernel: [<ffffffff8020c72f>] restore_args+0x0/0x30
kernel: [<ffffffff8024da25>] kthread+0x0/0x77
kernel: [<ffffffff8020d00e>] child_rip+0x0/0x12


2007-11-27 20:03:04

by Larry Finger

[permalink] [raw]
Subject: [RFC/T V2] b43: Fix Radio On/Off LED action

Michael,

I'm getting a little closer. The problem I discovered now is that b43_leds_init() was being called
before b43_rfkill_init(), which prevented registration of the "radio" LED. Now, the LED is flashed
on for about 1 second, then it goes off. Changing the switch does nothing.

This version uses mutex_trylock() to see if the mutex is already locked. It should do until rfkill
is fixed.

Note to testers: This version does not work. I'm sending it out to get comments.

Larry


Index: wireless-2.6/drivers/net/wireless/b43/rfkill.c
===================================================================
--- wireless-2.6.orig/drivers/net/wireless/b43/rfkill.c
+++ wireless-2.6/drivers/net/wireless/b43/rfkill.c
@@ -70,11 +70,12 @@ static int b43_rfkill_soft_toggle(void *
struct b43_wldev *dev = data;
struct b43_wl *wl = dev->wl;
int err = 0;
+ int have_lock;

if (!wl->rfkill.registered)
return 0;

- mutex_lock(&wl->mutex);
+ have_lock = mutex_trylock(&wl->mutex);
B43_WARN_ON(b43_status(dev) < B43_STAT_INITIALIZED);
switch (state) {
case RFKILL_STATE_ON:
@@ -93,7 +94,8 @@ static int b43_rfkill_soft_toggle(void *
break;
}
out_unlock:
- mutex_unlock(&wl->mutex);
+ if (have_lock)
+ mutex_unlock(&wl->mutex);

return err;
}
@@ -133,6 +135,12 @@ void b43_rfkill_init(struct b43_wldev *d
rfk->poll_dev->poll = b43_rfkill_poll;
rfk->poll_dev->poll_interval = 1000; /* msecs */

+ rfk->poll_dev->input->name = rfk->name;
+ rfk->poll_dev->input->id.bustype = BUS_HOST;
+ rfk->poll_dev->input->id.vendor = dev->dev->bus->boardinfo.vendor;
+ rfk->poll_dev->input->evbit[0] = BIT(EV_KEY);
+ set_bit(KEY_WLAN, rfk->poll_dev->input->keybit);
+
err = rfkill_register(rfk->rfkill);
if (err)
goto err_free_polldev;
Index: wireless-2.6/drivers/net/wireless/b43/main.c
===================================================================
--- wireless-2.6.orig/drivers/net/wireless/b43/main.c
+++ wireless-2.6/drivers/net/wireless/b43/main.c
@@ -2184,7 +2184,6 @@ static int b43_chip_init(struct b43_wlde
err = b43_gpio_init(dev);
if (err)
goto out; /* firmware is released later */
- b43_leds_init(dev);

err = b43_upload_initvals(dev);
if (err)
@@ -3411,6 +3410,7 @@ static int b43_wireless_core_init(struct
b43_security_init(dev);
b43_rfkill_init(dev);
b43_rng_init(wl);
+ b43_leds_init(dev);

b43_set_status(dev, B43_STAT_INITIALIZED);



======

2007-11-27 16:14:20

by Michael Büsch

[permalink] [raw]
Subject: Re: [RFC/T] b43: Fix Radio On/Off LED action

On Tuesday 27 November 2007 17:03:57 Larry Finger wrote:
> Since addition of the rfkill callback, the LED associated with the off
> switch on the radio has not worked because essential data in the rfkill
> structure was missing. This hack adds the necessary data and places direct
> calls to turn the leds on/off.
>
> Signed-off-by: Larry Finger <[email protected]>
> ---
>
> Index: wireless-2.6/drivers/net/wireless/b43/rfkill.c
> ===================================================================
> --- wireless-2.6.orig/drivers/net/wireless/b43/rfkill.c
> +++ wireless-2.6/drivers/net/wireless/b43/rfkill.c
> @@ -23,6 +23,7 @@
> */
>
> #include "rfkill.h"
> +#include "leds.h"
> #include "b43.h"
>
>
> @@ -57,6 +58,10 @@ static void b43_rfkill_poll(struct input
> report_change = 1;
> b43info(wl, "Radio hardware status changed to %s\n",
> enabled ? "ENABLED" : "DISABLED");
> + if (enabled)
> + b43_led_turn_on(dev, 1, 1);
> + else
> + b43_led_turn_off(dev, 1, 1);
> }
> mutex_unlock(&wl->mutex);
>

This is not how led triggers work.
You are shortcutting the whole thing here. So you could as well
remove the whole rfkill and LEDs code.

Please properly register the LED in the leds code and
add a default LED trigger for the rfkill trigger.
This has several advantages to the user, among the possiblility to
reassign a LED to a different trigger.

> @@ -70,11 +75,13 @@ static int b43_rfkill_soft_toggle(void *
> struct b43_wldev *dev = data;
> struct b43_wl *wl = dev->wl;
> int err = 0;
> + int lock = mutex_is_locked(&wl->mutex);
>
> if (!wl->rfkill.registered)
> return 0;
>
> - mutex_lock(&wl->mutex);
> + if (!lock)
> + mutex_lock(&wl->mutex);

Nah, it shouldn't be locked by "current" in the first place, here.
(I guess that's what you are trying to check here).
That's what the !registered check above is for.
This !lock check is racy.

> B43_WARN_ON(b43_status(dev) < B43_STAT_INITIALIZED);
> switch (state) {
> case RFKILL_STATE_ON:
> @@ -93,7 +100,8 @@ static int b43_rfkill_soft_toggle(void *
> break;
> }
> out_unlock:
> - mutex_unlock(&wl->mutex);
> + if (!lock)
> + mutex_unlock(&wl->mutex);
>
> return err;


--
Greetings Michael.