2015-04-13 09:18:30

by Andreas Fenkart

[permalink] [raw]
Subject: [PATCH 1/2] mwifiex: sdio: create global list of adapters

we need to verify that a an adapter pointer, still points to a
valid adapter. this is not possible through other global structures,
such as net_namespace_list. the problem with the net_namespace_list
is that an adapter can have multiple interfaces, hence we had to
de-reference the maybe invalid pointer to get the interface struct.

Signed-off-by: Andreas Fenkart <[email protected]>
---
drivers/net/wireless/mwifiex/sdio.c | 14 ++++++++++++++
drivers/net/wireless/mwifiex/sdio.h | 1 +
2 files changed, 15 insertions(+)

diff --git a/drivers/net/wireless/mwifiex/sdio.c b/drivers/net/wireless/mwifiex/sdio.c
index 91e36cd..a70e114 100644
--- a/drivers/net/wireless/mwifiex/sdio.c
+++ b/drivers/net/wireless/mwifiex/sdio.c
@@ -69,6 +69,12 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = {
};

/*
+ * list of active cards
+ */
+static LIST_HEAD(cards);
+static DEFINE_MUTEX(cards_mutex);
+
+/*
* SDIO probe.
*
* This function probes an mwifiex device and registers it. It allocates
@@ -130,6 +136,10 @@ mwifiex_sdio_probe(struct sdio_func *func, const struct sdio_device_id *id)
ret = -1;
}

+ mutex_lock(&cards_mutex);
+ list_add(&card->next, &cards);
+ mutex_unlock(&cards_mutex);
+
return ret;
}

@@ -196,6 +206,10 @@ mwifiex_sdio_remove(struct sdio_func *func)
if (!card)
return;

+ mutex_lock(&cards_mutex);
+ list_del(&card->next);
+ mutex_unlock(&cards_mutex);
+
adapter = card->adapter;
if (!adapter || !adapter->priv_num)
return;
diff --git a/drivers/net/wireless/mwifiex/sdio.h b/drivers/net/wireless/mwifiex/sdio.h
index 957cca2..9be5650 100644
--- a/drivers/net/wireless/mwifiex/sdio.h
+++ b/drivers/net/wireless/mwifiex/sdio.h
@@ -260,6 +260,7 @@ struct sdio_mmc_card {

struct mwifiex_sdio_mpa_tx mpa_tx;
struct mwifiex_sdio_mpa_rx mpa_rx;
+ struct list_head next;
};

struct mwifiex_sdio_device {
--
2.1.4



2015-04-16 09:59:20

by Amitkumar Karwar

[permalink] [raw]
Subject: RE: [PATCH 2/2] mwifiex: sdio: bug: dead-lock in card reset

SGkgQW5kcmVhcywNCg0KPiAyMDE1LTA0LTEzIDEyOjI3IEdNVCswMjowMCBBbWl0a3VtYXIgS2Fy
d2FyIDxha2Fyd2FyQG1hcnZlbGwuY29tPjoNCj4gPg0KPiA+IFdlIGhhZCByZWNlbnRseSBzdWJt
aXR0ZWQgYSBwYXRjaCB0byBhZGRyZXNzIHRoaXMgaXNzdWUuIFNvIHRoZXNlIHR3bw0KPiBwYXRj
aGVzIHdvbid0IGJlIG5lZWRlZCBub3cuDQo+ID4gaHR0cDovL3d3dy5zcGluaWNzLm5ldC9saXN0
cy9saW51eC13aXJlbGVzcy9tc2cxMzUxNDYuaHRtbA0KPiA+DQo+ID4gQ291bGQgeW91IHBsZWFz
ZSBjaGVjayBhbmQgbGV0IHVzIGtub3cgaWYgeW91IGhhdmUgYW55IHN1Z2dlc3Rpb25zIGZvcg0K
PiBpbXByb3ZlbWVudD8NCj4gDQo+IFRoZXkgd29yayBmb3IgbXkgdXNlIGNhc2U6IDEgYWRhcHRl
ciAvIG5vbi1yZW1vdmFibGUNCj4gDQo+IFByb2JsZW1zIG5vdCBzb2x2ZWQ6DQo+IC0gdGhlIHVz
ZSBvZiBnbG9iYWwgc3RhdGljIHZhcmlhYmxlIGlzIHJhY3kgaW4gY2FzZSB0aGVyZSB0d28gYWRh
cHRlcnMsDQo+IGFuZCBib3RoIG5lZWQgcmVzZXQNCj4gICBzaW11bHRhbmVvdXNseQ0KPiAtIHNp
bmNlIHdvcmsgc3RydWN0IGlzIGluZGVwZW5kZW50IG9mIHRoZSBpbnRlcmZhY2UgKHNkaW9fZnVu
YyksIGl0IHdpbGwNCj4gYWxzbyBub3QgYmUgbm90aWZpZWQNCj4gICBpZiB0aGUgY2FyZCBpcyBy
ZW1vdmVkIHBoeXNpY2FsbHkNCg0KVGhhbmtzIGZvciBhbiBleHBsYW5hdGlvbi4gWW91IGFyZSBy
aWdodC4gT3VyIHBhdGNoIGRvZXNu4oCZdCBhZGRyZXNzIGFib3ZlIG1lbnRpb25lZCBwcm9ibGVt
cy4NCkFkYXB0ZXIgbGlzdCBhbmQgbXV0ZXggbG9naWMgaW4geW91ciBwYXRjaCB3b3VsZCByZXNv
bHZlIHRoZXNlIGlzc3Vlcy4gQ291bGQgeW91IHBsZWFzZSBwcmVwYXJlIGEgcGF0Y2ggb24gdG9w
IG9mIGxhdGVzdCBjb2RlPw0KDQpSZWdhcmRzLA0KQW1pdA0K

2015-04-28 16:04:58

by Kalle Valo

[permalink] [raw]
Subject: Re: [PATCH 2/2] mwifiex: sdio: bug: dead-lock in card reset

Amitkumar Karwar <[email protected]> writes:

> Hi Andreas,
>
> Thanks for the patch series.
>
>>
>> Card reset is implemented by removing/re-adding the adapter instance.
>> This is implemented by removing the mmc host, which will then trigger a
>> cascade of removal callbacks including mwifiex_sdio_remove.
>> The dead-lock results in the latter function, trying to cancel the work
>> item executing the mmc host removal. This patch adds a driver level, not
>> adapter level, work struct to break the dead-lock
>>
>> Signed-off-by: Andreas Fenkart <[email protected]>
>> ---
>> drivers/net/wireless/mwifiex/main.h | 1 -
>> drivers/net/wireless/mwifiex/sdio.c | 63 +++++++++++++++++++++++++++++--
>> ------
>> 2 files changed, 50 insertions(+), 14 deletions(-)
>>
>
> We had recently submitted a patch to address this issue. So these two
> patches won't be needed now.
> http://www.spinics.net/lists/linux-wireless/msg135146.html
>
> Could you please check and let us know if you have any suggestions for
> improvement?

Ok, so we are talking about this commit which apparently fell through
the cracks:

https://git.kernel.org/cgit/linux/kernel/git/kvalo/wireless-drivers-next.git/commit/?id=2f5872b60146

Like I said as a reply to patch 1, using static variables for this is
ugly. Isn't there really any better way to handle the problem?

--
Kalle Valo

2015-04-13 10:27:29

by Amitkumar Karwar

[permalink] [raw]
Subject: RE: [PATCH 2/2] mwifiex: sdio: bug: dead-lock in card reset

Hi Andreas,

Thanks for the patch series.

>
> Card reset is implemented by removing/re-adding the adapter instance.
> This is implemented by removing the mmc host, which will then trigger a
> cascade of removal callbacks including mwifiex_sdio_remove.
> The dead-lock results in the latter function, trying to cancel the work
> item executing the mmc host removal. This patch adds a driver level, not
> adapter level, work struct to break the dead-lock
>
> Signed-off-by: Andreas Fenkart <[email protected]>
> ---
> drivers/net/wireless/mwifiex/main.h | 1 -
> drivers/net/wireless/mwifiex/sdio.c | 63 +++++++++++++++++++++++++++++--
> ------
> 2 files changed, 50 insertions(+), 14 deletions(-)
>

We had recently submitted a patch to address this issue. So these two patches won't be needed now.
http://www.spinics.net/lists/linux-wireless/msg135146.html

Could you please check and let us know if you have any suggestions for improvement?

Regards,
Amit

2015-04-28 21:15:48

by Andreas Fenkart

[permalink] [raw]
Subject: Re: [PATCH 2/2] mwifiex: sdio: bug: dead-lock in card reset

Hi Kalle,

thanks for reviewing this

2015-04-28 18:04 GMT+02:00 Kalle Valo <[email protected]>:
> Amitkumar Karwar <[email protected]> writes:
>
>>> Card reset is implemented by removing/re-adding the adapter instance.
>>> This is implemented by removing the mmc host, which will then trigger a
>>> cascade of removal callbacks including mwifiex_sdio_remove.
>>> The dead-lock results in the latter function, trying to cancel the work
>>> item executing the mmc host removal. This patch adds a driver level, not
>>> adapter level, work struct to break the dead-lock
>>>
>
> Ok, so we are talking about this commit which apparently fell through
> the cracks:
>
> https://git.kernel.org/cgit/linux/kernel/git/kvalo/wireless-drivers-next.git/commit/?id=2f5872b60146
>
> Like I said as a reply to patch 1, using static variables for this is
> ugly. Isn't there really any better way to handle the problem?

The root of the evil is calling internal mmc-host API to solve a
problem in mwifiex:

target = adapter->card->sdio_func->host;
mmc_remove_host(target);
mdelay(200);
mmc_add_host(target);

This code needs to run on a workqueue that is independent of the
adapter, since all workqueues of the adapter are destroyed.

We can hide the adapter pointer through sdio_set_drvdata, Then from
the workqueue, we can iterate over all sdio_func in the system.
Must be possible through the device model somehow, then use,
a *global* variable to filter out the sdio_func that is ours.
That saves us from de-referencing the weak adapter pointer
(card could be removed by mmc-core workqueue running in parallel).

But it doesn't safe us from a weak mmc host pointer: That particular
card has a BT function as well, and if cmd handler is hosed for one
function, it's hosed for the other one as well. If they both issue card
reset simultaneously we have a race condition. (we can't claim the
host we are going to destroy),
- unless they both run on the same workqueue, and the
workqueue only executes 1 task simultaneously
.
It's also quite unpolite of one function to reset card before consulting
the other functions. Something like request reset / and each function
has a veto. (There is also an FM tuner, that can send it's data via
I2S, not affected by the sdio cmd handler)

The current solution is pragmatic, 99.99% of the cards are
non-removable / 1 adapter per system / no-BT? But it might be
a bit labor intensive to maintain, since changes in the mmc
subsystem might break it by accident.
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=81df6cafe28b358739d121205e1ddaeec2ed5b15

/Andi

2015-04-28 15:58:22

by Kalle Valo

[permalink] [raw]
Subject: Re: [PATCH 1/2] mwifiex: sdio: create global list of adapters

Andreas Fenkart <[email protected]> writes:

> we need to verify that a an adapter pointer, still points to a
> valid adapter. this is not possible through other global structures,
> such as net_namespace_list. the problem with the net_namespace_list
> is that an adapter can have multiple interfaces, hence we had to
> de-reference the maybe invalid pointer to get the interface struct.
>
> Signed-off-by: Andreas Fenkart <[email protected]>
> ---
> drivers/net/wireless/mwifiex/sdio.c | 14 ++++++++++++++
> drivers/net/wireless/mwifiex/sdio.h | 1 +
> 2 files changed, 15 insertions(+)
>
> diff --git a/drivers/net/wireless/mwifiex/sdio.c b/drivers/net/wireless/mwifiex/sdio.c
> index 91e36cd..a70e114 100644
> --- a/drivers/net/wireless/mwifiex/sdio.c
> +++ b/drivers/net/wireless/mwifiex/sdio.c
> @@ -69,6 +69,12 @@ static struct memory_type_mapping mem_type_mapping_tbl[] = {
> };
>
> /*
> + * list of active cards
> + */
> +static LIST_HEAD(cards);
> +static DEFINE_MUTEX(cards_mutex);
> +
> +/*
> * SDIO probe.
> *
> * This function probes an mwifiex device and registers it. It allocates
> @@ -130,6 +136,10 @@ mwifiex_sdio_probe(struct sdio_func *func, const struct sdio_device_id *id)
> ret = -1;
> }
>
> + mutex_lock(&cards_mutex);
> + list_add(&card->next, &cards);
> + mutex_unlock(&cards_mutex);
> +
> return ret;
> }
>
> @@ -196,6 +206,10 @@ mwifiex_sdio_remove(struct sdio_func *func)
> if (!card)
> return;
>
> + mutex_lock(&cards_mutex);
> + list_del(&card->next);
> + mutex_unlock(&cards_mutex);

This is extremely ugly and the commit log doesn't even tell anything
about the issue you trying to fix so it's difficult to even suggest
anything else. For starters can you provide more information, please?
There has to be a better way to solve it.

--
Kalle Valo

2015-04-16 09:02:51

by Andreas Fenkart

[permalink] [raw]
Subject: Re: [PATCH 2/2] mwifiex: sdio: bug: dead-lock in card reset

Hi Amit,

2015-04-13 12:27 GMT+02:00 Amitkumar Karwar <[email protected]>:
>
> We had recently submitted a patch to address this issue. So these two patches won't be needed now.
> http://www.spinics.net/lists/linux-wireless/msg135146.html
>
> Could you please check and let us know if you have any suggestions for improvement?

They work for my use case: 1 adapter / non-removable

Problems not solved:
- the use of global static variable is racy in case there two
adapters, and both need reset
simultaneously
- since work struct is independent of the interface (sdio_func), it
will also not be notified
if the card is removed physically

for illustrating the latter:
- cmd is hung
- card timeout triggers, and issues card reset, card reset stores adapter
pointer in global variable
- user removes card
- mmc layer triggers removal of card, global pointer becomes invalid
- card reset work struct de-references invalid pointer

regards,
Andreas

2015-04-13 09:18:35

by Andreas Fenkart

[permalink] [raw]
Subject: [PATCH 2/2] mwifiex: sdio: bug: dead-lock in card reset

Card reset is implemented by removing/re-adding the adapter instance.
This is implemented by removing the mmc host, which will then trigger
a cascade of removal callbacks including mwifiex_sdio_remove.
The dead-lock results in the latter function, trying to cancel the work
item executing the mmc host removal. This patch adds a driver level,
not adapter level, work struct to break the dead-lock

Signed-off-by: Andreas Fenkart <[email protected]>
---
drivers/net/wireless/mwifiex/main.h | 1 -
drivers/net/wireless/mwifiex/sdio.c | 63 +++++++++++++++++++++++++++++--------
2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h
index f0a6af1..702f348 100644
--- a/drivers/net/wireless/mwifiex/main.h
+++ b/drivers/net/wireless/mwifiex/main.h
@@ -437,7 +437,6 @@ enum rdwr_status {

enum mwifiex_iface_work_flags {
MWIFIEX_IFACE_WORK_FW_DUMP,
- MWIFIEX_IFACE_WORK_CARD_RESET,
};

struct mwifiex_adapter;
diff --git a/drivers/net/wireless/mwifiex/sdio.c b/drivers/net/wireless/mwifiex/sdio.c
index a70e114..0996b0e 100644
--- a/drivers/net/wireless/mwifiex/sdio.c
+++ b/drivers/net/wireless/mwifiex/sdio.c
@@ -75,6 +75,19 @@ static LIST_HEAD(cards);
static DEFINE_MUTEX(cards_mutex);

/*
+ * Card removal will remove the interface then re-add it
+ * Hence the work_struct can't be part of the adapter
+ * otherwise there will be deadlock when we remove the
+ * adapter, and cancel the work struct executing the reset
+ */
+static struct
+{
+ struct work_struct work;
+ struct sdio_mmc_card *card;
+} s_card_reset;
+static DEFINE_SEMAPHORE(s_card_reset_sem);
+
+/*
* SDIO probe.
*
* This function probes an mwifiex device and registers it. It allocates
@@ -1964,10 +1977,36 @@ mwifiex_update_mp_end_port(struct mwifiex_adapter *adapter, u16 port)
port, card->mp_data_port_mask);
}

-static void mwifiex_sdio_card_reset_work(struct mwifiex_adapter *adapter)
+static void mwifiex_sdio_card_reset_work(struct work_struct *work)
{
- struct sdio_mmc_card *card = adapter->card;
- struct mmc_host *target = card->func->card->host;
+ struct sdio_mmc_card *card = NULL;
+ struct mmc_host *target;
+ struct list_head *cur;
+
+ WARN_ON(work != &s_card_reset.work);
+
+ /* adapter pointer might have been invalidated
+ * e.g. card removed from slot
+ */
+ mutex_lock(&cards_mutex);
+ list_for_each_prev(cur, &cards) {
+ card = list_entry(cur, struct sdio_mmc_card, next);
+ if (card == s_card_reset.card)
+ break;
+ }
+ if (!card || card != s_card_reset.card) {
+ pr_err("card was removed, cancel card reset\n");
+ mutex_unlock(&cards_mutex);
+ return;
+ }
+
+ /* card pointer still valid inc ref in host, it's all we need */
+ target = card->func->card->host;
+ mmc_claim_host(target);
+ mutex_unlock(&cards_mutex);
+
+ /* release the work struct, we are done with it */
+ up(&s_card_reset_sem);

/* The actual reset operation must be run outside of driver thread.
* This is because mmc_remove_host() will cause the device to be
@@ -1976,13 +2015,14 @@ static void mwifiex_sdio_card_reset_work(struct mwifiex_adapter *adapter)
*
* We run it in a totally independent workqueue.
*/
-
pr_err("Resetting card...\n");
mmc_remove_host(target);
/* 200ms delay is based on experiment with sdhci controller */
mdelay(200);
target->rescan_entered = 0; /* rescan non-removable cards */
mmc_add_host(target);
+
+ mmc_release_host(target);
}

/* This function read/write firmware */
@@ -2160,9 +2200,6 @@ static void mwifiex_sdio_work(struct work_struct *work)
struct mwifiex_adapter *adapter =
container_of(work, struct mwifiex_adapter, iface_work);

- if (test_and_clear_bit(MWIFIEX_IFACE_WORK_CARD_RESET,
- &adapter->iface_work_flags))
- mwifiex_sdio_card_reset_work(adapter);
if (test_and_clear_bit(MWIFIEX_IFACE_WORK_FW_DUMP,
&adapter->iface_work_flags))
mwifiex_sdio_fw_dump_work(work);
@@ -2171,12 +2208,9 @@ static void mwifiex_sdio_work(struct work_struct *work)
/* This function resets the card */
static void mwifiex_sdio_card_reset(struct mwifiex_adapter *adapter)
{
- if (test_bit(MWIFIEX_IFACE_WORK_CARD_RESET, &adapter->iface_work_flags))
- return;
-
- set_bit(MWIFIEX_IFACE_WORK_CARD_RESET, &adapter->iface_work_flags);
-
- schedule_work(&adapter->iface_work);
+ down(&s_card_reset_sem);
+ s_card_reset.card = adapter->card;
+ schedule_work(&s_card_reset.work);
}

/* This function dumps FW information */
@@ -2317,6 +2351,7 @@ static int
mwifiex_sdio_init_module(void)
{
sema_init(&add_remove_card_sem, 1);
+ INIT_WORK(&s_card_reset.work, mwifiex_sdio_card_reset_work);

/* Clear the flag in case user removes the card. */
user_rmmod = 0;
@@ -2339,6 +2374,8 @@ mwifiex_sdio_cleanup_module(void)
if (!down_interruptible(&add_remove_card_sem))
up(&add_remove_card_sem);

+ cancel_work_sync(&s_card_reset.work);
+
/* Set the flag as user is removing this module. */
user_rmmod = 1;

--
2.1.4