2023-05-05 23:59:42

by Ajay Singh

[permalink] [raw]
Subject: [PATCH v2] wifi: wilc1000: fix kernel oops during interface down during background scan

Fix for kernel crash observed with following test procedure:
while true;
do ifconfig wlan0 up;
iw dev wlan0 scan &
ifconfig wlan0 down;
done

During the above test procedure, the scan results are received from firmware
for 'iw scan' command gets queued even when the interface is going down. It
was causing the kernel oops when dereferencing the freed pointers.

For synchronization, 'mac_close()' calls flush_workqueue() to block its
execution till all pending work is completed. Afterwards 'wilc->close' flag
which is set before the flush_workqueue() should avoid adding new work.
Added 'wilc->close' check in wilc_handle_isr() which is common for
SPI/SDIO bus to ignore the interrupts from firmware that inturns adds the
work since the interface is getting closed.

Also, removed isr_uh_routine() as it's not necessary after 'wl->close' check
is added in wilc_handle_isr(). So now the default primary handler would be
used for threaded IRQ.

Cc: [email protected]
Reported-by: Michael Walle <[email protected]>
Link: https://lore.kernel.org/linux-wireless/[email protected]/
Signed-off-by: Ajay Singh <[email protected]>
---
changes since v1:
- updated commit description and included 'Link:' tag
- use atomic_t type for 'close' variable
- set close state after clearing ongoing scan operation
- make use of default primary handler for threaded_irq
- avoid false failure debug message during mac_close

.../wireless/microchip/wilc1000/cfg80211.c | 2 +-
drivers/net/wireless/microchip/wilc1000/hif.c | 2 +-
.../net/wireless/microchip/wilc1000/netdev.c | 33 ++++++-------------
.../net/wireless/microchip/wilc1000/netdev.h | 2 +-
.../net/wireless/microchip/wilc1000/wlan.c | 3 ++
5 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
index b545d93c6e37..a90a75094486 100644
--- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c
+++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
@@ -461,7 +461,7 @@ static int disconnect(struct wiphy *wiphy, struct net_device *dev,
if (!wilc)
return -EIO;

- if (wilc->close) {
+ if (atomic_read(&wilc->close)) {
/* already disconnected done */
cfg80211_disconnected(dev, 0, NULL, 0, true, GFP_KERNEL);
return 0;
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c
index 5adc69d5bcae..6cac92ba0075 100644
--- a/drivers/net/wireless/microchip/wilc1000/hif.c
+++ b/drivers/net/wireless/microchip/wilc1000/hif.c
@@ -983,7 +983,7 @@ static void handle_set_mcast_filter(struct work_struct *work)
memcpy(cur_byte, set_mc->mc_list, set_mc->cnt * ETH_ALEN);

result = wilc_send_config_pkt(vif, WILC_SET_CFG, &wid, 1);
- if (result)
+ if (result && !atomic_read(&vif->wilc->close))
netdev_err(vif->ndev, "Failed to send setup multicast\n");

error:
diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c
index e9f59de31b0b..c8f3b15f029b 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.c
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.c
@@ -23,26 +23,10 @@
#define __WILC1000_FW(api) WILC1000_FW_PREFIX #api ".bin"
#define WILC1000_FW(api) __WILC1000_FW(api)

-static irqreturn_t isr_uh_routine(int irq, void *user_data)
-{
- struct wilc *wilc = user_data;
-
- if (wilc->close) {
- pr_err("Can't handle UH interrupt\n");
- return IRQ_HANDLED;
- }
- return IRQ_WAKE_THREAD;
-}
-
static irqreturn_t isr_bh_routine(int irq, void *userdata)
{
struct wilc *wilc = userdata;

- if (wilc->close) {
- pr_err("Can't handle BH interrupt\n");
- return IRQ_HANDLED;
- }
-
wilc_handle_isr(wilc);

return IRQ_HANDLED;
@@ -54,7 +38,7 @@ static int init_irq(struct net_device *dev)
struct wilc *wl = vif->wilc;
int ret;

- ret = request_threaded_irq(wl->dev_irq_num, isr_uh_routine,
+ ret = request_threaded_irq(wl->dev_irq_num, NULL,
isr_bh_routine,
IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
dev->name, wl);
@@ -150,7 +134,7 @@ static int wilc_txq_task(void *vp)
while (1) {
wait_for_completion(&wl->txq_event);

- if (wl->close) {
+ if (atomic_read(&wl->close)) {
complete(&wl->txq_thread_started);

while (!kthread_should_stop())
@@ -171,7 +155,7 @@ static int wilc_txq_task(void *vp)
}
srcu_read_unlock(&wl->srcu, srcu_idx);
}
- } while (ret == WILC_VMM_ENTRY_FULL_RETRY && !wl->close);
+ } while (ret == WILC_VMM_ENTRY_FULL_RETRY && !atomic_read(&wl->close));
}
return 0;
}
@@ -418,7 +402,7 @@ static void wlan_deinitialize_threads(struct net_device *dev)
struct wilc_vif *vif = netdev_priv(dev);
struct wilc *wl = vif->wilc;

- wl->close = 1;
+ atomic_set(&wl->close, 1);

complete(&wl->txq_event);

@@ -472,7 +456,7 @@ static int wlan_initialize_threads(struct net_device *dev)
"%s-tx", dev->name);
if (IS_ERR(wilc->txq_thread)) {
netdev_err(dev, "couldn't create TXQ thread\n");
- wilc->close = 1;
+ atomic_set(&wilc->close, 1);
return PTR_ERR(wilc->txq_thread);
}
wait_for_completion(&wilc->txq_thread_started);
@@ -487,7 +471,7 @@ static int wilc_wlan_initialize(struct net_device *dev, struct wilc_vif *vif)

if (!wl->initialized) {
wl->mac_status = WILC_MAC_STATUS_INIT;
- wl->close = 0;
+ atomic_set(&wl->close, 0);

ret = wilc_wlan_init(dev);
if (ret)
@@ -782,12 +766,15 @@ static int wilc_mac_close(struct net_device *ndev)
netif_stop_queue(vif->ndev);

wilc_handle_disconnect(vif);
+
+ if (wl->open_ifcs == 0)
+ atomic_set(&wl->close, 1);
+
wilc_deinit_host_int(vif->ndev);
}

if (wl->open_ifcs == 0) {
netdev_dbg(ndev, "Deinitializing wilc1000\n");
- wl->close = 1;
wilc_wlan_deinitialize(ndev);
}

diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.h b/drivers/net/wireless/microchip/wilc1000/netdev.h
index bb1a315a7b7e..44f7e479604e 100644
--- a/drivers/net/wireless/microchip/wilc1000/netdev.h
+++ b/drivers/net/wireless/microchip/wilc1000/netdev.h
@@ -206,7 +206,7 @@ struct wilc {
u32 chipid;
bool power_save_mode;
int dev_irq_num;
- int close;
+ atomic_t close;
u8 vif_num;
struct list_head vif_list;

diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c
index 58bbf50081e4..3eec2acd2f2f 100644
--- a/drivers/net/wireless/microchip/wilc1000/wlan.c
+++ b/drivers/net/wireless/microchip/wilc1000/wlan.c
@@ -1066,6 +1066,9 @@ void wilc_handle_isr(struct wilc *wilc)
{
u32 int_status;

+ if (atomic_read(&wilc->close))
+ return;
+
acquire_bus(wilc, WILC_BUS_ACQUIRE_AND_WAKEUP);
wilc->hif_func->hif_read_int(wilc, &int_status);

--
2.34.1


2023-05-06 05:58:02

by Kalle Valo

[permalink] [raw]
Subject: Re: [PATCH v2] wifi: wilc1000: fix kernel oops during interface down during background scan

<[email protected]> writes:

> Fix for kernel crash observed with following test procedure:
> while true;
> do ifconfig wlan0 up;
> iw dev wlan0 scan &
> ifconfig wlan0 down;
> done
>
> During the above test procedure, the scan results are received from firmware
> for 'iw scan' command gets queued even when the interface is going down. It
> was causing the kernel oops when dereferencing the freed pointers.
>
> For synchronization, 'mac_close()' calls flush_workqueue() to block its
> execution till all pending work is completed. Afterwards 'wilc->close' flag
> which is set before the flush_workqueue() should avoid adding new work.
> Added 'wilc->close' check in wilc_handle_isr() which is common for
> SPI/SDIO bus to ignore the interrupts from firmware that inturns adds the
> work since the interface is getting closed.
>
> Also, removed isr_uh_routine() as it's not necessary after 'wl->close' check
> is added in wilc_handle_isr(). So now the default primary handler would be
> used for threaded IRQ.
>
> Cc: [email protected]
> Reported-by: Michael Walle <[email protected]>
> Link: https://lore.kernel.org/linux-wireless/[email protected]/
> Signed-off-by: Ajay Singh <[email protected]>
> ---
> changes since v1:
> - updated commit description and included 'Link:' tag
> - use atomic_t type for 'close' variable
> - set close state after clearing ongoing scan operation
> - make use of default primary handler for threaded_irq
> - avoid false failure debug message during mac_close

Like I said in v1, atomic_t with only values 0 and 1 does not really
make sense.

--
https://patchwork.kernel.org/project/linux-wireless/list/

https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

2023-05-06 06:57:56

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH v2] wifi: wilc1000: fix kernel oops during interface down during background scan

On Fri, May 05, 2023 at 11:42:51PM +0000, [email protected] wrote:
> Fix for kernel crash observed with following test procedure:
> while true;
> do ifconfig wlan0 up;
> iw dev wlan0 scan &
> ifconfig wlan0 down;
> done
>
> During the above test procedure, the scan results are received from firmware
> for 'iw scan' command gets queued even when the interface is going down. It
> was causing the kernel oops when dereferencing the freed pointers.
>
> For synchronization, 'mac_close()' calls flush_workqueue() to block its
> execution till all pending work is completed. Afterwards 'wilc->close' flag
> which is set before the flush_workqueue() should avoid adding new work.
> Added 'wilc->close' check in wilc_handle_isr() which is common for
> SPI/SDIO bus to ignore the interrupts from firmware that inturns adds the
> work since the interface is getting closed.
>
> Also, removed isr_uh_routine() as it's not necessary after 'wl->close' check
> is added in wilc_handle_isr(). So now the default primary handler would be
> used for threaded IRQ.
>
> Cc: [email protected]
> Reported-by: Michael Walle <[email protected]>
> Link: https://lore.kernel.org/linux-wireless/[email protected]/
> Signed-off-by: Ajay Singh <[email protected]>
> ---
> changes since v1:
> - updated commit description and included 'Link:' tag
> - use atomic_t type for 'close' variable
> - set close state after clearing ongoing scan operation
> - make use of default primary handler for threaded_irq
> - avoid false failure debug message during mac_close
>
> .../wireless/microchip/wilc1000/cfg80211.c | 2 +-
> drivers/net/wireless/microchip/wilc1000/hif.c | 2 +-
> .../net/wireless/microchip/wilc1000/netdev.c | 33 ++++++-------------
> .../net/wireless/microchip/wilc1000/netdev.h | 2 +-
> .../net/wireless/microchip/wilc1000/wlan.c | 3 ++
> 5 files changed, 16 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
> index b545d93c6e37..a90a75094486 100644
> --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c
> +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
> @@ -461,7 +461,7 @@ static int disconnect(struct wiphy *wiphy, struct net_device *dev,
> if (!wilc)
> return -EIO;
>
> - if (wilc->close) {
> + if (atomic_read(&wilc->close)) {

What happens if this changes right after you read from this?

Don't reimplement locks on your own, use a real one please.

thanks,

greg k-h

2023-05-10 00:04:11

by Ajay Singh

[permalink] [raw]
Subject: Re: [PATCH v2] wifi: wilc1000: fix kernel oops during interface down during background scan

Hi Greg,

On 5/5/23 22:52, Greg KH wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>
> On Fri, May 05, 2023 at 11:42:51PM +0000, [email protected] wrote:
>> Fix for kernel crash observed with following test procedure:
>> while true;
>> do ifconfig wlan0 up;
>> iw dev wlan0 scan &
>> ifconfig wlan0 down;
>> done
>>
>> During the above test procedure, the scan results are received from firmware
>> for 'iw scan' command gets queued even when the interface is going down. It
>> was causing the kernel oops when dereferencing the freed pointers.
>>
>> For synchronization, 'mac_close()' calls flush_workqueue() to block its
>> execution till all pending work is completed. Afterwards 'wilc->close' flag
>> which is set before the flush_workqueue() should avoid adding new work.
>> Added 'wilc->close' check in wilc_handle_isr() which is common for
>> SPI/SDIO bus to ignore the interrupts from firmware that inturns adds the
>> work since the interface is getting closed.
>>
>> Also, removed isr_uh_routine() as it's not necessary after 'wl->close' check
>> is added in wilc_handle_isr(). So now the default primary handler would be
>> used for threaded IRQ.
>>
>> Cc: [email protected]
>> Reported-by: Michael Walle <[email protected]>
>> Link: https://lore.kernel.org/linux-wireless/[email protected]/
>> Signed-off-by: Ajay Singh <[email protected]>
>> ---
>> changes since v1:
>> - updated commit description and included 'Link:' tag
>> - use atomic_t type for 'close' variable
>> - set close state after clearing ongoing scan operation
>> - make use of default primary handler for threaded_irq
>> - avoid false failure debug message during mac_close
>>
>> .../wireless/microchip/wilc1000/cfg80211.c | 2 +-
>> drivers/net/wireless/microchip/wilc1000/hif.c | 2 +-
>> .../net/wireless/microchip/wilc1000/netdev.c | 33 ++++++-------------
>> .../net/wireless/microchip/wilc1000/netdev.h | 2 +-
>> .../net/wireless/microchip/wilc1000/wlan.c | 3 ++
>> 5 files changed, 16 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
>> index b545d93c6e37..a90a75094486 100644
>> --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c
>> +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c
>> @@ -461,7 +461,7 @@ static int disconnect(struct wiphy *wiphy, struct net_device *dev,
>> if (!wilc)
>> return -EIO;
>>
>> - if (wilc->close) {
>> + if (atomic_read(&wilc->close)) {
>
> What happens if this changes right after you read from this?
>

Yeah, there is a possible race condition between
'cfg80211_ops->disconnect()' and 'ndo_stop()'. The above check is not
enough to handle that scenario. For that, I thought of using a lock
between 'wifi disconnect' and 'interface close' to serialize the access
and submit it in a separate patch(maybe I can add a patch for that as
patch series). However, this patch helps to resolve the issue which was
reported in the test procedure. By setting the 'close' state
correctly(i.e before flushing the workqueue) the new work doesn't get
added for processing when the interface is going down.


> Don't reimplement locks on your own, use a real one please.

Sure, I will work on updating the patch using a lock between
'disconnect()' & 'ndo_stop()'.


Regards,
Ajay