2022-07-18 20:57:35

by Sean Wang

[permalink] [raw]
Subject: [PATCH 1/4] mt76: mt7921e: fix race issue between reset and suspend/resume

From: Sean Wang <[email protected]>

It is unexpected that the reset work is running simultaneously with
the suspend or resume context and it is possible that reset work is still
running even after mt7921 is suspended if we don't fix the race issue.

Thus, the suspend procedure should wait at the beginning until the reset is
completed, and ignore any subsequent reset requests.

In case there is an error that happens during either suspend or resume
handler, we will schedule a reset task to recover the error before
returning the error code to ensure we can immediately fix the error there.

Fixes: 0c1ce9884607 ("mt76: mt7921: add wifi reset support")
Co-developed-by: YN Chen <[email protected]>
Signed-off-by: YN Chen <[email protected]>
Signed-off-by: Sean Wang <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 5 +++++
drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 13 +++++++++----
2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 47f0aa81ab02..6bd9fc9228a2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -780,6 +780,7 @@ void mt7921_mac_reset_work(struct work_struct *work)
void mt7921_reset(struct mt76_dev *mdev)
{
struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
+ struct mt76_connac_pm *pm = &dev->pm;

if (!dev->hw_init_done)
return;
@@ -787,8 +788,12 @@ void mt7921_reset(struct mt76_dev *mdev)
if (dev->hw_full_reset)
return;

+ if (pm->suspended)
+ return;
+
queue_work(dev->mt76.wq, &dev->reset_work);
}
+EXPORT_SYMBOL_GPL(mt7921_reset);

void mt7921_mac_update_mib_stats(struct mt7921_phy *phy)
{
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 9d1ba838e54f..07573ea55389 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -363,6 +363,7 @@ static int mt7921_pci_suspend(struct device *device)
int i, err;

pm->suspended = true;
+ cancel_work_sync(&dev->reset_work);
cancel_delayed_work_sync(&pm->ps_work);
cancel_work_sync(&pm->wake_work);

@@ -424,6 +425,9 @@ static int mt7921_pci_suspend(struct device *device)
restore_suspend:
pm->suspended = false;

+ if (err < 0)
+ mt7921_reset(&dev->mt76);
+
return err;
}

@@ -437,7 +441,7 @@ static int mt7921_pci_resume(struct device *device)

err = mt7921_mcu_drv_pmctrl(dev);
if (err < 0)
- return err;
+ goto failed;

mt7921_wpdma_reinit_cond(dev);

@@ -467,11 +471,12 @@ static int mt7921_pci_resume(struct device *device)
mt76_connac_mcu_set_deep_sleep(&dev->mt76, false);

err = mt76_connac_mcu_set_hif_suspend(mdev, false);
- if (err)
- return err;
-
+failed:
pm->suspended = false;

+ if (err < 0)
+ mt7921_reset(&dev->mt76);
+
return err;
}

--
2.25.1


2022-07-18 20:57:59

by Sean Wang

[permalink] [raw]
Subject: [PATCH 3/4] mt76: mt7921u: fix race issue between reset and suspend/resume

From: Sean Wang <[email protected]>

It is unexpected that the reset work is running simultaneously with
the suspend or resume context and it is possible that reset work is still
running even after mt7921 is suspended if we don't fix the race issue.

Thus, the suspend procedure should wait at the beginning until the reset is
completed, and ignore any subsequent reset requests.

In case there is an error that happens during either suspend or resume
handler, we will schedule a reset task to recover the error before
returning the error code to ensure we can immediately fix the error there.

Fixes: df3e4143ba8a ("mt76: mt7921u: add suspend/resume support")
Co-developed-by: YN Chen <[email protected]>
Signed-off-by: YN Chen <[email protected]>
Signed-off-by: Sean Wang <[email protected]>
---
.../net/wireless/mediatek/mt76/mt7921/usb.c | 28 ++++++++++++++++---
1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/usb.c b/drivers/net/wireless/mediatek/mt76/mt7921/usb.c
index dd3b8884e162..124d62c0dfa4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/usb.c
@@ -300,11 +300,15 @@ static void mt7921u_disconnect(struct usb_interface *usb_intf)
static int mt7921u_suspend(struct usb_interface *intf, pm_message_t state)
{
struct mt7921_dev *dev = usb_get_intfdata(intf);
+ struct mt76_connac_pm *pm = &dev->pm;
int err;

+ pm->suspended = true;
+ cancel_work_sync(&dev->reset_work);
+
err = mt76_connac_mcu_set_hif_suspend(&dev->mt76, true);
if (err)
- return err;
+ goto failed;

mt76u_stop_rx(&dev->mt76);
mt76u_stop_tx(&dev->mt76);
@@ -312,11 +316,20 @@ static int mt7921u_suspend(struct usb_interface *intf, pm_message_t state)
set_bit(MT76_STATE_SUSPEND, &dev->mphy.state);

return 0;
+
+failed:
+ pm->suspended = false;
+
+ if (err < 0)
+ mt7921_reset(&dev->mt76);
+
+ return err;
}

static int mt7921u_resume(struct usb_interface *intf)
{
struct mt7921_dev *dev = usb_get_intfdata(intf);
+ struct mt76_connac_pm *pm = &dev->pm;
bool reinit = true;
int err, i;

@@ -338,16 +351,23 @@ static int mt7921u_resume(struct usb_interface *intf)
if (reinit || mt7921_dma_need_reinit(dev)) {
err = mt7921u_dma_init(dev, true);
if (err)
- return err;
+ goto failed;
}

clear_bit(MT76_STATE_SUSPEND, &dev->mphy.state);

err = mt76u_resume_rx(&dev->mt76);
if (err < 0)
- return err;
+ goto failed;
+
+ err = mt76_connac_mcu_set_hif_suspend(&dev->mt76, false);
+failed:
+ pm->suspended = false;
+
+ if (err < 0)
+ mt7921_reset(&dev->mt76);

- return mt76_connac_mcu_set_hif_suspend(&dev->mt76, false);
+ return err;
}
#endif /* CONFIG_PM */

--
2.25.1

2022-07-18 22:12:14

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH 1/4] mt76: mt7921e: fix race issue between reset and suspend/resume

> From: Sean Wang <[email protected]>
>
> It is unexpected that the reset work is running simultaneously with
> the suspend or resume context and it is possible that reset work is still
> running even after mt7921 is suspended if we don't fix the race issue.
>
> Thus, the suspend procedure should be waiting until the reset is completed
> at the beginning and ignore the subsequent the reset requests.
>
> In case there is an error that happens during either suspend or resume
> handler, we will schedule a reset task to recover the error before
> returning the error code to ensure we can immediately fix the error there.
>
> Fixes: 0c1ce9884607 ("mt76: mt7921: add wifi reset support")
> Co-developed-by: YN Chen <[email protected]>
> Signed-off-by: YN Chen <[email protected]>
> Signed-off-by: Sean Wang <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 5 +++++
> drivers/net/wireless/mediatek/mt76/mt7921/pci.c | 13 +++++++++----
> 2 files changed, 14 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
> index 47f0aa81ab02..6bd9fc9228a2 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
> @@ -780,6 +780,7 @@ void mt7921_mac_reset_work(struct work_struct *work)
> void mt7921_reset(struct mt76_dev *mdev)
> {
> struct mt7921_dev *dev = container_of(mdev, struct mt7921_dev, mt76);
> + struct mt76_connac_pm *pm = &dev->pm;
>
> if (!dev->hw_init_done)
> return;
> @@ -787,8 +788,12 @@ void mt7921_reset(struct mt76_dev *mdev)
> if (dev->hw_full_reset)
> return;
>
> + if (pm->suspended)
> + return;
> +
> queue_work(dev->mt76.wq, &dev->reset_work);
> }
> +EXPORT_SYMBOL_GPL(mt7921_reset);
>
> void mt7921_mac_update_mib_stats(struct mt7921_phy *phy)
> {
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
> index 9d1ba838e54f..07573ea55389 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
> @@ -363,6 +363,7 @@ static int mt7921_pci_suspend(struct device *device)
> int i, err;
>
> pm->suspended = true;
> + cancel_work_sync(&dev->reset_work);

should we just wait for the reset to complete here instead of cancelling? (e.g. flush_work)

Regards,
Lorenzo

> cancel_delayed_work_sync(&pm->ps_work);
> cancel_work_sync(&pm->wake_work);
>
> @@ -424,6 +425,9 @@ static int mt7921_pci_suspend(struct device *device)
> restore_suspend:
> pm->suspended = false;
>
> + if (err < 0)
> + mt7921_reset(&dev->mt76);
> +
> return err;
> }
>
> @@ -437,7 +441,7 @@ static int mt7921_pci_resume(struct device *device)
>
> err = mt7921_mcu_drv_pmctrl(dev);
> if (err < 0)
> - return err;
> + goto failed;
>
> mt7921_wpdma_reinit_cond(dev);
>
> @@ -467,11 +471,12 @@ static int mt7921_pci_resume(struct device *device)
> mt76_connac_mcu_set_deep_sleep(&dev->mt76, false);
>
> err = mt76_connac_mcu_set_hif_suspend(mdev, false);
> - if (err)
> - return err;
> -
> +failed:
> pm->suspended = false;
>
> + if (err < 0)
> + mt7921_reset(&dev->mt76);
> +
> return err;
> }
>
> --
> 2.25.1
>


Attachments:
(No filename) (3.33 kB)
signature.asc (235.00 B)
Download all attachments

2022-07-19 20:48:15

by Sean Wang

[permalink] [raw]
Subject: Re: [PATCH 1/4] mt76: mt7921e: fix race issue between reset and suspend/resume

From: Sean Wang <[email protected]>

>> From: Sean Wang <[email protected]>
>>
>> It is unexpected that the reset work is running simultaneously with
>> the suspend or resume context and it is possible that reset work is

<snip>

>> @@ -363,6 +363,7 @@ static int mt7921_pci_suspend(struct device *device)
>> int i, err;
>>
>> pm->suspended = true;
>> + cancel_work_sync(&dev->reset_work);
>
>should we just wait for the reset to complete here instead of cancelling? (e.g. flush_work)
>

Yes, that looks better to me. It would wait until the queued reset request finishes.

>Regards,
>Lorenzo
>
>> cancel_delayed_work_sync(&pm->ps_work);
>> cancel_work_sync(&pm->wake_work);
>>
>> @@ -424,6 +425,9 @@ static int mt7921_pci_suspend(struct device
>> *device)
>> restore_suspend:
>> pm->suspended = false;
>>
>> + if (err < 0)
>> + mt7921_reset(&dev->mt76);
>> +
>> return err;
>> }
>>
>> @@ -437,7 +441,7 @@ static int mt7921_pci_resume(struct device
>> *device)
>>
>> err = mt7921_mcu_drv_pmctrl(dev);
>> if (err < 0)
>> - return err;
>> + goto failed;
>>
>> mt7921_wpdma_reinit_cond(dev);
>>
>> @@ -467,11 +471,12 @@ static int mt7921_pci_resume(struct device *device)
>> mt76_connac_mcu_set_deep_sleep(&dev->mt76, false);
>>
>> err = mt76_connac_mcu_set_hif_suspend(mdev, false);
>> - if (err)
>> - return err;
>> -
>> +failed:
>> pm->suspended = false;
>>
>> + if (err < 0)
>> + mt7921_reset(&dev->mt76);
>> +
>> return err;
>> }
>>
>> --
>> 2.25.1
>>
>
>