2022-04-05 03:19:13

by Ben Greear

[permalink] [raw]
Subject: [PATCH 1/2] mt76: mt7915: retry pci probe logic up to 3 times.

From: Ben Greear <[email protected]>

On my x86-64 system with 6 mtk7915 radios, one or more radios
often fail to start upon reboot. So, retry the pci probe logic
up to 3 times. Example of this mitigation working:

[ 8.637908] mt7915e 0000:06:00.0: ASIC revision: 79150000
[ 8.638026] mt7915e 0000:06:00.0: mt7915: register_device Driver-Version: 5.17.0-ct
[ 28.863203] mt7915e 0000:06:00.0: MCU: Initial Failure: Message 00000010 (cid 10 ext_cid: 0 seq 1) timeout. Last successful cmd: 0x0
[ 28.873960] mt7915e 0000:06:00.0: Failed to get patch semaphore: -110
[ 28.879141] mt7915e 0000:06:00.0: mcu-init: Failed to load firmware, err: -11
[ 28.879143] mt7915e 0000:06:00.0: mt7915_register_device failed, ret: -11
[ 28.884796] mt7915e 0000:06:00.0: mt7915_pci_probe had error on try 1/3, ret: -11
[ 28.891161] mt7915e 0000:06:00.0: ASIC revision: 79150094
[ 28.891261] mt7915e 0000:06:00.0: mt7915: register_device Driver-Version: 5.17.0-ct
[ 29.022875] mt7915e 0000:06:00.0: HW/SW Version: 0x8a108a10, Build Time: 20211222184017a
[ 29.033934] mt7915e 0000:06:00.0: WM Firmware Version: ____000000, Build Time: 20211222184052
[ 29.051992] mt7915e 0000:06:00.0: WA Firmware Version: DEV_000000, Build Time: 20211222184111
[ 29.446014] mt7915e 0000:06:00.0: mt7915_pci_probe succeeded on try 2/3

A quick test of traffic shows the radio works fine after recovery.

Signed-off-by: Ben Greear <[email protected]>
---
.../net/wireless/mediatek/mt76/mt7915/pci.c | 61 +++++++++++++------
1 file changed, 44 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
index 6f819c41a4c4..07cb8a699b75 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
@@ -92,8 +92,8 @@ static int mt7915_pci_hif2_probe(struct pci_dev *pdev)
return 0;
}

-static int mt7915_pci_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int _mt7915_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
{
struct mt7915_dev *dev;
struct mt76_dev *mdev;
@@ -101,20 +101,6 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
int irq;
int ret;

- ret = pcim_enable_device(pdev);
- if (ret)
- return ret;
-
- ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
- if (ret)
- return ret;
-
- pci_set_master(pdev);
-
- ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
- if (ret)
- return ret;
-
mt76_pci_disable_aspm(pdev);

if (id->device == 0x7916 || id->device == 0x790a)
@@ -161,8 +147,11 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
}

ret = mt7915_register_device(dev);
- if (ret)
+ if (ret) {
+ dev_err(dev->mt76.dev, "mt7915_register_device failed, ret: %d",
+ ret);
goto free_hif2_irq;
+ }

return 0;

@@ -181,6 +170,44 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
return ret;
}

+/* One-time PCI setup, then up to 3 tries of _mt7915_pci_probe().
+ * Returns 0 on success, else the setup error or the last try's error.
+ */
+static int mt7915_pci_probe(struct pci_dev *pdev,
+			    const struct pci_device_id *id)
+{
+	int ret, z;
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+	if (ret)
+		return ret;
+
+	pci_set_master(pdev);
+
+	ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	for (z = 0; z < 3; z++) {
+		ret = _mt7915_pci_probe(pdev, id);
+		if (ret) {
+			dev_err(&pdev->dev, "mt7915_pci_probe had error on try %d/3, ret: %d\n",
+				z + 1, ret);
+			continue;
+		}
+		/* Only worth a message if an earlier attempt failed. */
+		if (z > 0)
+			dev_info(&pdev->dev, "mt7915_pci_probe succeeded on try %d/3\n",
+				 z + 1);
+		break;
+	}
+	return ret;
+}
+
static void mt7915_hif_remove(struct pci_dev *pdev)
{
struct mt7915_hif *hif = pci_get_drvdata(pdev);
--
2.20.1