2019-10-24 10:32:21

by Lorenzo Bianconi

[permalink] [raw]
Subject: [PATCH wireless-drivers 0/2] fix mt76x2e hangs on U7612E mini-pcie

Various mt76x2e issues have been reported on U7612E mini-pcie card [1].
On U7612E-H1 PCIE_ASPM causes continuous mcu hangs and instability and
so patch 1/2 disable it by default.
Moreover mt76 does not properly unmap dma buffers for non-linear skbs.
This issue may result in hw hangs if the system relies on IOMMU.
Patch 2/2 fix the problem properly unmapping data fragments on
non-linear skbs.

[1]: https://lore.kernel.org/netdev/[email protected]/

Lorenzo Bianconi (2):
mt76: mt76x2e: disable pcie_aspm by default
mt76: dma: fix buffer unmap with non-linear skbs

drivers/net/wireless/mediatek/mt76/dma.c | 10 ++--
drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
.../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
4 files changed, 57 insertions(+), 3 deletions(-)

--
2.21.0


2019-10-24 10:32:21

by Lorenzo Bianconi

[permalink] [raw]
Subject: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
instability and so let's disable PCIE_ASPM by default. This patch has
been successfully tested on U7612E-H1 mini-pice card

Signed-off-by: Felix Fietkau <[email protected]>
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
.../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
3 files changed, 50 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mmio.c b/drivers/net/wireless/mediatek/mt76/mmio.c
index 1c974df1fe25..f991a8f1c42a 100644
--- a/drivers/net/wireless/mediatek/mt76/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mmio.c
@@ -3,6 +3,8 @@
* Copyright (C) 2016 Felix Fietkau <[email protected]>
*/

+#include <linux/pci.h>
+
#include "mt76.h"
#include "trace.h"

@@ -78,6 +80,51 @@ void mt76_set_irq_mask(struct mt76_dev *dev, u32 addr,
}
EXPORT_SYMBOL_GPL(mt76_set_irq_mask);

+void mt76_mmio_disable_aspm(struct pci_dev *pdev)
+{
+ struct pci_dev *parent = pdev->bus->self;
+ u16 aspm_conf, parent_aspm_conf = 0;
+
+ pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
+ aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
+ if (parent) {
+ pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
+ &parent_aspm_conf);
+ parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
+ }
+
+ if (!aspm_conf && (!parent || !parent_aspm_conf)) {
+ /* aspm already disabled */
+ return;
+ }
+
+ dev_info(&pdev->dev, "disabling ASPM %s %s\n",
+ (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
+ (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
+
+#ifdef CONFIG_PCIEASPM
+ pci_disable_link_state(pdev, aspm_conf);
+
+ /* Double-check ASPM control. If not disabled by the above, the
+ * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
+ * not enabled); override by writing PCI config space directly.
+ */
+ pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
+ if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
+ return;
+#endif /* CONFIG_PCIEASPM */
+
+ /* Both device and parent should have the same ASPM setting.
+ * Disable ASPM in downstream component first and then upstream.
+ */
+ pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf);
+
+ if (parent)
+ pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
+ aspm_conf);
+}
+EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
+
void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
{
static const struct mt76_bus_ops mt76_mmio_ops = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 570c159515a0..962812b6247d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
#define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)

void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
+void mt76_mmio_disable_aspm(struct pci_dev *pdev);

static inline u16 mt76_chip(struct mt76_dev *dev)
{
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
index 73c3104f8858..264bef87e5c7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
@@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);

+ mt76_mmio_disable_aspm(pdev);
+
return 0;

error:
--
2.21.0

2019-10-24 10:32:21

by Lorenzo Bianconi

[permalink] [raw]
Subject: [PATCH wireless-drivers 2/2] mt76: dma: fix buffer unmap with non-linear skbs

mt76 dma layer is supposed to unmap skb data buffers while keep txwi
mapped on hw dma ring. At the moment mt76 wrongly unmap txwi or does
not unmap data fragments in even positions for non-linear skbs. This
issue may result in hw hangs with A-MSDU if the system relies on IOMMU
or SWIOTLB. Fix this behaviour properly unmapping data fragments on
non-linear skbs.

Fixes: 17f1de56df05 ("mt76: add common code shared between multiple chipsets")
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
drivers/net/wireless/mediatek/mt76/dma.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index c747eb24581c..8c27956875e7 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -93,11 +93,14 @@ static void
mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
struct mt76_queue_entry *prev_e)
{
- struct mt76_queue_entry *e = &q->entry[idx];
__le32 __ctrl = READ_ONCE(q->desc[idx].ctrl);
+ struct mt76_queue_entry *e = &q->entry[idx];
u32 ctrl = le32_to_cpu(__ctrl);
+ bool mcu = e->skb && !e->txwi;
+ bool first = e->skb == DMA_DUMMY_DATA || e->txwi == DMA_DUMMY_DATA ||
+ (e->skb && !skb_is_nonlinear(e->skb));

- if (!e->txwi || !e->skb) {
+ if (!first || mcu) {
__le32 addr = READ_ONCE(q->desc[idx].buf0);
u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctrl);

@@ -105,7 +108,8 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
DMA_TO_DEVICE);
}

- if (!(ctrl & MT_DMA_CTL_LAST_SEC0)) {
+ if (!(ctrl & MT_DMA_CTL_LAST_SEC0) ||
+ e->txwi == DMA_DUMMY_DATA) {
__le32 addr = READ_ONCE(q->desc[idx].buf1);
u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN1, ctrl);

--
2.21.0

2019-10-24 20:24:44

by Kalle Valo

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

Lorenzo Bianconi <[email protected]> writes:

> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> instability and so let's disable PCIE_ASPM by default. This patch has
> been successfully tested on U7612E-H1 mini-pice card
>
> Signed-off-by: Felix Fietkau <[email protected]>
> Signed-off-by: Lorenzo Bianconi <[email protected]>

[...]

> +void mt76_mmio_disable_aspm(struct pci_dev *pdev)
> +{
> + struct pci_dev *parent = pdev->bus->self;
> + u16 aspm_conf, parent_aspm_conf = 0;
> +
> + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> + aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> + if (parent) {
> + pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
> + &parent_aspm_conf);
> + parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> + }
> +
> + if (!aspm_conf && (!parent || !parent_aspm_conf)) {
> + /* aspm already disabled */
> + return;
> + }
> +
> + dev_info(&pdev->dev, "disabling ASPM %s %s\n",
> + (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
> + (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
> +
> +#ifdef CONFIG_PCIEASPM
> + pci_disable_link_state(pdev, aspm_conf);
> +
> + /* Double-check ASPM control. If not disabled by the above, the
> + * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
> + * not enabled); override by writing PCI config space directly.
> + */
> + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> + if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
> + return;
> +#endif /* CONFIG_PCIEASPM */

A minor comment, but 'if IS_ENABLED(CONFIG_PCIEASPM)' is preferred over
#ifdef. Better compiler coverage and so on.

--
https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches

2019-10-24 20:36:56

by Oleksandr Natalenko

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 0/2] fix mt76x2e hangs on U7612E mini-pcie

Hi.

On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> Various mt76x2e issues have been reported on U7612E mini-pcie card [1].
> On U7612E-H1 PCIE_ASPM causes continuous mcu hangs and instability and
> so patch 1/2 disable it by default.
> Moreover mt76 does not properly unmap dma buffers for non-linear skbs.
> This issue may result in hw hangs if the system relies on IOMMU.
> Patch 2/2 fix the problem properly unmapping data fragments on
> non-linear skbs.
>
> [1]:
> https://lore.kernel.org/netdev/[email protected]/
>
> Lorenzo Bianconi (2):
> mt76: mt76x2e: disable pcie_aspm by default
> mt76: dma: fix buffer unmap with non-linear skbs
>
> drivers/net/wireless/mediatek/mt76/dma.c | 10 ++--
> drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
> .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
> 4 files changed, 57 insertions(+), 3 deletions(-)

Feel free to add my

Reported-by: Oleksandr Natalenko <[email protected]>
Tested-by: Oleksandr Natalenko <[email protected]>

and thanks for the submission.

--
Oleksandr Natalenko (post-factum)

2019-10-24 20:46:00

by Felix Fietkau

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 2/2] mt76: dma: fix buffer unmap with non-linear skbs

On 2019-10-24 00:23, Lorenzo Bianconi wrote:
> mt76 dma layer is supposed to unmap skb data buffers while keep txwi
> mapped on hw dma ring. At the moment mt76 wrongly unmap txwi or does
> not unmap data fragments in even positions for non-linear skbs. This
> issue may result in hw hangs with A-MSDU if the system relies on IOMMU
> or SWIOTLB. Fix this behaviour properly unmapping data fragments on
> non-linear skbs.
>
> Fixes: 17f1de56df05 ("mt76: add common code shared between multiple chipsets")
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt76/dma.c | 10 +++++++---
> 1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
> index c747eb24581c..8c27956875e7 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -93,11 +93,14 @@ static void
> mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
> struct mt76_queue_entry *prev_e)
> {
> - struct mt76_queue_entry *e = &q->entry[idx];
> __le32 __ctrl = READ_ONCE(q->desc[idx].ctrl);
> + struct mt76_queue_entry *e = &q->entry[idx];
> u32 ctrl = le32_to_cpu(__ctrl);
> + bool mcu = e->skb && !e->txwi;
> + bool first = e->skb == DMA_DUMMY_DATA || e->txwi == DMA_DUMMY_DATA ||
> + (e->skb && !skb_is_nonlinear(e->skb));
It seems to me that these conditions could be true not just for the
first entry, but also following entries except for the last one.
I think we should add a 'bool has_txwi' field in struct mt76_queue_entry
to indicate that the first dma address points to a txwi that should not
be unmapped.

- Felix

2019-10-24 23:04:18

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

> Lorenzo Bianconi <[email protected]> writes:
>
> > On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> > instability and so let's disable PCIE_ASPM by default. This patch has
> > been successfully tested on U7612E-H1 mini-pice card
> >
> > Signed-off-by: Felix Fietkau <[email protected]>
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
>
> [...]
>
> > +void mt76_mmio_disable_aspm(struct pci_dev *pdev)
> > +{
> > + struct pci_dev *parent = pdev->bus->self;
> > + u16 aspm_conf, parent_aspm_conf = 0;
> > +
> > + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> > + aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> > + if (parent) {
> > + pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
> > + &parent_aspm_conf);
> > + parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> > + }
> > +
> > + if (!aspm_conf && (!parent || !parent_aspm_conf)) {
> > + /* aspm already disabled */
> > + return;
> > + }
> > +
> > + dev_info(&pdev->dev, "disabling ASPM %s %s\n",
> > + (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
> > + (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
> > +
> > +#ifdef CONFIG_PCIEASPM
> > + pci_disable_link_state(pdev, aspm_conf);
> > +
> > + /* Double-check ASPM control. If not disabled by the above, the
> > + * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
> > + * not enabled); override by writing PCI config space directly.
> > + */
> > + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> > + if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
> > + return;
> > +#endif /* CONFIG_PCIEASPM */
>
> A minor comment, but 'if IS_ENABLED(CONFIG_PCIEASPM)' is preferred over
> #ifdef. Better compiler coverage and so on.

Hi Kalle,

ack, I will fix it in v2.

Regards,
Lorenzo

>
> --
> https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches


Attachments:
(No filename) (1.87 kB)
signature.asc (235.00 B)
Download all attachments

2019-10-24 23:08:28

by Stanislaw Gruszka

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 2/2] mt76: dma: fix buffer unmap with non-linear skbs

On Thu, Oct 24, 2019 at 12:23:16AM +0200, Lorenzo Bianconi wrote:
> mt76 dma layer is supposed to unmap skb data buffers while keep txwi
> mapped on hw dma ring. At the moment mt76 wrongly unmap txwi or does
> not unmap data fragments in even positions for non-linear skbs. This
> issue may result in hw hangs with A-MSDU if the system relies on IOMMU
> or SWIOTLB. Fix this behaviour properly unmapping data fragments on
> non-linear skbs.

If we have to keep txwi mapped, before unmap fragments, when then
txwi is unmaped ?

Stanislaw

> Fixes: 17f1de56df05 ("mt76: add common code shared between multiple chipsets")
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt76/dma.c | 10 +++++++---
> 1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
> index c747eb24581c..8c27956875e7 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -93,11 +93,14 @@ static void
> mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
> struct mt76_queue_entry *prev_e)
> {
> - struct mt76_queue_entry *e = &q->entry[idx];
> __le32 __ctrl = READ_ONCE(q->desc[idx].ctrl);
> + struct mt76_queue_entry *e = &q->entry[idx];
> u32 ctrl = le32_to_cpu(__ctrl);
> + bool mcu = e->skb && !e->txwi;
> + bool first = e->skb == DMA_DUMMY_DATA || e->txwi == DMA_DUMMY_DATA ||
> + (e->skb && !skb_is_nonlinear(e->skb));
>
> - if (!e->txwi || !e->skb) {
> + if (!first || mcu) {
> __le32 addr = READ_ONCE(q->desc[idx].buf0);
> u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctrl);
>
> @@ -105,7 +108,8 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
> DMA_TO_DEVICE);
> }
>
> - if (!(ctrl & MT_DMA_CTL_LAST_SEC0)) {
> + if (!(ctrl & MT_DMA_CTL_LAST_SEC0) ||
> + e->txwi == DMA_DUMMY_DATA) {
> __le32 addr = READ_ONCE(q->desc[idx].buf1);
> u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN1, ctrl);
>
> --
> 2.21.0
>

2019-10-24 23:14:47

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 2/2] mt76: dma: fix buffer unmap with non-linear skbs

On Oct 24, Felix Fietkau wrote:
> On 2019-10-24 00:23, Lorenzo Bianconi wrote:
> > mt76 dma layer is supposed to unmap skb data buffers while keep txwi
> > mapped on hw dma ring. At the moment mt76 wrongly unmap txwi or does
> > not unmap data fragments in even positions for non-linear skbs. This
> > issue may result in hw hangs with A-MSDU if the system relies on IOMMU
> > or SWIOTLB. Fix this behaviour properly unmapping data fragments on
> > non-linear skbs.
> >
> > Fixes: 17f1de56df05 ("mt76: add common code shared between multiple chipsets")
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> > drivers/net/wireless/mediatek/mt76/dma.c | 10 +++++++---
> > 1 file changed, 7 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
> > index c747eb24581c..8c27956875e7 100644
> > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > @@ -93,11 +93,14 @@ static void
> > mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
> > struct mt76_queue_entry *prev_e)
> > {
> > - struct mt76_queue_entry *e = &q->entry[idx];
> > __le32 __ctrl = READ_ONCE(q->desc[idx].ctrl);
> > + struct mt76_queue_entry *e = &q->entry[idx];
> > u32 ctrl = le32_to_cpu(__ctrl);
> > + bool mcu = e->skb && !e->txwi;
> > + bool first = e->skb == DMA_DUMMY_DATA || e->txwi == DMA_DUMMY_DATA ||
> > + (e->skb && !skb_is_nonlinear(e->skb));
> It seems to me that these conditions could be true not just for the
> first entry, but also following entries except for the last one.
> I think we should add a 'bool has_txwi' field in struct mt76_queue_entry
> to indicate that the first dma address points to a txwi that should not
> be unmapped.

I agree 'first' is misleading since this condition is used to unamp even the
very last data fragment if we have an even number of data fragments
(e.g linear area + one fragment). For the following entries except for the last
one 'first' is false since e->skb is NULL (e->skb is not NULL just for the very
last entry), right? Btw we can remove mcu bool.
In order to improve code readability I agree to add a bool in mt76_queue_entry to
unmap buf0. I will fix it in v2.

Regards,
Lorenzo

>
> - Felix


Attachments:
(No filename) (2.30 kB)
signature.asc (235.00 B)
Download all attachments

2019-10-24 23:14:54

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 2/2] mt76: dma: fix buffer unmap with non-linear skbs

> On Thu, Oct 24, 2019 at 12:23:16AM +0200, Lorenzo Bianconi wrote:
> > mt76 dma layer is supposed to unmap skb data buffers while keep txwi
> > mapped on hw dma ring. At the moment mt76 wrongly unmap txwi or does
> > not unmap data fragments in even positions for non-linear skbs. This
> > issue may result in hw hangs with A-MSDU if the system relies on IOMMU
> > or SWIOTLB. Fix this behaviour properly unmapping data fragments on
> > non-linear skbs.
>
> If we have to keep txwi mapped, before unmap fragments, when then
> txwi is unmaped ?

txwi are mapped when they are crated in mt76_alloc_txwi(). Whenever we need to
modify them we sync the DMA in mt76_dma_tx_queue_skb(). txwi are unmapped in
mt76_tx_free() at driver unload.

Regards,
Lorenzo

>
> Stanislaw
>
> > Fixes: 17f1de56df05 ("mt76: add common code shared between multiple chipsets")
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> > drivers/net/wireless/mediatek/mt76/dma.c | 10 +++++++---
> > 1 file changed, 7 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
> > index c747eb24581c..8c27956875e7 100644
> > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > @@ -93,11 +93,14 @@ static void
> > mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
> > struct mt76_queue_entry *prev_e)
> > {
> > - struct mt76_queue_entry *e = &q->entry[idx];
> > __le32 __ctrl = READ_ONCE(q->desc[idx].ctrl);
> > + struct mt76_queue_entry *e = &q->entry[idx];
> > u32 ctrl = le32_to_cpu(__ctrl);
> > + bool mcu = e->skb && !e->txwi;
> > + bool first = e->skb == DMA_DUMMY_DATA || e->txwi == DMA_DUMMY_DATA ||
> > + (e->skb && !skb_is_nonlinear(e->skb));
> >
> > - if (!e->txwi || !e->skb) {
> > + if (!first || mcu) {
> > __le32 addr = READ_ONCE(q->desc[idx].buf0);
> > u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctrl);
> >
> > @@ -105,7 +108,8 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
> > DMA_TO_DEVICE);
> > }
> >
> > - if (!(ctrl & MT_DMA_CTL_LAST_SEC0)) {
> > + if (!(ctrl & MT_DMA_CTL_LAST_SEC0) ||
> > + e->txwi == DMA_DUMMY_DATA) {
> > __le32 addr = READ_ONCE(q->desc[idx].buf1);
> > u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN1, ctrl);
> >
> > --
> > 2.21.0
> >
>


Attachments:
(No filename) (2.40 kB)
signature.asc (235.00 B)
Download all attachments

2019-10-25 18:57:50

by Heiner Kallweit

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> instability and so let's disable PCIE_ASPM by default. This patch has
> been successfully tested on U7612E-H1 mini-pice card
>
> Signed-off-by: Felix Fietkau <[email protected]>
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
> .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
> 3 files changed, 50 insertions(+)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/mmio.c b/drivers/net/wireless/mediatek/mt76/mmio.c
> index 1c974df1fe25..f991a8f1c42a 100644
> --- a/drivers/net/wireless/mediatek/mt76/mmio.c
> +++ b/drivers/net/wireless/mediatek/mt76/mmio.c
> @@ -3,6 +3,8 @@
> * Copyright (C) 2016 Felix Fietkau <[email protected]>
> */
>
> +#include <linux/pci.h>
> +
> #include "mt76.h"
> #include "trace.h"
>
> @@ -78,6 +80,51 @@ void mt76_set_irq_mask(struct mt76_dev *dev, u32 addr,
> }
> EXPORT_SYMBOL_GPL(mt76_set_irq_mask);
>
> +void mt76_mmio_disable_aspm(struct pci_dev *pdev)
> +{
> + struct pci_dev *parent = pdev->bus->self;
> + u16 aspm_conf, parent_aspm_conf = 0;
> +
> + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> + aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> + if (parent) {
> + pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
> + &parent_aspm_conf);
> + parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
> + }
> +
> + if (!aspm_conf && (!parent || !parent_aspm_conf)) {
> + /* aspm already disabled */
> + return;
> + }
> +
> + dev_info(&pdev->dev, "disabling ASPM %s %s\n",
> + (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
> + (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
> +
> +#ifdef CONFIG_PCIEASPM
> + pci_disable_link_state(pdev, aspm_conf);
> +
> + /* Double-check ASPM control. If not disabled by the above, the
> + * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
> + * not enabled); override by writing PCI config space directly.
> + */
> + pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
> + if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
> + return;
> +#endif /* CONFIG_PCIEASPM */
> +
> + /* Both device and parent should have the same ASPM setting.
> + * Disable ASPM in downstream component first and then upstream.
> + */
> + pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf);
> +
> + if (parent)
> + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> + aspm_conf);

+ linux-pci mailing list

All this seems to be legacy code copied from e1000e.
Fiddling with the low-level PCI(e) registers should be left to the
PCI core. It shouldn't be needed here, a simple call to
pci_disable_link_state() should be sufficient. Note that this function
has a return value meanwhile that you can check instead of reading
back low-level registers.
If BIOS forbids that OS changes ASPM settings, then this should be
respected (like PCI core does). Instead the network chip may provide
the option to configure whether it activates certain ASPM (sub-)states
or not. We went through a similar exercise with the r8169 driver,
you can check how it's done there.

> +}
> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> +
> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> {
> static const struct mt76_bus_ops mt76_mmio_ops = {
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 570c159515a0..962812b6247d 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
>
> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
>
> static inline u16 mt76_chip(struct mt76_dev *dev)
> {
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> index 73c3104f8858..264bef87e5c7 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>
> + mt76_mmio_disable_aspm(pdev);
> +
> return 0;
>
> error:
>

2019-10-25 19:08:53

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> > On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> > instability and so let's disable PCIE_ASPM by default. This patch has
> > been successfully tested on U7612E-H1 mini-pice card
> >
> > Signed-off-by: Felix Fietkau <[email protected]>
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> > drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
> > drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
> > .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
> > 3 files changed, 50 insertions(+)
> >

[...]

> > +
> > + if (parent)
> > + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> > + aspm_conf);
>
> + linux-pci mailing list

Hi Heiner,

>
> All this seems to be legacy code copied from e1000e.
> Fiddling with the low-level PCI(e) registers should be left to the
> PCI core. It shouldn't be needed here, a simple call to
> pci_disable_link_state() should be sufficient. Note that this function
> has a return value meanwhile that you can check instead of reading
> back low-level registers.

ack, I will add it to v2

> If BIOS forbids that OS changes ASPM settings, then this should be
> respected (like PCI core does). Instead the network chip may provide
> the option to configure whether it activates certain ASPM (sub-)states
> or not. We went through a similar exercise with the r8169 driver,
> you can check how it's done there.

looking at the vendor sdk (at least in the version I currently have) there are
no particular ASPM configurations, it just optionally disables it writing directly
in pci registers.
Moreover there are multiple drivers that are currently using this approach:
- ath9k in ath_pci_aspm_init()
- tg3 in tg3_chip_reset()
- e1000e in __e1000e_disable_aspm()
- r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()

Is disabling the ASPM for the system the only option to make this minipcie
work?

Regards,
Lorenzo

>
> > +}
> > +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> > +
> > void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> > {
> > static const struct mt76_bus_ops mt76_mmio_ops = {
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> > index 570c159515a0..962812b6247d 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> > +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> > @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> > #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
> >
> > void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> > +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
> >
> > static inline u16 mt76_chip(struct mt76_dev *dev)
> > {
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> > index 73c3104f8858..264bef87e5c7 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> > +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> > @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> > /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> > mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
> >
> > + mt76_mmio_disable_aspm(pdev);
> > +
> > return 0;
> >
> > error:
> >
>


Attachments:
(No filename) (3.37 kB)
signature.asc (235.00 B)
Download all attachments

2019-10-25 19:11:49

by Heiner Kallweit

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

On 24.10.2019 23:54, Lorenzo Bianconi wrote:
>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
>>> instability and so let's disable PCIE_ASPM by default. This patch has
>>> been successfully tested on U7612E-H1 mini-pice card
>>>
>>> Signed-off-by: Felix Fietkau <[email protected]>
>>> Signed-off-by: Lorenzo Bianconi <[email protected]>
>>> ---
>>> drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
>>> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
>>> .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
>>> 3 files changed, 50 insertions(+)
>>>
>
> [...]
>
>>> +
>>> + if (parent)
>>> + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
>>> + aspm_conf);
>>
>> + linux-pci mailing list
>
> Hi Heiner,
>
>>
>> All this seems to be legacy code copied from e1000e.
>> Fiddling with the low-level PCI(e) registers should be left to the
>> PCI core. It shouldn't be needed here, a simple call to
>> pci_disable_link_state() should be sufficient. Note that this function
>> has a return value meanwhile that you can check instead of reading
>> back low-level registers.
>
> ack, I will add it to v2
>
>> If BIOS forbids that OS changes ASPM settings, then this should be
>> respected (like PCI core does). Instead the network chip may provide
>> the option to configure whether it activates certain ASPM (sub-)states
>> or not. We went through a similar exercise with the r8169 driver,
>> you can check how it's done there.
>
> looking at the vendor sdk (at least in the version I currently have) there are
> no particular ASPM configurations, it just optionally disables it writing directly
> in pci registers.
> Moreover there are multiple drivers that are currently using this approach:
> - ath9k in ath_pci_aspm_init()
> - tg3 in tg3_chip_reset()
> - e1000e in __e1000e_disable_aspm()
> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
>
All these drivers include quite some legacy code. I can mainly speak for r8169:
First versions of the driver are almost as old as Linux. And even though I
refactored most of the driver still some legacy code for older chip versions
(like the two functions you mentioned) is included.

> Is disabling the ASPM for the system the only option to make this minipcie
> work?
>

No. What we do in r8169:

- call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
- If it returns 0, then ASPM (including the L1 sub-states) is disabled.
- If it returns an errno, then disabling ASPM failed (most likely due to
BIOS forbidding ASPM changes - pci_disable_link_state will spit out
a related warning). In this case r8169 configures the chip to not initiate
transitions to L0s/L1 (the other end of the link may still try to enter
ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
to avoid the ASPM-related problems with certain versions of this chip.
Maybe your HW provides similar functionality.

> Regards,
> Lorenzo
>
Heiner

>>
>>> +}
>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
>>> +
>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
>>> {
>>> static const struct mt76_bus_ops mt76_mmio_ops = {
>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
>>> index 570c159515a0..962812b6247d 100644
>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
>>> #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
>>>
>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
>>>
>>> static inline u16 mt76_chip(struct mt76_dev *dev)
>>> {
>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>> index 73c3104f8858..264bef87e5c7 100644
>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>> /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
>>> mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>>>
>>> + mt76_mmio_disable_aspm(pdev);
>>> +
>>> return 0;
>>>
>>> error:
>>>
>>

2019-10-25 19:14:43

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
> >> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> >>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> >>> instability and so let's disable PCIE_ASPM by default. This patch has
> >>> been successfully tested on U7612E-H1 mini-pice card
> >>>
> >>> Signed-off-by: Felix Fietkau <[email protected]>
> >>> Signed-off-by: Lorenzo Bianconi <[email protected]>
> >>> ---
> >>> drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
> >>> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
> >>> .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
> >>> 3 files changed, 50 insertions(+)
> >>>
> >
> > [...]
> >
> >>> +
> >>> + if (parent)
> >>> + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> >>> + aspm_conf);
> >>
> >> + linux-pci mailing list
> >
> > Hi Heiner,
> >
> >>
> >> All this seems to be legacy code copied from e1000e.
> >> Fiddling with the low-level PCI(e) registers should be left to the
> >> PCI core. It shouldn't be needed here, a simple call to
> >> pci_disable_link_state() should be sufficient. Note that this function
> >> has a return value meanwhile that you can check instead of reading
> >> back low-level registers.
> >
> > ack, I will add it to v2
> >
> >> If BIOS forbids that OS changes ASPM settings, then this should be
> >> respected (like PCI core does). Instead the network chip may provide
> >> the option to configure whether it activates certain ASPM (sub-)states
> >> or not. We went through a similar exercise with the r8169 driver,
> >> you can check how it's done there.
> >
> > looking at the vendor sdk (at least in the version I currently have) there are
> > no particular ASPM configurations, it just optionally disables it writing directly
> > in pci registers.
> > Moreover there are multiple drivers that are currently using this approach:
> > - ath9k in ath_pci_aspm_init()
> > - tg3 in tg3_chip_reset()
> > - e1000e in __e1000e_disable_aspm()
> > - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
> >
> All these drivers include quite some legacy code. I can mainly speak for r8169:
> First versions of the driver are almost as old as Linux. And even though I
> refactored most of the driver still some legacy code for older chip versions
> (like the two functions you mentioned) is included.
>
> > Is disabling the ASPM for the system the only option to make this minipcie
> > work?
> >
>
> No. What we do in r8169:
>
> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
> - If it returns an errno, then disabling ASPM failed (most likely due to
> BIOS forbidding ASPM changes - pci_disable_link_state will spit out
> a related warning). In this case r8169 configures the chip to not initiate
> transitions to L0s/L1 (the other end of the link may still try to enter
> ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
> to avoid the ASPM-related problems with certain versions of this chip.
> Maybe your HW provides similar functionality.

yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
unfortunately there is no specific code or documentation I can use for mt76x2e.
So as last chance I decided to disable ASPM directly (in this way the chip is
working fine).
Do you think a kernel parameter to disable ASPM directly would be acceptable?

Regards,
Lorenzo

>
> > Regards,
> > Lorenzo
> >
> Heiner
>
> >>
> >>> +}
> >>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> >>> +
> >>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> >>> {
> >>> static const struct mt76_bus_ops mt76_mmio_ops = {
> >>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>> index 570c159515a0..962812b6247d 100644
> >>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> >>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> >>> #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
> >>>
> >>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> >>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
> >>>
> >>> static inline u16 mt76_chip(struct mt76_dev *dev)
> >>> {
> >>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>> index 73c3104f8858..264bef87e5c7 100644
> >>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >>> /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> >>> mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
> >>>
> >>> + mt76_mmio_disable_aspm(pdev);
> >>> +
> >>> return 0;
> >>>
> >>> error:
> >>>
> >>
>


Attachments:
(No filename) (4.99 kB)
signature.asc (235.00 B)
Download all attachments

2019-10-25 19:33:56

by Stanislaw Gruszka

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 2/2] mt76: dma: fix buffer unmap with non-linear skbs

On Thu, Oct 24, 2019 at 11:01:48AM +0200, Lorenzo Bianconi wrote:
> > On Thu, Oct 24, 2019 at 12:23:16AM +0200, Lorenzo Bianconi wrote:
> > > mt76 dma layer is supposed to unmap skb data buffers while keep txwi
> > > mapped on hw dma ring. At the moment mt76 wrongly unmap txwi or does
> > > not unmap data fragments in even positions for non-linear skbs. This
> > > issue may result in hw hangs with A-MSDU if the system relies on IOMMU
> > > or SWIOTLB. Fix this behaviour properly unmapping data fragments on
> > > non-linear skbs.
> >
> > If we have to keep txwi mapped, before unmap fragments, when then
> > txwi is unmaped ?
>
> txwi are mapped when they are crated in mt76_alloc_txwi(). Whenever we need to
> modify them we sync the DMA in mt76_dma_tx_queue_skb(). txwi are unmapped in
> mt76_tx_free() at driver unload.

So not only txwi is wrongly unmapped on runtime, but we can call
dma_sync_single_for_cpu/device() after dma_unmap_single().
That serious bug, good you spotted it and fixed!

Stanislaw

2019-10-25 19:42:57

by Heiner Kallweit

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

On 25.10.2019 01:07, Lorenzo Bianconi wrote:
>> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
>>>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
>>>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
>>>>> instability and so let's disable PCIE_ASPM by default. This patch has
>>>>> been successfully tested on U7612E-H1 mini-pice card
>>>>>
>>>>> Signed-off-by: Felix Fietkau <[email protected]>
>>>>> Signed-off-by: Lorenzo Bianconi <[email protected]>
>>>>> ---
>>>>> drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
>>>>> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
>>>>> .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
>>>>> 3 files changed, 50 insertions(+)
>>>>>
>>>
>>> [...]
>>>
>>>>> +
>>>>> + if (parent)
>>>>> + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
>>>>> + aspm_conf);
>>>>
>>>> + linux-pci mailing list
>>>
>>> Hi Heiner,
>>>
>>>>
>>>> All this seems to be legacy code copied from e1000e.
>>>> Fiddling with the low-level PCI(e) registers should be left to the
>>>> PCI core. It shouldn't be needed here, a simple call to
>>>> pci_disable_link_state() should be sufficient. Note that this function
>>>> has a return value meanwhile that you can check instead of reading
>>>> back low-level registers.
>>>
>>> ack, I will add it to v2
>>>
>>>> If BIOS forbids that OS changes ASPM settings, then this should be
>>>> respected (like PCI core does). Instead the network chip may provide
>>>> the option to configure whether it activates certain ASPM (sub-)states
>>>> or not. We went through a similar exercise with the r8169 driver,
>>>> you can check how it's done there.
>>>
>>> looking at the vendor sdk (at least in the version I currently have) there are
>>> no particular ASPM configurations, it just optionally disables it writing directly
>>> in pci registers.
>>> Moreover there are multiple drivers that are currently using this approach:
>>> - ath9k in ath_pci_aspm_init()
>>> - tg3 in tg3_chip_reset()
>>> - e1000e in __e1000e_disable_aspm()
>>> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
>>>
>> All these drivers include quite some legacy code. I can mainly speak for r8169:
>> First versions of the driver are almost as old as Linux. And even though I
>> refactored most of the driver still some legacy code for older chip versions
>> (like the two functions you mentioned) is included.
>>
>>> Is disabling the ASPM for the system the only option to make this minipcie
>>> work?
>>>
>>
>> No. What we do in r8169:
>>
>> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
>> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
>> - If it returns an errno, then disabling ASPM failed (most likely due to
>> BIOS forbidding ASPM changes - pci_disable_link_state will spit out
>> a related warning). In this case r8169 configures the chip to not initiate
>> transitions to L0s/L1 (the other end of the link may still try to enter
>> ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
>> to avoid the ASPM-related problems with certain versions of this chip.
>> Maybe your HW provides similar functionality.
>
> yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
> unfortunately there is no specific code or documentation I can use for mt76x2e.
> So as last chance I decided to disable ASPM directly (in this way the chip is
> working fine).
> Do you think a kernel parameter to disable ASPM directly would be acceptable?
>
Module parameters are not the preferred approach, even though some maintainers
may consider it acceptable. I think it should be ok if you disable ASPM per
default. Who wants ASPM can enable the individual states via brand-new
sysfs attributes (provided BIOS allows OS to control ASPM).
However changing ASPM settings via direct register writes may cause
inconsistencies between PCI core and actual settings.
I'm not sure whether there's any general best practice how to deal with the
scenario that a device misbehaves with ASPM enabled and OS isn't allowed to
change ASPM settings.
Maybe the PCI guys can advise on these points.

> Regards,
> Lorenzo
>
Heiner

>>
>>> Regards,
>>> Lorenzo
>>>
>> Heiner
>>
>>>>
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
>>>>> +
>>>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
>>>>> {
>>>>> static const struct mt76_bus_ops mt76_mmio_ops = {
>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>> index 570c159515a0..962812b6247d 100644
>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
>>>>> #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
>>>>>
>>>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
>>>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
>>>>>
>>>>> static inline u16 mt76_chip(struct mt76_dev *dev)
>>>>> {
>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>> index 73c3104f8858..264bef87e5c7 100644
>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>>> /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
>>>>> mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>>>>>
>>>>> + mt76_mmio_disable_aspm(pdev);
>>>>> +
>>>>> return 0;
>>>>>
>>>>> error:
>>>>>
>>>>
>>

2019-10-25 19:56:39

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

> On 25.10.2019 01:07, Lorenzo Bianconi wrote:
> >> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
> >>>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> >>>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> >>>>> instability and so let's disable PCIE_ASPM by default. This patch has
> >>>>> been successfully tested on U7612E-H1 mini-pice card
> >>>>>
> >>>>> Signed-off-by: Felix Fietkau <[email protected]>
> >>>>> Signed-off-by: Lorenzo Bianconi <[email protected]>
> >>>>> ---
> >>>>> drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
> >>>>> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
> >>>>> .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
> >>>>> 3 files changed, 50 insertions(+)
> >>>>>
> >>>
> >>> [...]
> >>>
> >>>>> +
> >>>>> + if (parent)
> >>>>> + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> >>>>> + aspm_conf);
> >>>>
> >>>> + linux-pci mailing list
> >>>
> >>> Hi Heiner,
> >>>
> >>>>
> >>>> All this seems to be legacy code copied from e1000e.
> >>>> Fiddling with the low-level PCI(e) registers should be left to the
> >>>> PCI core. It shouldn't be needed here, a simple call to
> >>>> pci_disable_link_state() should be sufficient. Note that this function
> >>>> has a return value meanwhile that you can check instead of reading
> >>>> back low-level registers.
> >>>
> >>> ack, I will add it to v2
> >>>
> >>>> If BIOS forbids that OS changes ASPM settings, then this should be
> >>>> respected (like PCI core does). Instead the network chip may provide
> >>>> the option to configure whether it activates certain ASPM (sub-)states
> >>>> or not. We went through a similar exercise with the r8169 driver,
> >>>> you can check how it's done there.
> >>>
> >>> looking at the vendor sdk (at least in the version I currently have) there are
> >>> no particular ASPM configurations, it just optionally disables it writing directly
> >>> in pci registers.
> >>> Moreover there are multiple drivers that are currently using this approach:
> >>> - ath9k in ath_pci_aspm_init()
> >>> - tg3 in tg3_chip_reset()
> >>> - e1000e in __e1000e_disable_aspm()
> >>> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
> >>>
> >> All these drivers include quite some legacy code. I can mainly speak for r8169:
> >> First versions of the driver are almost as old as Linux. And even though I
> >> refactored most of the driver still some legacy code for older chip versions
> >> (like the two functions you mentioned) is included.
> >>
> >>> Is disabling the ASPM for the system the only option to make this minipcie
> >>> work?
> >>>
> >>
> >> No. What we do in r8169:
> >>
> >> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
> >> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
> >> - If it returns an errno, then disabling ASPM failed (most likely due to
> >> BIOS forbidding ASPM changes - pci_disable_link_state will spit out
> >> a related warning). In this case r8169 configures the chip to not initiate
> >> transitions to L0s/L1 (the other end of the link may still try to enter
> >> ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
> >> to avoid the ASPM-related problems with certain versions of this chip.
> >> Maybe your HW provides similar functionality.
> >
> > yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
> > unfortunately there is no specific code or documentation I can use for mt76x2e.
> > So as last chance I decided to disable ASPM directly (in this way the chip is
> > working fine).
> > Do you think a kernel parameter to disable ASPM directly would be acceptable?
> >
> Module parameters are not the preferred approach, even though some maintainers
> may consider it acceptable. I think it should be ok if you disable ASPM per
> default. Who wants ASPM can enable the individual states via brand-new
> sysfs attributes (provided BIOS allows OS to control ASPM).
> However changing ASPM settings via direct register writes may cause
> inconsistencies between PCI core and actual settings.
> I'm not sure whether there's any general best practice how to deal with the
> scenario that a device misbehaves with ASPM enabled and OS isn't allowed to
> change ASPM settings.
> Maybe the PCI guys can advise on these points.

Hi Heiner,

I reviewed the mtk sdk and it seems mt7662/mt7612/mt7602 series does not
have hw pcie ps support (not sure if it just not implemented or so). In my
scenario without disabling ASPM the card does not work at all, so I guess we
can proceed with current approach and then try to understand if we can do
something better. What do you think?

@Ryder, Sean: do you have any hint on this topic?

Regards,
Lorenzo

>
> > Regards,
> > Lorenzo
> >
> Heiner
>
> >>
> >>> Regards,
> >>> Lorenzo
> >>>
> >> Heiner
> >>
> >>>>
> >>>>> +}
> >>>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> >>>>> +
> >>>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> >>>>> {
> >>>>> static const struct mt76_bus_ops mt76_mmio_ops = {
> >>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>> index 570c159515a0..962812b6247d 100644
> >>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> >>>>> #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
> >>>>>
> >>>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> >>>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
> >>>>>
> >>>>> static inline u16 mt76_chip(struct mt76_dev *dev)
> >>>>> {
> >>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>> index 73c3104f8858..264bef87e5c7 100644
> >>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >>>>> /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> >>>>> mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
> >>>>>
> >>>>> + mt76_mmio_disable_aspm(pdev);
> >>>>> +
> >>>>> return 0;
> >>>>>
> >>>>> error:
> >>>>>
> >>>>
> >>
>


Attachments:
(No filename) (6.39 kB)
signature.asc (235.00 B)
Download all attachments

2019-10-25 20:46:13

by Heiner Kallweit

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

On 25.10.2019 13:46, Lorenzo Bianconi wrote:
>> On 25.10.2019 01:07, Lorenzo Bianconi wrote:
>>>> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
>>>>>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
>>>>>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
>>>>>>> instability and so let's disable PCIE_ASPM by default. This patch has
>>>>>>> been successfully tested on U7612E-H1 mini-pice card
>>>>>>>
>>>>>>> Signed-off-by: Felix Fietkau <[email protected]>
>>>>>>> Signed-off-by: Lorenzo Bianconi <[email protected]>
>>>>>>> ---
>>>>>>> drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
>>>>>>> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
>>>>>>> .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
>>>>>>> 3 files changed, 50 insertions(+)
>>>>>>>
>>>>>
>>>>> [...]
>>>>>
>>>>>>> +
>>>>>>> + if (parent)
>>>>>>> + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
>>>>>>> + aspm_conf);
>>>>>>
>>>>>> + linux-pci mailing list
>>>>>
>>>>> Hi Heiner,
>>>>>
>>>>>>
>>>>>> All this seems to be legacy code copied from e1000e.
>>>>>> Fiddling with the low-level PCI(e) registers should be left to the
>>>>>> PCI core. It shouldn't be needed here, a simple call to
>>>>>> pci_disable_link_state() should be sufficient. Note that this function
>>>>>> has a return value meanwhile that you can check instead of reading
>>>>>> back low-level registers.
>>>>>
>>>>> ack, I will add it to v2
>>>>>
>>>>>> If BIOS forbids that OS changes ASPM settings, then this should be
>>>>>> respected (like PCI core does). Instead the network chip may provide
>>>>>> the option to configure whether it activates certain ASPM (sub-)states
>>>>>> or not. We went through a similar exercise with the r8169 driver,
>>>>>> you can check how it's done there.
>>>>>
>>>>> looking at the vendor sdk (at least in the version I currently have) there are
>>>>> no particular ASPM configurations, it just optionally disables it writing directly
>>>>> in pci registers.
>>>>> Moreover there are multiple drivers that are currently using this approach:
>>>>> - ath9k in ath_pci_aspm_init()
>>>>> - tg3 in tg3_chip_reset()
>>>>> - e1000e in __e1000e_disable_aspm()
>>>>> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
>>>>>
>>>> All these drivers include quite some legacy code. I can mainly speak for r8169:
>>>> First versions of the driver are almost as old as Linux. And even though I
>>>> refactored most of the driver still some legacy code for older chip versions
>>>> (like the two functions you mentioned) is included.
>>>>
>>>>> Is disabling the ASPM for the system the only option to make this minipcie
>>>>> work?
>>>>>
>>>>
>>>> No. What we do in r8169:
>>>>
>>>> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
>>>> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
>>>> - If it returns an errno, then disabling ASPM failed (most likely due to
>>>> BIOS forbidding ASPM changes - pci_disable_link_state will spit out
>>>> a related warning). In this case r8169 configures the chip to not initiate
>>>> transitions to L0s/L1 (the other end of the link may still try to enter
>>>> ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
>>>> to avoid the ASPM-related problems with certain versions of this chip.
>>>> Maybe your HW provides similar functionality.
>>>
>>> yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
>>> unfortunately there is no specific code or documentation I can use for mt76x2e.
>>> So as last chance I decided to disable ASPM directly (in this way the chip is
>>> working fine).
>>> Do you think a kernel parameter to disable ASPM directly would be acceptable?
>>>
>> Module parameters are not the preferred approach, even though some maintainers
>> may consider it acceptable. I think it should be ok if you disable ASPM per
>> default. Who wants ASPM can enable the individual states via brand-new
>> sysfs attributes (provided BIOS allows OS to control ASPM).
>> However changing ASPM settings via direct register writes may cause
>> inconsistencies between PCI core and actual settings.
>> I'm not sure whether there's any general best practice how to deal with the
>> scenario that a device misbehaves with ASPM enabled and OS isn't allowed to
>> change ASPM settings.
>> Maybe the PCI guys can advise on these points.
>
> Hi Heiner,
>
> I reviewed the mtk sdk and it seems mt7662/mt7612/mt7602 series does not
> have hw pcie ps support (not sure if it just not implemented or so). In my
> scenario without disabling ASPM the card does not work at all, so I guess we
> can proceed with current approach and then try to understand if we can do
> something better. What do you think?
>

If there's no proper way to deal with the issue, then you may have to go with
the current approach. Just what you could do is calling pci_disable_link_state()
first and do the direct register modification limbo only as a fallback if
function returns an errno or CONFIG_PCIEASPM isn't defined.

It may make sense to change the return value of the pci_disable_link_state
dummy if CONFIG_PCIEASPM isn't defined: from 0 to e.g. -EOPNOTSUPP
Then in the driver it wouldn't be needed to check CONFIG_PCIEASPM.

Interestingly in mt76x2/pci.c there's the following:

/* Fix up ASPM configuration */

/* RG_SSUSB_G1_CDR_BIR_LTR = 0x9 */
mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9);

/* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */
mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf);

/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);

Would be helpful to know what this does and whether it may need to be adjusted.
At least in your case this fix attempt doesn't seem to be sufficient.


> @Ryder, Sean: do you have any hint on this topic?
>
> Regards,
> Lorenzo
>
>>
>>> Regards,
>>> Lorenzo
>>>
>> Heiner
>>
>>>>
>>>>> Regards,
>>>>> Lorenzo
>>>>>
>>>> Heiner
>>>>
>>>>>>
>>>>>>> +}
>>>>>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
>>>>>>> +
>>>>>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
>>>>>>> {
>>>>>>> static const struct mt76_bus_ops mt76_mmio_ops = {
>>>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>>>> index 570c159515a0..962812b6247d 100644
>>>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
>>>>>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
>>>>>>> #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
>>>>>>>
>>>>>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
>>>>>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
>>>>>>>
>>>>>>> static inline u16 mt76_chip(struct mt76_dev *dev)
>>>>>>> {
>>>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>>>> index 73c3104f8858..264bef87e5c7 100644
>>>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
>>>>>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>>>>> /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
>>>>>>> mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>>>>>>>
>>>>>>> + mt76_mmio_disable_aspm(pdev);
>>>>>>> +
>>>>>>> return 0;
>>>>>>>
>>>>>>> error:
>>>>>>>
>>>>>>
>>>>
>>

2019-10-25 20:49:20

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

>
> On 25.10.2019 13:46, Lorenzo Bianconi wrote:
> >> On 25.10.2019 01:07, Lorenzo Bianconi wrote:
> >>>> On 24.10.2019 23:54, Lorenzo Bianconi wrote:
> >>>>>> On 24.10.2019 00:23, Lorenzo Bianconi wrote:
> >>>>>>> On same device (e.g. U7612E-H1) PCIE_ASPM causes continuous mcu hangs and
> >>>>>>> instability and so let's disable PCIE_ASPM by default. This patch has
> >>>>>>> been successfully tested on U7612E-H1 mini-pice card
> >>>>>>>
> >>>>>>> Signed-off-by: Felix Fietkau <[email protected]>
> >>>>>>> Signed-off-by: Lorenzo Bianconi <[email protected]>
> >>>>>>> ---
> >>>>>>> drivers/net/wireless/mediatek/mt76/mmio.c | 47 +++++++++++++++++++
> >>>>>>> drivers/net/wireless/mediatek/mt76/mt76.h | 1 +
> >>>>>>> .../net/wireless/mediatek/mt76/mt76x2/pci.c | 2 +
> >>>>>>> 3 files changed, 50 insertions(+)
> >>>>>>>
> >>>>>
> >>>>> [...]
> >>>>>
> >>>>>>> +
> >>>>>>> + if (parent)
> >>>>>>> + pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
> >>>>>>> + aspm_conf);
> >>>>>>
> >>>>>> + linux-pci mailing list
> >>>>>
> >>>>> Hi Heiner,
> >>>>>
> >>>>>>
> >>>>>> All this seems to be legacy code copied from e1000e.
> >>>>>> Fiddling with the low-level PCI(e) registers should be left to the
> >>>>>> PCI core. It shouldn't be needed here, a simple call to
> >>>>>> pci_disable_link_state() should be sufficient. Note that this function
> >>>>>> has a return value meanwhile that you can check instead of reading
> >>>>>> back low-level registers.
> >>>>>
> >>>>> ack, I will add it to v2
> >>>>>
> >>>>>> If BIOS forbids that OS changes ASPM settings, then this should be
> >>>>>> respected (like PCI core does). Instead the network chip may provide
> >>>>>> the option to configure whether it activates certain ASPM (sub-)states
> >>>>>> or not. We went through a similar exercise with the r8169 driver,
> >>>>>> you can check how it's done there.
> >>>>>
> >>>>> looking at the vendor sdk (at least in the version I currently have) there are
> >>>>> no particular ASPM configurations, it just optionally disables it writing directly
> >>>>> in pci registers.
> >>>>> Moreover there are multiple drivers that are currently using this approach:
> >>>>> - ath9k in ath_pci_aspm_init()
> >>>>> - tg3 in tg3_chip_reset()
> >>>>> - e1000e in __e1000e_disable_aspm()
> >>>>> - r8169 in rtl_enable_clock_request()/rtl_disable_clock_request()
> >>>>>
> >>>> All these drivers include quite some legacy code. I can mainly speak for r8169:
> >>>> First versions of the driver are almost as old as Linux. And even though I
> >>>> refactored most of the driver still some legacy code for older chip versions
> >>>> (like the two functions you mentioned) is included.
> >>>>
> >>>>> Is disabling the ASPM for the system the only option to make this minipcie
> >>>>> work?
> >>>>>
> >>>>
> >>>> No. What we do in r8169:
> >>>>
> >>>> - call pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1)
> >>>> - If it returns 0, then ASPM (including the L1 sub-states) is disabled.
> >>>> - If it returns an errno, then disabling ASPM failed (most likely due to
> >>>> BIOS forbidding ASPM changes - pci_disable_link_state will spit out
> >>>> a related warning). In this case r8169 configures the chip to not initiate
> >>>> transitions to L0s/L1 (the other end of the link may still try to enter
> >>>> ASPM states). See rtl_hw_aspm_clkreq_enable(). That's sufficient
> >>>> to avoid the ASPM-related problems with certain versions of this chip.
> >>>> Maybe your HW provides similar functionality.
> >>>
> >>> yep, I looked at rtl_hw_aspm_clkreq_enable. This is more or less what I did but
> >>> unfortunately there is no specific code or documentation I can use for mt76x2e.
> >>> So as last chance I decided to disable ASPM directly (in this way the chip is
> >>> working fine).
> >>> Do you think a kernel parameter to disable ASPM directly would be acceptable?
> >>>
> >> Module parameters are not the preferred approach, even though some maintainers
> >> may consider it acceptable. I think it should be ok if you disable ASPM per
> >> default. Who wants ASPM can enable the individual states via brand-new
> >> sysfs attributes (provided BIOS allows OS to control ASPM).
> >> However changing ASPM settings via direct register writes may cause
> >> inconsistencies between PCI core and actual settings.
> >> I'm not sure whether there's any general best practice how to deal with the
> >> scenario that a device misbehaves with ASPM enabled and OS isn't allowed to
> >> change ASPM settings.
> >> Maybe the PCI guys can advise on these points.
> >
> > Hi Heiner,
> >
> > I reviewed the mtk sdk and it seems mt7662/mt7612/mt7602 series does not
> > have hw pcie ps support (not sure if it just not implemented or so). In my
> > scenario without disabling ASPM the card does not work at all, so I guess we
> > can proceed with current approach and then try to understand if we can do
> > something better. What do you think?
> >
>
> If there's no proper way to deal with the issue, then you may have to go with
> the current approach. Just what you could do is calling pci_disable_link_state()
> first and do the direct register modification limbo only as a fallback if
> function returns an errno or CONFIG_PCIEASPM isn't defined.

Hi Heiner,

ack, will do in v2

>
> It may make sense to change the return value of the pci_disable_link_state
> dummy if CONFIG_PCIEASPM isn't defined: from 0 to e.g. -EOPNOTSUPP
> Then in the driver it wouldn't be needed to check CONFIG_PCIEASPM.
>
> Interestingly in mt76x2/pci.c there's the following:
>
> /* Fix up ASPM configuration */
>
> /* RG_SSUSB_G1_CDR_BIR_LTR = 0x9 */
> mt76_rmw_field(dev, 0x15a10, 0x1f << 16, 0x9);
>
> /* RG_SSUSB_G1_CDR_BIC_LTR = 0xf */
> mt76_rmw_field(dev, 0x15a0c, 0xf << 28, 0xf);
>
> /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
>
> Would be helpful to know what this does and whether it may need to be adjusted.
> At least in your case this fix attempt doesn't seem to be sufficient.

unfortunately not.

Regards,
Lorenzo

>
>
> > @Ryder, Sean: do you have any hint on this topic?
> >
> > Regards,
> > Lorenzo
> >
> >>
> >>> Regards,
> >>> Lorenzo
> >>>
> >> Heiner
> >>
> >>>>
> >>>>> Regards,
> >>>>> Lorenzo
> >>>>>
> >>>> Heiner
> >>>>
> >>>>>>
> >>>>>>> +}
> >>>>>>> +EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
> >>>>>>> +
> >>>>>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs)
> >>>>>>> {
> >>>>>>> static const struct mt76_bus_ops mt76_mmio_ops = {
> >>>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>>>> index 570c159515a0..962812b6247d 100644
> >>>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> >>>>>>> @@ -578,6 +578,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
> >>>>>>> #define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
> >>>>>>>
> >>>>>>> void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
> >>>>>>> +void mt76_mmio_disable_aspm(struct pci_dev *pdev);
> >>>>>>>
> >>>>>>> static inline u16 mt76_chip(struct mt76_dev *dev)
> >>>>>>> {
> >>>>>>> diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>>>> index 73c3104f8858..264bef87e5c7 100644
> >>>>>>> --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>>>> +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
> >>>>>>> @@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >>>>>>> /* RG_SSUSB_CDR_BR_PE1D = 0x3 */
> >>>>>>> mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
> >>>>>>>
> >>>>>>> + mt76_mmio_disable_aspm(pdev);
> >>>>>>> +
> >>>>>>> return 0;
> >>>>>>>
> >>>>>>> error:
> >>>>>>>
> >>>>>>
> >>>>
> >>
>

2019-10-26 01:40:23

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH wireless-drivers 1/2] mt76: mt76x2e: disable pcie_aspm by default

Hi Lorenzo,

I love your patch! Yet something to improve:

[auto build test ERROR on wireless-drivers/master]

url: https://github.com/0day-ci/linux/commits/Lorenzo-Bianconi/fix-mt76x2e-hangs-on-U7612E-mini-pcie/20191026-054624
base: https://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git master
config: sh-allmodconfig (attached as .config)
compiler: sh4-linux-gcc (GCC) 7.4.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.4.0 make.cross ARCH=sh

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot <[email protected]>

All errors (new ones prefixed by >>):

drivers/net/wireless/mediatek/mt76/mmio.c: In function 'mt76_mmio_disable_aspm':
>> drivers/net/wireless/mediatek/mt76/mmio.c:88:2: error: implicit declaration of function 'pcie_capability_read_word'; did you mean 'has_capability_noaudit'? [-Werror=implicit-function-declaration]
pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
^~~~~~~~~~~~~~~~~~~~~~~~~
has_capability_noaudit
>> drivers/net/wireless/mediatek/mt76/mmio.c:120:2: error: implicit declaration of function 'pcie_capability_clear_word'; did you mean 'has_capability_noaudit'? [-Werror=implicit-function-declaration]
pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf);
^~~~~~~~~~~~~~~~~~~~~~~~~~
has_capability_noaudit
cc1: some warnings being treated as errors

vim +88 drivers/net/wireless/mediatek/mt76/mmio.c

82
83 void mt76_mmio_disable_aspm(struct pci_dev *pdev)
84 {
85 struct pci_dev *parent = pdev->bus->self;
86 u16 aspm_conf, parent_aspm_conf = 0;
87
> 88 pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
89 aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
90 if (parent) {
91 pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
92 &parent_aspm_conf);
93 parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
94 }
95
96 if (!aspm_conf && (!parent || !parent_aspm_conf)) {
97 /* aspm already disabled */
98 return;
99 }
100
101 dev_info(&pdev->dev, "disabling ASPM %s %s\n",
102 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
103 (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
104
105 #ifdef CONFIG_PCIEASPM
106 pci_disable_link_state(pdev, aspm_conf);
107
108 /* Double-check ASPM control. If not disabled by the above, the
109 * BIOS is preventing that from happening (or CONFIG_PCIEASPM is
110 * not enabled); override by writing PCI config space directly.
111 */
112 pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
113 if (!(aspm_conf & PCI_EXP_LNKCTL_ASPMC))
114 return;
115 #endif /* CONFIG_PCIEASPM */
116
117 /* Both device and parent should have the same ASPM setting.
118 * Disable ASPM in downstream component first and then upstream.
119 */
> 120 pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf);
121
122 if (parent)
123 pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
124 aspm_conf);
125 }
126 EXPORT_SYMBOL_GPL(mt76_mmio_disable_aspm);
127

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation


Attachments:
(No filename) (3.48 kB)
.config.gz (51.02 kB)
Download all attachments