2019-06-06 15:37:30

by Lorenzo Bianconi

[permalink] [raw]
Subject: [PATCH] mt7601u: fix possible memory leak when the device is disconnected

When the device is disconnected while passing traffic it is possible
to receive out of order urbs causing a memory leak since the skb liked
to the current tx urb is not removed. Fix the issue deallocating the skb
cleaning up the tx ring. Moreover this patch fixes the following kernel
warning

[ 57.480771] usb 1-1: USB disconnect, device number 2
[ 57.483451] ------------[ cut here ]------------
[ 57.483462] TX urb mismatch
[ 57.483481] WARNING: CPU: 1 PID: 32 at drivers/net/wireless/mediatek/mt7601u/dma.c:245 mt7601u_complete_tx+0x165/00
[ 57.483483] Modules linked in:
[ 57.483496] CPU: 1 PID: 32 Comm: kworker/1:1 Not tainted 5.2.0-rc1+ #72
[ 57.483498] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-2.fc30 04/01/2014
[ 57.483502] Workqueue: usb_hub_wq hub_event
[ 57.483507] RIP: 0010:mt7601u_complete_tx+0x165/0x1e0
[ 57.483510] Code: 8b b5 10 04 00 00 8b 8d 14 04 00 00 eb 8b 80 3d b1 cb e1 00 00 75 9e 48 c7 c7 a4 ea 05 82 c6 05 f
[ 57.483513] RSP: 0000:ffffc900000a0d28 EFLAGS: 00010092
[ 57.483516] RAX: 000000000000000f RBX: ffff88802c0a62c0 RCX: ffffc900000a0c2c
[ 57.483518] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff810a8371
[ 57.483520] RBP: ffff88803ced6858 R08: 0000000000000000 R09: 0000000000000001
[ 57.483540] R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000046
[ 57.483542] R13: ffff88802c0a6c88 R14: ffff88803baab540 R15: ffff88803a0cc078
[ 57.483548] FS: 0000000000000000(0000) GS:ffff88803eb00000(0000) knlGS:0000000000000000
[ 57.483550] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 57.483552] CR2: 000055e7f6780100 CR3: 0000000028c86000 CR4: 00000000000006a0
[ 57.483554] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 57.483556] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 57.483559] Call Trace:
[ 57.483561] <IRQ>
[ 57.483565] __usb_hcd_giveback_urb+0x77/0xe0
[ 57.483570] xhci_giveback_urb_in_irq.isra.0+0x8b/0x140
[ 57.483574] handle_cmd_completion+0xf5b/0x12c0
[ 57.483577] xhci_irq+0x1f6/0x1810
[ 57.483581] ? lockdep_hardirqs_on+0x9e/0x180
[ 57.483584] ? _raw_spin_unlock_irq+0x24/0x30
[ 57.483588] __handle_irq_event_percpu+0x3a/0x260
[ 57.483592] handle_irq_event_percpu+0x1c/0x60
[ 57.483595] handle_irq_event+0x2f/0x4c
[ 57.483599] handle_edge_irq+0x7e/0x1a0
[ 57.483603] handle_irq+0x17/0x20
[ 57.483607] do_IRQ+0x54/0x110
[ 57.483610] common_interrupt+0xf/0xf
[ 57.483612] </IRQ>

Fixes: c869f77d6abb ("add mt7601u driver")
Signed-off-by: Lorenzo Bianconi <[email protected]>
---
drivers/net/wireless/mediatek/mt7601u/dma.c | 24 +++++++++++++++------
drivers/net/wireless/mediatek/mt7601u/tx.c | 4 ++--
2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
index e7703990b291..bbf1deed7f3b 100644
--- a/drivers/net/wireless/mediatek/mt7601u/dma.c
+++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
@@ -238,14 +238,25 @@ static void mt7601u_complete_tx(struct urb *urb)
struct sk_buff *skb;
unsigned long flags;

- spin_lock_irqsave(&dev->tx_lock, flags);
+ switch (urb->status) {
+ case -ECONNRESET:
+ case -ESHUTDOWN:
+ case -ENOENT:
+ return;
+ default:
+ dev_err_ratelimited(dev->dev, "tx urb failed: %d\n",
+ urb->status);
+ /* fall through */
+ case 0:
+ break;
+ }

- if (mt7601u_urb_has_error(urb))
- dev_err(dev->dev, "Error: TX urb failed:%d\n", urb->status);
+ spin_lock_irqsave(&dev->tx_lock, flags);
if (WARN_ONCE(q->e[q->start].urb != urb, "TX urb mismatch"))
goto out;

skb = q->e[q->start].skb;
+ q->e[q->start].skb = NULL;
trace_mt_tx_dma_done(dev, skb);

__skb_queue_tail(&dev->tx_skb_done, skb);
@@ -446,10 +457,10 @@ static void mt7601u_free_tx_queue(struct mt7601u_tx_queue *q)
{
int i;

- WARN_ON(q->used);
-
for (i = 0; i < q->entries; i++) {
usb_poison_urb(q->e[i].urb);
+ if (q->e[i].skb)
+ mt7601u_tx_status(q->dev, q->e[i].skb);
usb_free_urb(q->e[i].urb);
}
}
@@ -463,6 +474,7 @@ static void mt7601u_free_tx(struct mt7601u_dev *dev)

for (i = 0; i < __MT_EP_OUT_MAX; i++)
mt7601u_free_tx_queue(&dev->tx_q[i]);
+ tasklet_kill(&dev->tx_tasklet);
}

static int mt7601u_alloc_tx_queue(struct mt7601u_dev *dev,
@@ -528,6 +540,4 @@ void mt7601u_dma_cleanup(struct mt7601u_dev *dev)

mt7601u_free_rx(dev);
mt7601u_free_tx(dev);
-
- tasklet_kill(&dev->tx_tasklet);
}
diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c
index 3600e911a63e..4d81c45722fb 100644
--- a/drivers/net/wireless/mediatek/mt7601u/tx.c
+++ b/drivers/net/wireless/mediatek/mt7601u/tx.c
@@ -117,9 +117,9 @@ void mt7601u_tx_status(struct mt7601u_dev *dev, struct sk_buff *skb)
info->status.rates[0].idx = -1;
info->flags |= IEEE80211_TX_STAT_ACK;

- spin_lock(&dev->mac_lock);
+ spin_lock_bh(&dev->mac_lock);
ieee80211_tx_status(dev->hw, skb);
- spin_unlock(&dev->mac_lock);
+ spin_unlock_bh(&dev->mac_lock);
}

static int mt7601u_skb_rooms(struct mt7601u_dev *dev, struct sk_buff *skb)
--
2.21.0


2019-06-06 20:16:23

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH] mt7601u: fix possible memory leak when the device is disconnected

On Thu, 6 Jun 2019 17:34:16 +0200, Lorenzo Bianconi wrote:
> When the device is disconnected while passing traffic it is possible
> to receive out of order urbs causing a memory leak since the skb liked
> to the current tx urb is not removed. Fix the issue deallocating the skb
> cleaning up the tx ring. Moreover this patch fixes the following kernel
> warning

Ugh if we don't have ordering guarantees then the entire "ring" scheme
no longer works :( Should we move to URB queues, I don't remember now,
but there seem to had been a better way to manage URBs :S

> [ 57.480771] usb 1-1: USB disconnect, device number 2
> [ 57.483451] ------------[ cut here ]------------
> [ 57.483462] TX urb mismatch
> [ 57.483481] WARNING: CPU: 1 PID: 32 at drivers/net/wireless/mediatek/mt7601u/dma.c:245 mt7601u_complete_tx+0x165/00
> [ 57.483483] Modules linked in:
> [ 57.483496] CPU: 1 PID: 32 Comm: kworker/1:1 Not tainted 5.2.0-rc1+ #72
> [ 57.483498] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-2.fc30 04/01/2014
> [ 57.483502] Workqueue: usb_hub_wq hub_event
> [ 57.483507] RIP: 0010:mt7601u_complete_tx+0x165/0x1e0
> [ 57.483510] Code: 8b b5 10 04 00 00 8b 8d 14 04 00 00 eb 8b 80 3d b1 cb e1 00 00 75 9e 48 c7 c7 a4 ea 05 82 c6 05 f
> [ 57.483513] RSP: 0000:ffffc900000a0d28 EFLAGS: 00010092
> [ 57.483516] RAX: 000000000000000f RBX: ffff88802c0a62c0 RCX: ffffc900000a0c2c
> [ 57.483518] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff810a8371
> [ 57.483520] RBP: ffff88803ced6858 R08: 0000000000000000 R09: 0000000000000001
> [ 57.483540] R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000046
> [ 57.483542] R13: ffff88802c0a6c88 R14: ffff88803baab540 R15: ffff88803a0cc078
> [ 57.483548] FS: 0000000000000000(0000) GS:ffff88803eb00000(0000) knlGS:0000000000000000
> [ 57.483550] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 57.483552] CR2: 000055e7f6780100 CR3: 0000000028c86000 CR4: 00000000000006a0
> [ 57.483554] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 57.483556] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 57.483559] Call Trace:
> [ 57.483561] <IRQ>
> [ 57.483565] __usb_hcd_giveback_urb+0x77/0xe0
> [ 57.483570] xhci_giveback_urb_in_irq.isra.0+0x8b/0x140
> [ 57.483574] handle_cmd_completion+0xf5b/0x12c0
> [ 57.483577] xhci_irq+0x1f6/0x1810
> [ 57.483581] ? lockdep_hardirqs_on+0x9e/0x180
> [ 57.483584] ? _raw_spin_unlock_irq+0x24/0x30
> [ 57.483588] __handle_irq_event_percpu+0x3a/0x260
> [ 57.483592] handle_irq_event_percpu+0x1c/0x60
> [ 57.483595] handle_irq_event+0x2f/0x4c
> [ 57.483599] handle_edge_irq+0x7e/0x1a0
> [ 57.483603] handle_irq+0x17/0x20
> [ 57.483607] do_IRQ+0x54/0x110
> [ 57.483610] common_interrupt+0xf/0xf
> [ 57.483612] </IRQ>
>
> Fixes: c869f77d6abb ("add mt7601u driver")
> Signed-off-by: Lorenzo Bianconi <[email protected]>
> ---
> drivers/net/wireless/mediatek/mt7601u/dma.c | 24 +++++++++++++++------
> drivers/net/wireless/mediatek/mt7601u/tx.c | 4 ++--
> 2 files changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
> index e7703990b291..bbf1deed7f3b 100644
> --- a/drivers/net/wireless/mediatek/mt7601u/dma.c
> +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
> @@ -238,14 +238,25 @@ static void mt7601u_complete_tx(struct urb *urb)
> struct sk_buff *skb;
> unsigned long flags;
>
> - spin_lock_irqsave(&dev->tx_lock, flags);
> + switch (urb->status) {
> + case -ECONNRESET:
> + case -ESHUTDOWN:
> + case -ENOENT:
> + return;
> + default:
> + dev_err_ratelimited(dev->dev, "tx urb failed: %d\n",
> + urb->status);
> + /* fall through */
> + case 0:
> + break;
> + }
>
> - if (mt7601u_urb_has_error(urb))
> - dev_err(dev->dev, "Error: TX urb failed:%d\n", urb->status);
> + spin_lock_irqsave(&dev->tx_lock, flags);
> if (WARN_ONCE(q->e[q->start].urb != urb, "TX urb mismatch"))
> goto out;
>
> skb = q->e[q->start].skb;
> + q->e[q->start].skb = NULL;
> trace_mt_tx_dma_done(dev, skb);
>
> __skb_queue_tail(&dev->tx_skb_done, skb);
> @@ -446,10 +457,10 @@ static void mt7601u_free_tx_queue(struct mt7601u_tx_queue *q)
> {
> int i;
>
> - WARN_ON(q->used);
> -
> for (i = 0; i < q->entries; i++) {
> usb_poison_urb(q->e[i].urb);
> + if (q->e[i].skb)
> + mt7601u_tx_status(q->dev, q->e[i].skb);

Perhaps a separate patch?

> usb_free_urb(q->e[i].urb);
> }
> }
> @@ -463,6 +474,7 @@ static void mt7601u_free_tx(struct mt7601u_dev *dev)
>
> for (i = 0; i < __MT_EP_OUT_MAX; i++)
> mt7601u_free_tx_queue(&dev->tx_q[i]);
> + tasklet_kill(&dev->tx_tasklet);
> }
>
> static int mt7601u_alloc_tx_queue(struct mt7601u_dev *dev,
> @@ -528,6 +540,4 @@ void mt7601u_dma_cleanup(struct mt7601u_dev *dev)
>
> mt7601u_free_rx(dev);
> mt7601u_free_tx(dev);
> -
> - tasklet_kill(&dev->tx_tasklet);
> }
> diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c
> index 3600e911a63e..4d81c45722fb 100644
> --- a/drivers/net/wireless/mediatek/mt7601u/tx.c
> +++ b/drivers/net/wireless/mediatek/mt7601u/tx.c
> @@ -117,9 +117,9 @@ void mt7601u_tx_status(struct mt7601u_dev *dev, struct sk_buff *skb)
> info->status.rates[0].idx = -1;
> info->flags |= IEEE80211_TX_STAT_ACK;
>
> - spin_lock(&dev->mac_lock);
> + spin_lock_bh(&dev->mac_lock);
> ieee80211_tx_status(dev->hw, skb);
> - spin_unlock(&dev->mac_lock);
> + spin_unlock_bh(&dev->mac_lock);
> }
>
> static int mt7601u_skb_rooms(struct mt7601u_dev *dev, struct sk_buff *skb)

2019-06-06 22:31:36

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH] mt7601u: fix possible memory leak when the device is disconnected

On Thu, Jun 6, 2019 at 8:49 PM Jakub Kicinski <[email protected]> wrote:
>
> On Thu, 6 Jun 2019 17:34:16 +0200, Lorenzo Bianconi wrote:
> > When the device is disconnected while passing traffic it is possible
> > to receive out of order urbs causing a memory leak since the skb liked
> > to the current tx urb is not removed. Fix the issue deallocating the skb
> > cleaning up the tx ring. Moreover this patch fixes the following kernel
> > warning
>
> Ugh if we don't have ordering guarantees then the entire "ring" scheme
> no longer works :( Should we move to URB queues, I don't remember now,
> but there seem to had been a better way to manage URBs :S
>

actually I have observed these issues on tx/rx side just during device
disconnection while passing traffic.
I guess we can assume a proper urb ordering during normal operation
(and so tx/rx ring works fine).
Btw what do you mean with 'URB queues'? :)

> > [ 57.480771] usb 1-1: USB disconnect, device number 2
> > [ 57.483451] ------------[ cut here ]------------
> > [ 57.483462] TX urb mismatch
> > [ 57.483481] WARNING: CPU: 1 PID: 32 at drivers/net/wireless/mediatek/mt7601u/dma.c:245 mt7601u_complete_tx+0x165/00
> > [ 57.483483] Modules linked in:
> > [ 57.483496] CPU: 1 PID: 32 Comm: kworker/1:1 Not tainted 5.2.0-rc1+ #72
> > [ 57.483498] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-2.fc30 04/01/2014
> > [ 57.483502] Workqueue: usb_hub_wq hub_event
> > [ 57.483507] RIP: 0010:mt7601u_complete_tx+0x165/0x1e0
> > [ 57.483510] Code: 8b b5 10 04 00 00 8b 8d 14 04 00 00 eb 8b 80 3d b1 cb e1 00 00 75 9e 48 c7 c7 a4 ea 05 82 c6 05 f
> > [ 57.483513] RSP: 0000:ffffc900000a0d28 EFLAGS: 00010092
> > [ 57.483516] RAX: 000000000000000f RBX: ffff88802c0a62c0 RCX: ffffc900000a0c2c
> > [ 57.483518] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff810a8371
> > [ 57.483520] RBP: ffff88803ced6858 R08: 0000000000000000 R09: 0000000000000001
> > [ 57.483540] R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000046
> > [ 57.483542] R13: ffff88802c0a6c88 R14: ffff88803baab540 R15: ffff88803a0cc078
> > [ 57.483548] FS: 0000000000000000(0000) GS:ffff88803eb00000(0000) knlGS:0000000000000000
> > [ 57.483550] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 57.483552] CR2: 000055e7f6780100 CR3: 0000000028c86000 CR4: 00000000000006a0
> > [ 57.483554] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > [ 57.483556] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > [ 57.483559] Call Trace:
> > [ 57.483561] <IRQ>
> > [ 57.483565] __usb_hcd_giveback_urb+0x77/0xe0
> > [ 57.483570] xhci_giveback_urb_in_irq.isra.0+0x8b/0x140
> > [ 57.483574] handle_cmd_completion+0xf5b/0x12c0
> > [ 57.483577] xhci_irq+0x1f6/0x1810
> > [ 57.483581] ? lockdep_hardirqs_on+0x9e/0x180
> > [ 57.483584] ? _raw_spin_unlock_irq+0x24/0x30
> > [ 57.483588] __handle_irq_event_percpu+0x3a/0x260
> > [ 57.483592] handle_irq_event_percpu+0x1c/0x60
> > [ 57.483595] handle_irq_event+0x2f/0x4c
> > [ 57.483599] handle_edge_irq+0x7e/0x1a0
> > [ 57.483603] handle_irq+0x17/0x20
> > [ 57.483607] do_IRQ+0x54/0x110
> > [ 57.483610] common_interrupt+0xf/0xf
> > [ 57.483612] </IRQ>
> >
> > Fixes: c869f77d6abb ("add mt7601u driver")
> > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > ---
> > drivers/net/wireless/mediatek/mt7601u/dma.c | 24 +++++++++++++++------
> > drivers/net/wireless/mediatek/mt7601u/tx.c | 4 ++--
> > 2 files changed, 19 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
> > index e7703990b291..bbf1deed7f3b 100644
> > --- a/drivers/net/wireless/mediatek/mt7601u/dma.c
> > +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
> > @@ -238,14 +238,25 @@ static void mt7601u_complete_tx(struct urb *urb)
> > struct sk_buff *skb;
> > unsigned long flags;
> >
> > - spin_lock_irqsave(&dev->tx_lock, flags);
> > + switch (urb->status) {
> > + case -ECONNRESET:
> > + case -ESHUTDOWN:
> > + case -ENOENT:
> > + return;
> > + default:
> > + dev_err_ratelimited(dev->dev, "tx urb failed: %d\n",
> > + urb->status);
> > + /* fall through */
> > + case 0:
> > + break;
> > + }
> >
> > - if (mt7601u_urb_has_error(urb))
> > - dev_err(dev->dev, "Error: TX urb failed:%d\n", urb->status);
> > + spin_lock_irqsave(&dev->tx_lock, flags);
> > if (WARN_ONCE(q->e[q->start].urb != urb, "TX urb mismatch"))
> > goto out;
> >
> > skb = q->e[q->start].skb;
> > + q->e[q->start].skb = NULL;
> > trace_mt_tx_dma_done(dev, skb);
> >
> > __skb_queue_tail(&dev->tx_skb_done, skb);
> > @@ -446,10 +457,10 @@ static void mt7601u_free_tx_queue(struct mt7601u_tx_queue *q)
> > {
> > int i;
> >
> > - WARN_ON(q->used);
> > -
> > for (i = 0; i < q->entries; i++) {
> > usb_poison_urb(q->e[i].urb);
> > + if (q->e[i].skb)
> > + mt7601u_tx_status(q->dev, q->e[i].skb);
>
> Perhaps a separate patch?
>

As I did for rx side, if we do not schedule the tx tasklet when the
device has been disconnected I guess we need this chuck in the same
patch. What do you think?

Regards,
Lorenzo

> > usb_free_urb(q->e[i].urb);
> > }
> > }
> > @@ -463,6 +474,7 @@ static void mt7601u_free_tx(struct mt7601u_dev *dev)
> >
> > for (i = 0; i < __MT_EP_OUT_MAX; i++)
> > mt7601u_free_tx_queue(&dev->tx_q[i]);
> > + tasklet_kill(&dev->tx_tasklet);
> > }
> >
> > static int mt7601u_alloc_tx_queue(struct mt7601u_dev *dev,
> > @@ -528,6 +540,4 @@ void mt7601u_dma_cleanup(struct mt7601u_dev *dev)
> >
> > mt7601u_free_rx(dev);
> > mt7601u_free_tx(dev);
> > -
> > - tasklet_kill(&dev->tx_tasklet);
> > }
> > diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c
> > index 3600e911a63e..4d81c45722fb 100644
> > --- a/drivers/net/wireless/mediatek/mt7601u/tx.c
> > +++ b/drivers/net/wireless/mediatek/mt7601u/tx.c
> > @@ -117,9 +117,9 @@ void mt7601u_tx_status(struct mt7601u_dev *dev, struct sk_buff *skb)
> > info->status.rates[0].idx = -1;
> > info->flags |= IEEE80211_TX_STAT_ACK;
> >
> > - spin_lock(&dev->mac_lock);
> > + spin_lock_bh(&dev->mac_lock);
> > ieee80211_tx_status(dev->hw, skb);
> > - spin_unlock(&dev->mac_lock);
> > + spin_unlock_bh(&dev->mac_lock);
> > }
> >
> > static int mt7601u_skb_rooms(struct mt7601u_dev *dev, struct sk_buff *skb)
>

2019-06-06 22:35:22

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH] mt7601u: fix possible memory leak when the device is disconnected

On Thu, 6 Jun 2019 23:10:15 +0200, Lorenzo Bianconi wrote:
> On Thu, Jun 6, 2019 at 8:49 PM Jakub Kicinski <[email protected]> wrote:
> >
> > On Thu, 6 Jun 2019 17:34:16 +0200, Lorenzo Bianconi wrote:
> > > When the device is disconnected while passing traffic it is possible
> > > to receive out of order urbs causing a memory leak since the skb liked
> > > to the current tx urb is not removed. Fix the issue deallocating the skb
> > > cleaning up the tx ring. Moreover this patch fixes the following kernel
> > > warning
> >
> > Ugh if we don't have ordering guarantees then the entire "ring" scheme
> > no longer works :( Should we move to URB queues, I don't remember now,
> > but there seem to had been a better way to manage URBs :S
> >
>
> actually I have observed these issues on tx/rx side just during device
> disconnection while passing traffic.
> I guess we can assume a proper urb ordering during normal operation
> (and so tx/rx ring works fine).

Ah, phew, okay. That's fine then.

> Btw what do you mean with 'URB queues'? :)

I think it may have been URB anchors. I remember looking at carl9170
and liking how the DMAs operated there.

> > > [ 57.480771] usb 1-1: USB disconnect, device number 2
> > > [ 57.483451] ------------[ cut here ]------------
> > > [ 57.483462] TX urb mismatch
> > > [ 57.483481] WARNING: CPU: 1 PID: 32 at drivers/net/wireless/mediatek/mt7601u/dma.c:245 mt7601u_complete_tx+0x165/00
> > > [ 57.483483] Modules linked in:
> > > [ 57.483496] CPU: 1 PID: 32 Comm: kworker/1:1 Not tainted 5.2.0-rc1+ #72
> > > [ 57.483498] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-2.fc30 04/01/2014
> > > [ 57.483502] Workqueue: usb_hub_wq hub_event
> > > [ 57.483507] RIP: 0010:mt7601u_complete_tx+0x165/0x1e0
> > > [ 57.483510] Code: 8b b5 10 04 00 00 8b 8d 14 04 00 00 eb 8b 80 3d b1 cb e1 00 00 75 9e 48 c7 c7 a4 ea 05 82 c6 05 f
> > > [ 57.483513] RSP: 0000:ffffc900000a0d28 EFLAGS: 00010092
> > > [ 57.483516] RAX: 000000000000000f RBX: ffff88802c0a62c0 RCX: ffffc900000a0c2c
> > > [ 57.483518] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff810a8371
> > > [ 57.483520] RBP: ffff88803ced6858 R08: 0000000000000000 R09: 0000000000000001
> > > [ 57.483540] R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000046
> > > [ 57.483542] R13: ffff88802c0a6c88 R14: ffff88803baab540 R15: ffff88803a0cc078
> > > [ 57.483548] FS: 0000000000000000(0000) GS:ffff88803eb00000(0000) knlGS:0000000000000000
> > > [ 57.483550] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [ 57.483552] CR2: 000055e7f6780100 CR3: 0000000028c86000 CR4: 00000000000006a0
> > > [ 57.483554] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > [ 57.483556] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > > [ 57.483559] Call Trace:
> > > [ 57.483561] <IRQ>
> > > [ 57.483565] __usb_hcd_giveback_urb+0x77/0xe0
> > > [ 57.483570] xhci_giveback_urb_in_irq.isra.0+0x8b/0x140
> > > [ 57.483574] handle_cmd_completion+0xf5b/0x12c0
> > > [ 57.483577] xhci_irq+0x1f6/0x1810
> > > [ 57.483581] ? lockdep_hardirqs_on+0x9e/0x180
> > > [ 57.483584] ? _raw_spin_unlock_irq+0x24/0x30
> > > [ 57.483588] __handle_irq_event_percpu+0x3a/0x260
> > > [ 57.483592] handle_irq_event_percpu+0x1c/0x60
> > > [ 57.483595] handle_irq_event+0x2f/0x4c
> > > [ 57.483599] handle_edge_irq+0x7e/0x1a0
> > > [ 57.483603] handle_irq+0x17/0x20
> > > [ 57.483607] do_IRQ+0x54/0x110
> > > [ 57.483610] common_interrupt+0xf/0xf
> > > [ 57.483612] </IRQ>
> > >
> > > Fixes: c869f77d6abb ("add mt7601u driver")
> > > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > > ---
> > > drivers/net/wireless/mediatek/mt7601u/dma.c | 24 +++++++++++++++------
> > > drivers/net/wireless/mediatek/mt7601u/tx.c | 4 ++--
> > > 2 files changed, 19 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
> > > index e7703990b291..bbf1deed7f3b 100644
> > > --- a/drivers/net/wireless/mediatek/mt7601u/dma.c
> > > +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
> > > @@ -238,14 +238,25 @@ static void mt7601u_complete_tx(struct urb *urb)
> > > struct sk_buff *skb;
> > > unsigned long flags;
> > >
> > > - spin_lock_irqsave(&dev->tx_lock, flags);
> > > + switch (urb->status) {
> > > + case -ECONNRESET:
> > > + case -ESHUTDOWN:
> > > + case -ENOENT:
> > > + return;
> > > + default:
> > > + dev_err_ratelimited(dev->dev, "tx urb failed: %d\n",
> > > + urb->status);
> > > + /* fall through */
> > > + case 0:
> > > + break;
> > > + }
> > >
> > > - if (mt7601u_urb_has_error(urb))
> > > - dev_err(dev->dev, "Error: TX urb failed:%d\n", urb->status);
> > > + spin_lock_irqsave(&dev->tx_lock, flags);
> > > if (WARN_ONCE(q->e[q->start].urb != urb, "TX urb mismatch"))
> > > goto out;
> > >
> > > skb = q->e[q->start].skb;
> > > + q->e[q->start].skb = NULL;
> > > trace_mt_tx_dma_done(dev, skb);
> > >
> > > __skb_queue_tail(&dev->tx_skb_done, skb);
> > > @@ -446,10 +457,10 @@ static void mt7601u_free_tx_queue(struct mt7601u_tx_queue *q)
> > > {
> > > int i;
> > >
> > > - WARN_ON(q->used);
> > > -
> > > for (i = 0; i < q->entries; i++) {
> > > usb_poison_urb(q->e[i].urb);
> > > + if (q->e[i].skb)
> > > + mt7601u_tx_status(q->dev, q->e[i].skb);
> >
> > Perhaps a separate patch?
>
> As I did for rx side, if we do not schedule the tx tasklet when the
> device has been disconnected I guess we need this chuck in the same
> patch. What do you think?

Yeah, I see.

2019-06-06 22:40:50

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH] mt7601u: fix possible memory leak when the device is disconnected

On Fri, 7 Jun 2019 00:05:56 +0200, Lorenzo Bianconi wrote:
> I guess this can be a future improvement, do you agree?

Def.

> Do I need to resubmit this patch?

My only concern is the Fixes tag, then. In the good old days we could
just wait a bit ourselves, now some bot will autoselect it, ugh.

But up to you, for the code:

Acked-by: Jakub Kicinski <[email protected]>

2019-06-06 22:40:59

by Lorenzo Bianconi

[permalink] [raw]
Subject: Re: [PATCH] mt7601u: fix possible memory leak when the device is disconnected

>
> On Thu, 6 Jun 2019 23:10:15 +0200, Lorenzo Bianconi wrote:
> > On Thu, Jun 6, 2019 at 8:49 PM Jakub Kicinski <[email protected]> wrote:
> > >
> > > On Thu, 6 Jun 2019 17:34:16 +0200, Lorenzo Bianconi wrote:
> > > > When the device is disconnected while passing traffic it is possible
> > > > to receive out of order urbs causing a memory leak since the skb liked
> > > > to the current tx urb is not removed. Fix the issue deallocating the skb
> > > > cleaning up the tx ring. Moreover this patch fixes the following kernel
> > > > warning
> > >
> > > Ugh if we don't have ordering guarantees then the entire "ring" scheme
> > > no longer works :( Should we move to URB queues, I don't remember now,
> > > but there seem to had been a better way to manage URBs :S
> > >
> >
> > actually I have observed these issues on tx/rx side just during device
> > disconnection while passing traffic.
> > I guess we can assume a proper urb ordering during normal operation
> > (and so tx/rx ring works fine).
>
> Ah, phew, okay. That's fine then.
>
> > Btw what do you mean with 'URB queues'? :)
>
> I think it may have been URB anchors. I remember looking at carl9170
> and liking how the DMAs operated there.

Uhm, interesting. AFAIK urb anchors are used to cease I/O operations
but maybe they can be used even for other purposes (it will be
interesting even for mt76).
I guess this can be a future improvement, do you agree? Do I need to
resubmit this patch?

Regards,
Lorenzo

>
> > > > [ 57.480771] usb 1-1: USB disconnect, device number 2
> > > > [ 57.483451] ------------[ cut here ]------------
> > > > [ 57.483462] TX urb mismatch
> > > > [ 57.483481] WARNING: CPU: 1 PID: 32 at drivers/net/wireless/mediatek/mt7601u/dma.c:245 mt7601u_complete_tx+0x165/00
> > > > [ 57.483483] Modules linked in:
> > > > [ 57.483496] CPU: 1 PID: 32 Comm: kworker/1:1 Not tainted 5.2.0-rc1+ #72
> > > > [ 57.483498] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.12.0-2.fc30 04/01/2014
> > > > [ 57.483502] Workqueue: usb_hub_wq hub_event
> > > > [ 57.483507] RIP: 0010:mt7601u_complete_tx+0x165/0x1e0
> > > > [ 57.483510] Code: 8b b5 10 04 00 00 8b 8d 14 04 00 00 eb 8b 80 3d b1 cb e1 00 00 75 9e 48 c7 c7 a4 ea 05 82 c6 05 f
> > > > [ 57.483513] RSP: 0000:ffffc900000a0d28 EFLAGS: 00010092
> > > > [ 57.483516] RAX: 000000000000000f RBX: ffff88802c0a62c0 RCX: ffffc900000a0c2c
> > > > [ 57.483518] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff810a8371
> > > > [ 57.483520] RBP: ffff88803ced6858 R08: 0000000000000000 R09: 0000000000000001
> > > > [ 57.483540] R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000046
> > > > [ 57.483542] R13: ffff88802c0a6c88 R14: ffff88803baab540 R15: ffff88803a0cc078
> > > > [ 57.483548] FS: 0000000000000000(0000) GS:ffff88803eb00000(0000) knlGS:0000000000000000
> > > > [ 57.483550] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > > [ 57.483552] CR2: 000055e7f6780100 CR3: 0000000028c86000 CR4: 00000000000006a0
> > > > [ 57.483554] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > > [ 57.483556] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> > > > [ 57.483559] Call Trace:
> > > > [ 57.483561] <IRQ>
> > > > [ 57.483565] __usb_hcd_giveback_urb+0x77/0xe0
> > > > [ 57.483570] xhci_giveback_urb_in_irq.isra.0+0x8b/0x140
> > > > [ 57.483574] handle_cmd_completion+0xf5b/0x12c0
> > > > [ 57.483577] xhci_irq+0x1f6/0x1810
> > > > [ 57.483581] ? lockdep_hardirqs_on+0x9e/0x180
> > > > [ 57.483584] ? _raw_spin_unlock_irq+0x24/0x30
> > > > [ 57.483588] __handle_irq_event_percpu+0x3a/0x260
> > > > [ 57.483592] handle_irq_event_percpu+0x1c/0x60
> > > > [ 57.483595] handle_irq_event+0x2f/0x4c
> > > > [ 57.483599] handle_edge_irq+0x7e/0x1a0
> > > > [ 57.483603] handle_irq+0x17/0x20
> > > > [ 57.483607] do_IRQ+0x54/0x110
> > > > [ 57.483610] common_interrupt+0xf/0xf
> > > > [ 57.483612] </IRQ>
> > > >
> > > > Fixes: c869f77d6abb ("add mt7601u driver")
> > > > Signed-off-by: Lorenzo Bianconi <[email protected]>
> > > > ---
> > > > drivers/net/wireless/mediatek/mt7601u/dma.c | 24 +++++++++++++++------
> > > > drivers/net/wireless/mediatek/mt7601u/tx.c | 4 ++--
> > > > 2 files changed, 19 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
> > > > index e7703990b291..bbf1deed7f3b 100644
> > > > --- a/drivers/net/wireless/mediatek/mt7601u/dma.c
> > > > +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
> > > > @@ -238,14 +238,25 @@ static void mt7601u_complete_tx(struct urb *urb)
> > > > struct sk_buff *skb;
> > > > unsigned long flags;
> > > >
> > > > - spin_lock_irqsave(&dev->tx_lock, flags);
> > > > + switch (urb->status) {
> > > > + case -ECONNRESET:
> > > > + case -ESHUTDOWN:
> > > > + case -ENOENT:
> > > > + return;
> > > > + default:
> > > > + dev_err_ratelimited(dev->dev, "tx urb failed: %d\n",
> > > > + urb->status);
> > > > + /* fall through */
> > > > + case 0:
> > > > + break;
> > > > + }
> > > >
> > > > - if (mt7601u_urb_has_error(urb))
> > > > - dev_err(dev->dev, "Error: TX urb failed:%d\n", urb->status);
> > > > + spin_lock_irqsave(&dev->tx_lock, flags);
> > > > if (WARN_ONCE(q->e[q->start].urb != urb, "TX urb mismatch"))
> > > > goto out;
> > > >
> > > > skb = q->e[q->start].skb;
> > > > + q->e[q->start].skb = NULL;
> > > > trace_mt_tx_dma_done(dev, skb);
> > > >
> > > > __skb_queue_tail(&dev->tx_skb_done, skb);
> > > > @@ -446,10 +457,10 @@ static void mt7601u_free_tx_queue(struct mt7601u_tx_queue *q)
> > > > {
> > > > int i;
> > > >
> > > > - WARN_ON(q->used);
> > > > -
> > > > for (i = 0; i < q->entries; i++) {
> > > > usb_poison_urb(q->e[i].urb);
> > > > + if (q->e[i].skb)
> > > > + mt7601u_tx_status(q->dev, q->e[i].skb);
> > >
> > > Perhaps a separate patch?
> >
> > As I did for rx side, if we do not schedule the tx tasklet when the
> > device has been disconnected I guess we need this chuck in the same
> > patch. What do you think?
>
> Yeah, I see.