2020-02-19 10:54:09

by Esben Haabendal

Subject: [PATCH net 0/4] net: ll_temac: Bugfixes

Fix a number of bugs which have been present since the first commit.

The bugs fixed in patches 1, 2 and 4 have all been observed in real systems, and
were relatively easy to reproduce given an appropriate stress setup.

Esben Haabendal (4):
net: ll_temac: Fix race condition causing TX hang
net: ll_temac: Add more error handling of dma_map_single() calls
net: ll_temac: Fix RX buffer descriptor handling on GFP_ATOMIC
pressure
net: ll_temac: Handle DMA halt condition caused by buffer underrun

drivers/net/ethernet/xilinx/ll_temac.h | 4 +
drivers/net/ethernet/xilinx/ll_temac_main.c | 204 ++++++++++++++++----
2 files changed, 170 insertions(+), 38 deletions(-)

--
2.25.0


2020-02-19 10:54:48

by Esben Haabendal

Subject: [PATCH net 3/4] net: ll_temac: Fix RX buffer descriptor handling on GFP_ATOMIC pressure

Failures caused by GFP_ATOMIC memory pressure have been observed, and
due to the missing error handling, they result in kernel crashes such as:

[1876998.350133] kernel BUG at mm/slub.c:3952!
[1876998.350141] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[1876998.350147] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.3.0-scnxt #1
[1876998.350150] Hardware name: N/A N/A/COMe-bIP2, BIOS CCR2R920 03/01/2017
[1876998.350160] RIP: 0010:kfree+0x1ca/0x220
[1876998.350164] Code: 85 db 74 49 48 8b 95 68 01 00 00 48 31 c2 48 89 10 e9 d7 fe ff ff 49 8b 04 24 a9 00 00 01 00 75 0b 49 8b 44 24 08 a8 01 75 02 <0f> 0b 49 8b 04 24 31 f6 a9 00 00 01 00 74 06 41 0f b6 74 24
5b
[1876998.350172] RSP: 0018:ffffc900000f0df0 EFLAGS: 00010246
[1876998.350177] RAX: ffffea00027f0708 RBX: ffff888008d78000 RCX: 0000000000391372
[1876998.350181] RDX: 0000000000000000 RSI: ffffe8ffffd01400 RDI: ffff888008d78000
[1876998.350185] RBP: ffff8881185a5d00 R08: ffffc90000087dd8 R09: 000000000000280a
[1876998.350189] R10: 0000000000000002 R11: 0000000000000000 R12: ffffea0000235e00
[1876998.350193] R13: ffff8881185438a0 R14: 0000000000000000 R15: ffff888118543870
[1876998.350198] FS: 0000000000000000(0000) GS:ffff88811f300000(0000) knlGS:0000000000000000
[1876998.350203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
s#1 Part1
[1876998.350206] CR2: 00007f8dac7b09f0 CR3: 000000011e20a006 CR4: 00000000001606e0
[1876998.350210] Call Trace:
[1876998.350215] <IRQ>
[1876998.350224] ? __netif_receive_skb_core+0x70a/0x920
[1876998.350229] kfree_skb+0x32/0xb0
[1876998.350234] __netif_receive_skb_core+0x70a/0x920
[1876998.350240] __netif_receive_skb_one_core+0x36/0x80
[1876998.350245] process_backlog+0x8b/0x150
[1876998.350250] net_rx_action+0xf7/0x340
[1876998.350255] __do_softirq+0x10f/0x353
[1876998.350262] irq_exit+0xb2/0xc0
[1876998.350265] do_IRQ+0x77/0xd0
[1876998.350271] common_interrupt+0xf/0xf
[1876998.350274] </IRQ>

To handle such failures more gracefully, this change splits the receive
loop into one loop for consuming the received buffers, and one for
allocating new buffers.

When GFP_ATOMIC allocations fail, the receive path will continue with the
buffers that are still there, with the expectation that the allocations
will succeed in a later call to receive.
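
A minimal standalone model of that two-pass structure looks like this (plain C,
no kernel APIs; only the rx_bd_ci/rx_bd_tail roles are taken from the driver,
the types and helpers are invented for illustration):

/*
 * Standalone model of the split receive path (illustrative only).
 */
#include <stdbool.h>
#include <stdlib.h>

#define RX_BD_NUM 64

struct bd { unsigned long phys; bool complete; };

struct ring {
	struct bd bd[RX_BD_NUM];
	void *skb[RX_BD_NUM];
	int ci;		/* next descriptor to consume (rx_bd_ci) */
	int tail;	/* last descriptor owned by DMA (rx_bd_tail) */
};

static void pass_to_stack(void *skb) { free(skb); }	/* netif_rx() stand-in */
static void *alloc_rx_buffer(void) { return malloc(2048); }	/* may fail */

static void recv_model(struct ring *r)
{
	int idx = r->ci;
	bool refilled = false;

	/* Pass 1: hand completed buffers to the stack.  Afterwards these
	 * descriptors hold no skb and are unavailable to the DMA engine. */
	do {
		if (!r->skb[r->ci] || !r->bd[r->ci].complete)
			break;
		pass_to_stack(r->skb[r->ci]);
		r->skb[r->ci] = NULL;
		r->bd[r->ci].phys = 0;
		idx = r->ci;
		if (++r->ci >= RX_BD_NUM)
			r->ci = 0;
	} while (idx != r->tail);

	/* Pass 2: refill descriptors until the ring is full or an allocation
	 * fails; anything left unallocated is retried on the next call. */
	for (;;) {
		int next = (r->tail + 1) % RX_BD_NUM;
		void *skb;

		if (r->bd[next].phys)
			break;			/* ring fully refilled */
		skb = alloc_rx_buffer();
		if (!skb)
			break;			/* retry later */
		r->skb[next] = skb;
		r->bd[next].phys = 1;		/* stands in for the DMA mapping */
		r->bd[next].complete = false;
		r->tail = next;
		refilled = true;
	}

	/* The driver only advances RX_TAILDESC_PTR when pass 2 actually
	 * refilled something (refilled == true here). */
	(void)refilled;
}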

Fixes: 92744989533c ("net: add Xilinx ll_temac device driver")
Signed-off-by: Esben Haabendal <[email protected]>
---
drivers/net/ethernet/xilinx/ll_temac.h | 1 +
drivers/net/ethernet/xilinx/ll_temac_main.c | 112 ++++++++++++++------
2 files changed, 82 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 276292bca334..99fe059e5c7f 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -375,6 +375,7 @@ struct temac_local {
int tx_bd_next;
int tx_bd_tail;
int rx_bd_ci;
+ int rx_bd_tail;

/* DMA channel control setup */
u32 tx_chnl_ctrl;
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index c368c3914bda..255207f2fd27 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -389,12 +389,13 @@ static int temac_dma_bd_init(struct net_device *ndev)
lp->tx_bd_next = 0;
lp->tx_bd_tail = 0;
lp->rx_bd_ci = 0;
+ lp->rx_bd_tail = RX_BD_NUM - 1;

/* Enable RX DMA transfers */
wmb();
lp->dma_out(lp, RX_CURDESC_PTR, lp->rx_bd_p);
lp->dma_out(lp, RX_TAILDESC_PTR,
- lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+ lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * lp->rx_bd_tail));

/* Prepare for TX DMA transfer */
lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p);
@@ -923,27 +924,41 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
static void ll_temac_recv(struct net_device *ndev)
{
struct temac_local *lp = netdev_priv(ndev);
- struct sk_buff *skb, *new_skb;
- unsigned int bdstat;
- struct cdmac_bd *cur_p;
- dma_addr_t tail_p, skb_dma_addr;
- int length;
unsigned long flags;
+ int rx_bd;
+ bool update_tail = false;

spin_lock_irqsave(&lp->rx_lock, flags);

- tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
- cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
-
- bdstat = be32_to_cpu(cur_p->app0);
- while ((bdstat & STS_CTRL_APP0_CMPLT)) {
+ /* Process all received buffers, passing them on network
+ * stack. After this, the buffer descriptors will be in an
+ * un-allocated stage, where no skb is allocated for it, and
+ * they are therefore not available for TEMAC/DMA.
+ */
+ do {
+ struct cdmac_bd *bd = &lp->rx_bd_v[lp->rx_bd_ci];
+ struct sk_buff *skb = lp->rx_skb[lp->rx_bd_ci];
+ unsigned int bdstat = be32_to_cpu(bd->app0);
+ int length;
+
+ /* While this should not normally happen, we can end
+ * here when GFP_ATOMIC allocations fail, and we
+ * therefore have un-allocated buffers.
+ */
+ if (!skb)
+ break;

- skb = lp->rx_skb[lp->rx_bd_ci];
- length = be32_to_cpu(cur_p->app4) & 0x3FFF;
+ /* Loop over all completed buffer descriptors */
+ if (!(bdstat & STS_CTRL_APP0_CMPLT))
+ break;

- dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
+ dma_unmap_single(ndev->dev.parent, be32_to_cpu(bd->phys),
XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE);
+ /* The buffer is not valid for DMA anymore */
+ bd->phys = 0;
+ bd->len = 0;

+ length = be32_to_cpu(bd->app4) & 0x3FFF;
skb_put(skb, length);
skb->protocol = eth_type_trans(skb, ndev);
skb_checksum_none_assert(skb);
@@ -958,39 +973,74 @@ static void ll_temac_recv(struct net_device *ndev)
* (back) for proper IP checksum byte order
* (be16).
*/
- skb->csum = htons(be32_to_cpu(cur_p->app3) & 0xFFFF);
+ skb->csum = htons(be32_to_cpu(bd->app3) & 0xFFFF);
skb->ip_summed = CHECKSUM_COMPLETE;
}

if (!skb_defer_rx_timestamp(skb))
netif_rx(skb);
+ /* The skb buffer is now owned by network stack above */
+ lp->rx_skb[lp->rx_bd_ci] = NULL;

ndev->stats.rx_packets++;
ndev->stats.rx_bytes += length;

- new_skb = netdev_alloc_skb_ip_align(ndev,
- XTE_MAX_JUMBO_FRAME_SIZE);
- if (!new_skb) {
- spin_unlock_irqrestore(&lp->rx_lock, flags);
- return;
+ rx_bd = lp->rx_bd_ci;
+ if (++lp->rx_bd_ci >= RX_BD_NUM)
+ lp->rx_bd_ci = 0;
+ } while (rx_bd != lp->rx_bd_tail);
+
+ /* Allocate new buffers for those buffer descriptors that were
+ * passed to network stack. Note that GFP_ATOMIC allocations
+ * can fail (e.g. when a larger burst of GFP_ATOMIC
+ * allocations occurs), so while we try to allocate all
+ * buffers in the same interrupt where they were processed, we
+ * continue with what we could get in case of allocation
+ * failure. Allocation of remaining buffers will be retried
+ * in following calls.
+ */
+ while (1) {
+ struct sk_buff *skb;
+ struct cdmac_bd *bd;
+ dma_addr_t skb_dma_addr;
+
+ rx_bd = lp->rx_bd_tail + 1;
+ if (rx_bd >= RX_BD_NUM)
+ rx_bd = 0;
+ bd = &lp->rx_bd_v[rx_bd];
+
+ if (bd->phys)
+ break; /* All skb's allocated */
+
+ skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE);
+ if (!skb) {
+ dev_warn(&ndev->dev, "skb alloc failed\n");
+ break;
}

- cur_p->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
- skb_dma_addr = dma_map_single(ndev->dev.parent, new_skb->data,
+ skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
XTE_MAX_JUMBO_FRAME_SIZE,
DMA_FROM_DEVICE);
- cur_p->phys = cpu_to_be32(skb_dma_addr);
- cur_p->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
- lp->rx_skb[lp->rx_bd_ci] = new_skb;
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent,
+ skb_dma_addr))) {
+ dev_kfree_skb_any(skb);
+ break;
+ }

- lp->rx_bd_ci++;
- if (lp->rx_bd_ci >= RX_BD_NUM)
- lp->rx_bd_ci = 0;
+ bd->phys = cpu_to_be32(skb_dma_addr);
+ bd->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
+ bd->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
+ lp->rx_skb[rx_bd] = skb;
+
+ lp->rx_bd_tail = rx_bd;
+ update_tail = true;
+ }

- cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
- bdstat = be32_to_cpu(cur_p->app0);
+ /* Move tail pointer when buffers have been allocated */
+ if (update_tail) {
+ lp->dma_out(lp, RX_TAILDESC_PTR,
+ lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_tail);
}
- lp->dma_out(lp, RX_TAILDESC_PTR, tail_p);

spin_unlock_irqrestore(&lp->rx_lock, flags);
}
--
2.25.0

2020-02-19 10:55:00

by Esben Haabendal

Subject: [PATCH net 4/4] net: ll_temac: Handle DMA halt condition caused by buffer underrun

The SDMA engine used by TEMAC halts operation when it has finished
processing the last buffer descriptor in the buffer ring.
Unfortunately, no interrupt event is generated when this happens,
so we need to set up another mechanism to make sure DMA operation is
restarted when enough buffers have been added to the ring.
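
The mechanism added below boils down to three pieces, condensed here from the
hunks that follow (not a standalone compilable unit):

/* In ll_temac_recv(), after the consume loop: if fewer DMA-owned RX
 * buffers remain than the RX coalesce count, no further RX interrupt
 * can be relied upon, so re-run the receive path from a delayed work. */
if (ll_temac_recv_buffers_available(lp) < lp->coalesce_count_rx)
	schedule_delayed_work(&lp->restart_work, HZ / 1000);

/* The work function simply re-enters the receive path, which will
 * re-schedule itself until enough buffers are available again. */
static void ll_temac_restart_work_func(struct work_struct *work)
{
	struct temac_local *lp = container_of(work, struct temac_local,
					      restart_work.work);

	ll_temac_recv(lp->ndev);
}

/* Set up in temac_probe(), cancelled in temac_stop(): */
INIT_DELAYED_WORK(&lp->restart_work, ll_temac_restart_work_func);
cancel_delayed_work_sync(&lp->restart_work);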

Fixes: 92744989533c ("net: add Xilinx ll_temac device driver")
Signed-off-by: Esben Haabendal <[email protected]>
---
drivers/net/ethernet/xilinx/ll_temac.h | 3 ++
drivers/net/ethernet/xilinx/ll_temac_main.c | 47 ++++++++++++++++++++-
2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 99fe059e5c7f..53fb8141f1a6 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -380,6 +380,9 @@ struct temac_local {
/* DMA channel control setup */
u32 tx_chnl_ctrl;
u32 rx_chnl_ctrl;
+ u8 coalesce_count_rx;
+
+ struct delayed_work restart_work;
};

/* Wrappers for temac_ior()/temac_iow() function pointers above */
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 255207f2fd27..90b486becb5b 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -51,6 +51,7 @@
#include <linux/ip.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/workqueue.h>
#include <linux/dma-mapping.h>
#include <linux/processor.h>
#include <linux/platform_data/xilinx-ll-temac.h>
@@ -920,6 +921,17 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
}

+static int ll_temac_recv_buffers_available(struct temac_local *lp)
+{
+ int available;
+
+ if (!lp->rx_skb[lp->rx_bd_ci])
+ return 0;
+ available = 1 + lp->rx_bd_tail - lp->rx_bd_ci;
+ if (available <= 0)
+ available += RX_BD_NUM;
+ return available;
+}

static void ll_temac_recv(struct net_device *ndev)
{
@@ -990,6 +1002,18 @@ static void ll_temac_recv(struct net_device *ndev)
lp->rx_bd_ci = 0;
} while (rx_bd != lp->rx_bd_tail);

+ /* DMA operations will halt when the last buffer descriptor is
+ * processed (ie. the one pointed to by RX_TAILDESC_PTR).
+ * When that happens, no more interrupt events will be
+ * generated. No IRQ_COAL or IRQ_DLY, and not even an
+ * IRQ_ERR. To avoid stalling, we schedule a delayed work
+ * when there is a potential risk of that happening. The work
+ * will call this function, and thus re-schedule itself until
+ * enough buffers are available again.
+ */
+ if (ll_temac_recv_buffers_available(lp) < lp->coalesce_count_rx)
+ schedule_delayed_work(&lp->restart_work, HZ / 1000);
+
/* Allocate new buffers for those buffer descriptors that were
* passed to network stack. Note that GFP_ATOMIC allocations
* can fail (e.g. when a larger burst of GFP_ATOMIC
@@ -1045,6 +1069,18 @@ static void ll_temac_recv(struct net_device *ndev)
spin_unlock_irqrestore(&lp->rx_lock, flags);
}

+/* Function scheduled to ensure a restart in case of DMA halt
+ * condition caused by running out of buffer descriptors.
+ */
+static void ll_temac_restart_work_func(struct work_struct *work)
+{
+ struct temac_local *lp = container_of(work, struct temac_local,
+ restart_work.work);
+ struct net_device *ndev = lp->ndev;
+
+ ll_temac_recv(ndev);
+}
+
static irqreturn_t ll_temac_tx_irq(int irq, void *_ndev)
{
struct net_device *ndev = _ndev;
@@ -1137,6 +1173,8 @@ static int temac_stop(struct net_device *ndev)

dev_dbg(&ndev->dev, "temac_close()\n");

+ cancel_delayed_work_sync(&lp->restart_work);
+
free_irq(lp->tx_irq, ndev);
free_irq(lp->rx_irq, ndev);

@@ -1258,6 +1296,7 @@ static int temac_probe(struct platform_device *pdev)
lp->dev = &pdev->dev;
lp->options = XTE_OPTION_DEFAULTS;
spin_lock_init(&lp->rx_lock);
+ INIT_DELAYED_WORK(&lp->restart_work, ll_temac_restart_work_func);

/* Setup mutex for synchronization of indirect register access */
if (pdata) {
@@ -1364,6 +1403,7 @@ static int temac_probe(struct platform_device *pdev)
*/
lp->tx_chnl_ctrl = 0x10220000;
lp->rx_chnl_ctrl = 0xff070000;
+ lp->coalesce_count_rx = 0x07;

/* Finished with the DMA node; drop the reference */
of_node_put(dma_np);
@@ -1395,11 +1435,14 @@ static int temac_probe(struct platform_device *pdev)
(pdata->tx_irq_count << 16);
else
lp->tx_chnl_ctrl = 0x10220000;
- if (pdata->rx_irq_timeout || pdata->rx_irq_count)
+ if (pdata->rx_irq_timeout || pdata->rx_irq_count) {
lp->rx_chnl_ctrl = (pdata->rx_irq_timeout << 24) |
(pdata->rx_irq_count << 16);
- else
+ lp->coalesce_count_rx = pdata->rx_irq_count;
+ } else {
lp->rx_chnl_ctrl = 0xff070000;
+ lp->coalesce_count_rx = 0x07;
+ }
}

/* Error handle returned DMA RX and TX interrupts */
--
2.25.0

2020-02-19 10:55:23

by Esben Haabendal

Subject: [PATCH net 1/4] net: ll_temac: Fix race condition causing TX hang

It is possible that the interrupt handler fires and frees up space in
the TX ring in between checking for sufficient TX ring space and
stopping the TX queue in temac_start_xmit. If this happens, the
queue wake from the interrupt handler will occur before the queue is
stopped, causing a lost wakeup that leaves the adapter's transmit path hung.

To avoid this, after stopping the queue, check again whether there is
sufficient space in the TX ring. If so, wake up the queue again.

This is a port of the similar fix in axienet driver,
commit 7de44285c1f6 ("net: axienet: Fix race condition causing TX hang").
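
Condensed from the hunk below, the xmit-side handling ends up as:

if (temac_check_tx_bd_space(lp, num_frag + 1)) {
	if (netif_queue_stopped(ndev))
		return NETDEV_TX_BUSY;

	netif_stop_queue(ndev);

	/* Pairs with the smp_mb() added in temac_start_xmit_done() */
	smp_mb();

	/* The completion IRQ may have freed space in the meantime */
	if (temac_check_tx_bd_space(lp, num_frag))
		return NETDEV_TX_BUSY;

	netif_wake_queue(ndev);
}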

Fixes: 23ecc4bde21f ("net: ll_temac: fix checksum offload logic")
Signed-off-by: Esben Haabendal <[email protected]>
---
drivers/net/ethernet/xilinx/ll_temac_main.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 6f11f52c9a9e..996004ef8bd4 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -788,6 +788,9 @@ static void temac_start_xmit_done(struct net_device *ndev)
stat = be32_to_cpu(cur_p->app0);
}

+ /* Matches barrier in temac_start_xmit */
+ smp_mb();
+
netif_wake_queue(ndev);
}

@@ -830,9 +833,19 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cur_p = &lp->tx_bd_v[lp->tx_bd_tail];

if (temac_check_tx_bd_space(lp, num_frag + 1)) {
- if (!netif_queue_stopped(ndev))
- netif_stop_queue(ndev);
- return NETDEV_TX_BUSY;
+ if (netif_queue_stopped(ndev))
+ return NETDEV_TX_BUSY;
+
+ netif_stop_queue(ndev);
+
+ /* Matches barrier in temac_start_xmit_done */
+ smp_mb();
+
+ /* Space might have just been freed - check again */
+ if (temac_check_tx_bd_space(lp, num_frag))
+ return NETDEV_TX_BUSY;
+
+ netif_wake_queue(ndev);
}

cur_p->app0 = 0;
--
2.25.0

2020-02-19 10:55:35

by Esben Haabendal

Subject: [PATCH net 2/4] net: ll_temac: Add more error handling of dma_map_single() calls

This adds error handling to the remaining dma_map_single() calls, so that
behavior is well defined if/when we run out of DMA memory.

Fixes: 92744989533c ("net: add Xilinx ll_temac device driver")
Signed-off-by: Esben Haabendal <[email protected]>
---
drivers/net/ethernet/xilinx/ll_temac_main.c | 26 +++++++++++++++++++--
1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 996004ef8bd4..c368c3914bda 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -367,6 +367,8 @@ static int temac_dma_bd_init(struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
XTE_MAX_JUMBO_FRAME_SIZE,
DMA_FROM_DEVICE);
+ if (dma_mapping_error(ndev->dev.parent, skb_dma_addr))
+ goto out;
lp->rx_bd_v[i].phys = cpu_to_be32(skb_dma_addr);
lp->rx_bd_v[i].len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
lp->rx_bd_v[i].app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
@@ -863,12 +865,13 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
cur_p->len = cpu_to_be32(skb_headlen(skb));
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr)))
+ return NETDEV_TX_BUSY;
cur_p->phys = cpu_to_be32(skb_dma_addr);
ptr_to_txbd((void *)skb, cur_p);

for (ii = 0; ii < num_frag; ii++) {
- lp->tx_bd_tail++;
- if (lp->tx_bd_tail >= TX_BD_NUM)
+ if (++lp->tx_bd_tail >= TX_BD_NUM)
lp->tx_bd_tail = 0;

cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
@@ -876,6 +879,25 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_frag_address(frag),
skb_frag_size(frag),
DMA_TO_DEVICE);
+ if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) {
+ if (--lp->tx_bd_tail < 0)
+ lp->tx_bd_tail = TX_BD_NUM - 1;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+ while (--ii >= 0) {
+ --frag;
+ dma_unmap_single(ndev->dev.parent,
+ be32_to_cpu(cur_p->phys),
+ skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ if (--lp->tx_bd_tail < 0)
+ lp->tx_bd_tail = TX_BD_NUM - 1;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+ }
+ dma_unmap_single(ndev->dev.parent,
+ be32_to_cpu(cur_p->phys),
+ skb_headlen(skb), DMA_TO_DEVICE);
+ return NETDEV_TX_BUSY;
+ }
cur_p->phys = cpu_to_be32(skb_dma_addr);
cur_p->len = cpu_to_be32(skb_frag_size(frag));
cur_p->app0 = 0;
--
2.25.0

2020-02-19 19:00:15

by David Miller

Subject: Re: [PATCH net 2/4] net: ll_temac: Add more error handling of dma_map_single() calls

From: Esben Haabendal <[email protected]>
Date: Wed, 19 Feb 2020 11:54:00 +0100

> @@ -863,12 +865,13 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
> skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
> skb_headlen(skb), DMA_TO_DEVICE);
> cur_p->len = cpu_to_be32(skb_headlen(skb));
> + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr)))
> + return NETDEV_TX_BUSY;

The appropriate behavior in this situation is to drop the packet and return
NETDEV_TX_OK.
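
In code terms that means freeing the skb and counting it as dropped rather than
pushing back on the stack, i.e. something like the following (which is the
shape the v2 patch later in this thread ends up with):

if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) {
	dev_kfree_skb_any(skb);
	ndev->stats.tx_dropped++;
	return NETDEV_TX_OK;
}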

2020-02-20 08:33:19

by Esben Haabendal

Subject: Re: [PATCH net 2/4] net: ll_temac: Add more error handling of dma_map_single() calls

David Miller <[email protected]> writes:

> From: Esben Haabendal <[email protected]>
> Date: Wed, 19 Feb 2020 11:54:00 +0100
>
>> @@ -863,12 +865,13 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>> skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
>> skb_headlen(skb), DMA_TO_DEVICE);
>> cur_p->len = cpu_to_be32(skb_headlen(skb));
>> + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr)))
>> + return NETDEV_TX_BUSY;
>
> The appropriate behavior in this situation is to drop the packet and return
> NETDEV_TX_OK.

Ok, and I guess the same goes for the error handling of the dma_map_single()
call for one of the fragments later in the same function.

/Esben

2020-02-20 18:11:25

by David Miller

Subject: Re: [PATCH net 2/4] net: ll_temac: Add more error handling of dma_map_single() calls

From: Esben Haabendal <[email protected]>
Date: Thu, 20 Feb 2020 09:32:58 +0100

> David Miller <[email protected]> writes:
>
>> From: Esben Haabendal <[email protected]>
>> Date: Wed, 19 Feb 2020 11:54:00 +0100
>>
>>> @@ -863,12 +865,13 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
>>> skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
>>> skb_headlen(skb), DMA_TO_DEVICE);
>>> cur_p->len = cpu_to_be32(skb_headlen(skb));
>>> + if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr)))
>>> + return NETDEV_TX_BUSY;
>>
>> The appropriate behavior in this situation is to drop the packet and return
>> NETDEV_TX_OK.
>
> Ok, and I guess the same goes for the error handling of the dma_map_single()
> call for one of the fragments later in the same function.

Yes.

2020-02-21 06:47:44

by Esben Haabendal

Subject: [PATCH net v2 0/4] net: ll_temac: Bugfixes

Fix a number of bugs which have been present since the first commit.

The bugs fixed in patches 1, 2 and 4 have all been observed in real systems, and
were relatively easy to reproduce given an appropriate stress setup.

Changes since v1:

- Changed error handling of dma_map_single() in temac_start_xmit() to drop the
packet instead of returning NETDEV_TX_BUSY.

Esben Haabendal (4):
net: ll_temac: Fix race condition causing TX hang
net: ll_temac: Add more error handling of dma_map_single() calls
net: ll_temac: Fix RX buffer descriptor handling on GFP_ATOMIC
pressure
net: ll_temac: Handle DMA halt condition caused by buffer underrun

drivers/net/ethernet/xilinx/ll_temac.h | 4 +
drivers/net/ethernet/xilinx/ll_temac_main.c | 209 ++++++++++++++++----
2 files changed, 175 insertions(+), 38 deletions(-)

--
2.25.0

2020-02-21 06:48:18

by Esben Haabendal

Subject: [PATCH net v2 2/4] net: ll_temac: Add more error handling of dma_map_single() calls

This adds error handling to the remaining dma_map_single() calls, so that
behavior is well defined if/when we run out of DMA memory.

Fixes: 92744989533c ("net: add Xilinx ll_temac device driver")
Signed-off-by: Esben Haabendal <[email protected]>
---
drivers/net/ethernet/xilinx/ll_temac_main.c | 26 +++++++++++++++++++--
1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 996004ef8bd4..c368c3914bda 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -367,6 +367,8 @@ static int temac_dma_bd_init(struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
XTE_MAX_JUMBO_FRAME_SIZE,
DMA_FROM_DEVICE);
+ if (dma_mapping_error(ndev->dev.parent, skb_dma_addr))
+ goto out;
lp->rx_bd_v[i].phys = cpu_to_be32(skb_dma_addr);
lp->rx_bd_v[i].len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
lp->rx_bd_v[i].app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
@@ -863,12 +865,13 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
cur_p->len = cpu_to_be32(skb_headlen(skb));
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr)))
+ return NETDEV_TX_BUSY;
cur_p->phys = cpu_to_be32(skb_dma_addr);
ptr_to_txbd((void *)skb, cur_p);

for (ii = 0; ii < num_frag; ii++) {
- lp->tx_bd_tail++;
- if (lp->tx_bd_tail >= TX_BD_NUM)
+ if (++lp->tx_bd_tail >= TX_BD_NUM)
lp->tx_bd_tail = 0;

cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
@@ -876,6 +879,25 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_frag_address(frag),
skb_frag_size(frag),
DMA_TO_DEVICE);
+ if (dma_mapping_error(ndev->dev.parent, skb_dma_addr)) {
+ if (--lp->tx_bd_tail < 0)
+ lp->tx_bd_tail = TX_BD_NUM - 1;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+ while (--ii >= 0) {
+ --frag;
+ dma_unmap_single(ndev->dev.parent,
+ be32_to_cpu(cur_p->phys),
+ skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ if (--lp->tx_bd_tail < 0)
+ lp->tx_bd_tail = TX_BD_NUM - 1;
+ cur_p = &lp->tx_bd_v[lp->tx_bd_tail];
+ }
+ dma_unmap_single(ndev->dev.parent,
+ be32_to_cpu(cur_p->phys),
+ skb_headlen(skb), DMA_TO_DEVICE);
+ return NETDEV_TX_BUSY;
+ }
cur_p->phys = cpu_to_be32(skb_dma_addr);
cur_p->len = cpu_to_be32(skb_frag_size(frag));
cur_p->app0 = 0;
--
2.25.0

2020-02-21 06:48:26

by Esben Haabendal

Subject: [PATCH net v2 3/4] net: ll_temac: Fix RX buffer descriptor handling on GFP_ATOMIC pressure

Failures caused by GFP_ATOMIC memory pressure have been observed, and
due to the missing error handling, they result in kernel crashes such as:

[1876998.350133] kernel BUG at mm/slub.c:3952!
[1876998.350141] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[1876998.350147] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.3.0-scnxt #1
[1876998.350150] Hardware name: N/A N/A/COMe-bIP2, BIOS CCR2R920 03/01/2017
[1876998.350160] RIP: 0010:kfree+0x1ca/0x220
[1876998.350164] Code: 85 db 74 49 48 8b 95 68 01 00 00 48 31 c2 48 89 10 e9 d7 fe ff ff 49 8b 04 24 a9 00 00 01 00 75 0b 49 8b 44 24 08 a8 01 75 02 <0f> 0b 49 8b 04 24 31 f6 a9 00 00 01 00 74 06 41 0f b6 74 24
5b
[1876998.350172] RSP: 0018:ffffc900000f0df0 EFLAGS: 00010246
[1876998.350177] RAX: ffffea00027f0708 RBX: ffff888008d78000 RCX: 0000000000391372
[1876998.350181] RDX: 0000000000000000 RSI: ffffe8ffffd01400 RDI: ffff888008d78000
[1876998.350185] RBP: ffff8881185a5d00 R08: ffffc90000087dd8 R09: 000000000000280a
[1876998.350189] R10: 0000000000000002 R11: 0000000000000000 R12: ffffea0000235e00
[1876998.350193] R13: ffff8881185438a0 R14: 0000000000000000 R15: ffff888118543870
[1876998.350198] FS: 0000000000000000(0000) GS:ffff88811f300000(0000) knlGS:0000000000000000
[1876998.350203] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
s#1 Part1
[1876998.350206] CR2: 00007f8dac7b09f0 CR3: 000000011e20a006 CR4: 00000000001606e0
[1876998.350210] Call Trace:
[1876998.350215] <IRQ>
[1876998.350224] ? __netif_receive_skb_core+0x70a/0x920
[1876998.350229] kfree_skb+0x32/0xb0
[1876998.350234] __netif_receive_skb_core+0x70a/0x920
[1876998.350240] __netif_receive_skb_one_core+0x36/0x80
[1876998.350245] process_backlog+0x8b/0x150
[1876998.350250] net_rx_action+0xf7/0x340
[1876998.350255] __do_softirq+0x10f/0x353
[1876998.350262] irq_exit+0xb2/0xc0
[1876998.350265] do_IRQ+0x77/0xd0
[1876998.350271] common_interrupt+0xf/0xf
[1876998.350274] </IRQ>

To handle such failures more gracefully, this change splits the receive
loop into one loop for consuming the received buffers, and one for
allocating new buffers.

When GFP_ATOMIC allocations fail, the receive path will continue with the
buffers that are still there, with the expectation that the allocations
will succeed in a later call to receive.

Fixes: 92744989533c ("net: add Xilinx ll_temac device driver")
Signed-off-by: Esben Haabendal <[email protected]>
---
drivers/net/ethernet/xilinx/ll_temac.h | 1 +
drivers/net/ethernet/xilinx/ll_temac_main.c | 112 ++++++++++++++------
2 files changed, 82 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 276292bca334..99fe059e5c7f 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -375,6 +375,7 @@ struct temac_local {
int tx_bd_next;
int tx_bd_tail;
int rx_bd_ci;
+ int rx_bd_tail;

/* DMA channel control setup */
u32 tx_chnl_ctrl;
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index c368c3914bda..255207f2fd27 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -389,12 +389,13 @@ static int temac_dma_bd_init(struct net_device *ndev)
lp->tx_bd_next = 0;
lp->tx_bd_tail = 0;
lp->rx_bd_ci = 0;
+ lp->rx_bd_tail = RX_BD_NUM - 1;

/* Enable RX DMA transfers */
wmb();
lp->dma_out(lp, RX_CURDESC_PTR, lp->rx_bd_p);
lp->dma_out(lp, RX_TAILDESC_PTR,
- lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+ lp->rx_bd_p + (sizeof(*lp->rx_bd_v) * lp->rx_bd_tail));

/* Prepare for TX DMA transfer */
lp->dma_out(lp, TX_CURDESC_PTR, lp->tx_bd_p);
@@ -923,27 +924,41 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
static void ll_temac_recv(struct net_device *ndev)
{
struct temac_local *lp = netdev_priv(ndev);
- struct sk_buff *skb, *new_skb;
- unsigned int bdstat;
- struct cdmac_bd *cur_p;
- dma_addr_t tail_p, skb_dma_addr;
- int length;
unsigned long flags;
+ int rx_bd;
+ bool update_tail = false;

spin_lock_irqsave(&lp->rx_lock, flags);

- tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci;
- cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
-
- bdstat = be32_to_cpu(cur_p->app0);
- while ((bdstat & STS_CTRL_APP0_CMPLT)) {
+ /* Process all received buffers, passing them on network
+ * stack. After this, the buffer descriptors will be in an
+ * un-allocated stage, where no skb is allocated for it, and
+ * they are therefore not available for TEMAC/DMA.
+ */
+ do {
+ struct cdmac_bd *bd = &lp->rx_bd_v[lp->rx_bd_ci];
+ struct sk_buff *skb = lp->rx_skb[lp->rx_bd_ci];
+ unsigned int bdstat = be32_to_cpu(bd->app0);
+ int length;
+
+ /* While this should not normally happen, we can end
+ * here when GFP_ATOMIC allocations fail, and we
+ * therefore have un-allocated buffers.
+ */
+ if (!skb)
+ break;

- skb = lp->rx_skb[lp->rx_bd_ci];
- length = be32_to_cpu(cur_p->app4) & 0x3FFF;
+ /* Loop over all completed buffer descriptors */
+ if (!(bdstat & STS_CTRL_APP0_CMPLT))
+ break;

- dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
+ dma_unmap_single(ndev->dev.parent, be32_to_cpu(bd->phys),
XTE_MAX_JUMBO_FRAME_SIZE, DMA_FROM_DEVICE);
+ /* The buffer is not valid for DMA anymore */
+ bd->phys = 0;
+ bd->len = 0;

+ length = be32_to_cpu(bd->app4) & 0x3FFF;
skb_put(skb, length);
skb->protocol = eth_type_trans(skb, ndev);
skb_checksum_none_assert(skb);
@@ -958,39 +973,74 @@ static void ll_temac_recv(struct net_device *ndev)
* (back) for proper IP checksum byte order
* (be16).
*/
- skb->csum = htons(be32_to_cpu(cur_p->app3) & 0xFFFF);
+ skb->csum = htons(be32_to_cpu(bd->app3) & 0xFFFF);
skb->ip_summed = CHECKSUM_COMPLETE;
}

if (!skb_defer_rx_timestamp(skb))
netif_rx(skb);
+ /* The skb buffer is now owned by network stack above */
+ lp->rx_skb[lp->rx_bd_ci] = NULL;

ndev->stats.rx_packets++;
ndev->stats.rx_bytes += length;

- new_skb = netdev_alloc_skb_ip_align(ndev,
- XTE_MAX_JUMBO_FRAME_SIZE);
- if (!new_skb) {
- spin_unlock_irqrestore(&lp->rx_lock, flags);
- return;
+ rx_bd = lp->rx_bd_ci;
+ if (++lp->rx_bd_ci >= RX_BD_NUM)
+ lp->rx_bd_ci = 0;
+ } while (rx_bd != lp->rx_bd_tail);
+
+ /* Allocate new buffers for those buffer descriptors that were
+ * passed to network stack. Note that GFP_ATOMIC allocations
+ * can fail (e.g. when a larger burst of GFP_ATOMIC
+ * allocations occurs), so while we try to allocate all
+ * buffers in the same interrupt where they were processed, we
+ * continue with what we could get in case of allocation
+ * failure. Allocation of remaining buffers will be retried
+ * in following calls.
+ */
+ while (1) {
+ struct sk_buff *skb;
+ struct cdmac_bd *bd;
+ dma_addr_t skb_dma_addr;
+
+ rx_bd = lp->rx_bd_tail + 1;
+ if (rx_bd >= RX_BD_NUM)
+ rx_bd = 0;
+ bd = &lp->rx_bd_v[rx_bd];
+
+ if (bd->phys)
+ break; /* All skb's allocated */
+
+ skb = netdev_alloc_skb_ip_align(ndev, XTE_MAX_JUMBO_FRAME_SIZE);
+ if (!skb) {
+ dev_warn(&ndev->dev, "skb alloc failed\n");
+ break;
}

- cur_p->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
- skb_dma_addr = dma_map_single(ndev->dev.parent, new_skb->data,
+ skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
XTE_MAX_JUMBO_FRAME_SIZE,
DMA_FROM_DEVICE);
- cur_p->phys = cpu_to_be32(skb_dma_addr);
- cur_p->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
- lp->rx_skb[lp->rx_bd_ci] = new_skb;
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent,
+ skb_dma_addr))) {
+ dev_kfree_skb_any(skb);
+ break;
+ }

- lp->rx_bd_ci++;
- if (lp->rx_bd_ci >= RX_BD_NUM)
- lp->rx_bd_ci = 0;
+ bd->phys = cpu_to_be32(skb_dma_addr);
+ bd->len = cpu_to_be32(XTE_MAX_JUMBO_FRAME_SIZE);
+ bd->app0 = cpu_to_be32(STS_CTRL_APP0_IRQONEND);
+ lp->rx_skb[rx_bd] = skb;
+
+ lp->rx_bd_tail = rx_bd;
+ update_tail = true;
+ }

- cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
- bdstat = be32_to_cpu(cur_p->app0);
+ /* Move tail pointer when buffers have been allocated */
+ if (update_tail) {
+ lp->dma_out(lp, RX_TAILDESC_PTR,
+ lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_tail);
}
- lp->dma_out(lp, RX_TAILDESC_PTR, tail_p);

spin_unlock_irqrestore(&lp->rx_lock, flags);
}
--
2.25.0

2020-02-21 06:48:54

by Esben Haabendal

Subject: [PATCH net v2 1/4] net: ll_temac: Fix race condition causing TX hang

It is possible that the interrupt handler fires and frees up space in
the TX ring in between checking for sufficient TX ring space and
stopping the TX queue in temac_start_xmit. If this happens, the
queue wake from the interrupt handler will occur before the queue is
stopped, causing a lost wakeup that leaves the adapter's transmit path hung.

To avoid this, after stopping the queue, check again whether there is
sufficient space in the TX ring. If so, wake up the queue again.

This is a port of the similar fix in axienet driver,
commit 7de44285c1f6 ("net: axienet: Fix race condition causing TX hang").

Fixes: 23ecc4bde21f ("net: ll_temac: fix checksum offload logic")
Signed-off-by: Esben Haabendal <[email protected]>
---
drivers/net/ethernet/xilinx/ll_temac_main.c | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 6f11f52c9a9e..996004ef8bd4 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -788,6 +788,9 @@ static void temac_start_xmit_done(struct net_device *ndev)
stat = be32_to_cpu(cur_p->app0);
}

+ /* Matches barrier in temac_start_xmit */
+ smp_mb();
+
netif_wake_queue(ndev);
}

@@ -830,9 +833,19 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
cur_p = &lp->tx_bd_v[lp->tx_bd_tail];

if (temac_check_tx_bd_space(lp, num_frag + 1)) {
- if (!netif_queue_stopped(ndev))
- netif_stop_queue(ndev);
- return NETDEV_TX_BUSY;
+ if (netif_queue_stopped(ndev))
+ return NETDEV_TX_BUSY;
+
+ netif_stop_queue(ndev);
+
+ /* Matches barrier in temac_start_xmit_done */
+ smp_mb();
+
+ /* Space might have just been freed - check again */
+ if (temac_check_tx_bd_space(lp, num_frag))
+ return NETDEV_TX_BUSY;
+
+ netif_wake_queue(ndev);
}

cur_p->app0 = 0;
--
2.25.0

2020-02-21 06:49:21

by Esben Haabendal

Subject: [PATCH net v2 4/4] net: ll_temac: Handle DMA halt condition caused by buffer underrun

The SDMA engine used by TEMAC halts operation when it has finished
processing the last buffer descriptor in the buffer ring.
Unfortunately, no interrupt event is generated when this happens,
so we need to set up another mechanism to make sure DMA operation is
restarted when enough buffers have been added to the ring.

Fixes: 92744989533c ("net: add Xilinx ll_temac device driver")
Signed-off-by: Esben Haabendal <[email protected]>
---
drivers/net/ethernet/xilinx/ll_temac.h | 3 ++
drivers/net/ethernet/xilinx/ll_temac_main.c | 58 +++++++++++++++++++--
2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 99fe059e5c7f..53fb8141f1a6 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -380,6 +380,9 @@ struct temac_local {
/* DMA channel control setup */
u32 tx_chnl_ctrl;
u32 rx_chnl_ctrl;
+ u8 coalesce_count_rx;
+
+ struct delayed_work restart_work;
};

/* Wrappers for temac_ior()/temac_iow() function pointers above */
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 255207f2fd27..9461acec6f70 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -51,6 +51,7 @@
#include <linux/ip.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/workqueue.h>
#include <linux/dma-mapping.h>
#include <linux/processor.h>
#include <linux/platform_data/xilinx-ll-temac.h>
@@ -866,8 +867,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_dma_addr = dma_map_single(ndev->dev.parent, skb->data,
skb_headlen(skb), DMA_TO_DEVICE);
cur_p->len = cpu_to_be32(skb_headlen(skb));
- if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr)))
- return NETDEV_TX_BUSY;
+ if (WARN_ON_ONCE(dma_mapping_error(ndev->dev.parent, skb_dma_addr))) {
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+ }
cur_p->phys = cpu_to_be32(skb_dma_addr);
ptr_to_txbd((void *)skb, cur_p);

@@ -897,7 +901,9 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
dma_unmap_single(ndev->dev.parent,
be32_to_cpu(cur_p->phys),
skb_headlen(skb), DMA_TO_DEVICE);
- return NETDEV_TX_BUSY;
+ dev_kfree_skb_any(skb);
+ ndev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
}
cur_p->phys = cpu_to_be32(skb_dma_addr);
cur_p->len = cpu_to_be32(skb_frag_size(frag));
@@ -920,6 +926,17 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_OK;
}

+static int ll_temac_recv_buffers_available(struct temac_local *lp)
+{
+ int available;
+
+ if (!lp->rx_skb[lp->rx_bd_ci])
+ return 0;
+ available = 1 + lp->rx_bd_tail - lp->rx_bd_ci;
+ if (available <= 0)
+ available += RX_BD_NUM;
+ return available;
+}

static void ll_temac_recv(struct net_device *ndev)
{
@@ -990,6 +1007,18 @@ static void ll_temac_recv(struct net_device *ndev)
lp->rx_bd_ci = 0;
} while (rx_bd != lp->rx_bd_tail);

+ /* DMA operations will halt when the last buffer descriptor is
+ * processed (ie. the one pointed to by RX_TAILDESC_PTR).
+ * When that happens, no more interrupt events will be
+ * generated. No IRQ_COAL or IRQ_DLY, and not even an
+ * IRQ_ERR. To avoid stalling, we schedule a delayed work
+ * when there is a potential risk of that happening. The work
+ * will call this function, and thus re-schedule itself until
+ * enough buffers are available again.
+ */
+ if (ll_temac_recv_buffers_available(lp) < lp->coalesce_count_rx)
+ schedule_delayed_work(&lp->restart_work, HZ / 1000);
+
/* Allocate new buffers for those buffer descriptors that were
* passed to network stack. Note that GFP_ATOMIC allocations
* can fail (e.g. when a larger burst of GFP_ATOMIC
@@ -1045,6 +1074,18 @@ static void ll_temac_recv(struct net_device *ndev)
spin_unlock_irqrestore(&lp->rx_lock, flags);
}

+/* Function scheduled to ensure a restart in case of DMA halt
+ * condition caused by running out of buffer descriptors.
+ */
+static void ll_temac_restart_work_func(struct work_struct *work)
+{
+ struct temac_local *lp = container_of(work, struct temac_local,
+ restart_work.work);
+ struct net_device *ndev = lp->ndev;
+
+ ll_temac_recv(ndev);
+}
+
static irqreturn_t ll_temac_tx_irq(int irq, void *_ndev)
{
struct net_device *ndev = _ndev;
@@ -1137,6 +1178,8 @@ static int temac_stop(struct net_device *ndev)

dev_dbg(&ndev->dev, "temac_close()\n");

+ cancel_delayed_work_sync(&lp->restart_work);
+
free_irq(lp->tx_irq, ndev);
free_irq(lp->rx_irq, ndev);

@@ -1258,6 +1301,7 @@ static int temac_probe(struct platform_device *pdev)
lp->dev = &pdev->dev;
lp->options = XTE_OPTION_DEFAULTS;
spin_lock_init(&lp->rx_lock);
+ INIT_DELAYED_WORK(&lp->restart_work, ll_temac_restart_work_func);

/* Setup mutex for synchronization of indirect register access */
if (pdata) {
@@ -1364,6 +1408,7 @@ static int temac_probe(struct platform_device *pdev)
*/
lp->tx_chnl_ctrl = 0x10220000;
lp->rx_chnl_ctrl = 0xff070000;
+ lp->coalesce_count_rx = 0x07;

/* Finished with the DMA node; drop the reference */
of_node_put(dma_np);
@@ -1395,11 +1440,14 @@ static int temac_probe(struct platform_device *pdev)
(pdata->tx_irq_count << 16);
else
lp->tx_chnl_ctrl = 0x10220000;
- if (pdata->rx_irq_timeout || pdata->rx_irq_count)
+ if (pdata->rx_irq_timeout || pdata->rx_irq_count) {
lp->rx_chnl_ctrl = (pdata->rx_irq_timeout << 24) |
(pdata->rx_irq_count << 16);
- else
+ lp->coalesce_count_rx = pdata->rx_irq_count;
+ } else {
lp->rx_chnl_ctrl = 0xff070000;
+ lp->coalesce_count_rx = 0x07;
+ }
}

/* Error handle returned DMA RX and TX interrupts */
--
2.25.0

2020-02-22 00:06:10

by David Miller

Subject: Re: [PATCH net v2 0/4] net: ll_temac: Bugfixes

From: Esben Haabendal <[email protected]>
Date: Fri, 21 Feb 2020 07:47:09 +0100

> Fix a number of bugs which have been present since the first commit.
>
> The bugs fixed in patches 1, 2 and 4 have all been observed in real systems, and
> were relatively easy to reproduce given an appropriate stress setup.
>
> Changes since v1:
>
> - Changed error handling of dma_map_single() in temac_start_xmit() to drop the
> packet instead of returning NETDEV_TX_BUSY.

Series applied, thanks.