Under certain conditions EMAC stop reception of incoming packets and
continuously increment R_MISS register instead of saving data into
provided buffer. The commit implement workaround for such situation.
Then the stall detected EMAC will be restarted.
On device the stall looks like the device lost it's dynamic IP address.
ifconfig shows that interface error counter rapidly increments.
At the same time on the DHCP server we can see continues DHCP-requests
from device.
In real network stalls happen really rarely. To make them frequent the
broadcast storm[1] should be simulated. For simulation it is necessary
to make following connections:
1. connect radxarock to 1st port of switch
2. connect some PC to 2nd port of switch
3. connect two other free ports together using standard ethernet cable,
in order to make a switching loop.
After that, is necessary to make a broadcast storm. For example, running on
PC 'ping' to some IP address triggers ARP-request storm. After some
time (~10sec), EMAC on rk3188 will stall.
Observed and tested on rk3188 radxarock.
[1] https://en.wikipedia.org/wiki/Broadcast_radiation
Signed-off-by: Alexander Kochetkov <[email protected]>
---
drivers/net/ethernet/arc/emac.h | 2 +
drivers/net/ethernet/arc/emac_main.c | 111 ++++++++++++++++++++++++++++++++++
2 files changed, 113 insertions(+)
diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h
index e4feb71..1c7afc5 100644
--- a/drivers/net/ethernet/arc/emac.h
+++ b/drivers/net/ethernet/arc/emac.h
@@ -158,6 +158,8 @@ struct arc_emac_priv {
unsigned int link;
unsigned int duplex;
unsigned int speed;
+
+ unsigned int rx_missed_errors;
};
/**
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 68de2f2..b2e0051 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -26,6 +26,8 @@
#include "emac.h"
+static void arc_emac_restart(struct net_device *ndev);
+
/**
* arc_emac_tx_avail - Return the number of available slots in the tx ring.
* @priv: Pointer to ARC EMAC private data structure.
@@ -259,6 +261,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
}
/**
+ * arc_emac_rx_miss_handle - handle R_MISS register
+ * @ndev: Pointer to the net_device structure.
+ */
+static void arc_emac_rx_miss_handle(struct net_device *ndev)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct net_device_stats *stats = &ndev->stats;
+ unsigned int miss;
+
+ miss = arc_reg_get(priv, R_MISS);
+ if (miss) {
+ stats->rx_errors += miss;
+ stats->rx_missed_errors += miss;
+ priv->rx_missed_errors += miss;
+ }
+}
+
+/**
+ * arc_emac_rx_stall_check - check RX stall
+ * @ndev: Pointer to the net_device structure.
+ * @budget: How many BDs requested to process on 1 call.
+ * @work_done: How many BDs processed
+ *
+ * Under certain conditions EMAC stop reception of incoming packets and
+ * continuously increment R_MISS register instead of saving data into
+ * provided buffer. This function detect that condition and restart
+ * EMAC.
+ */
+static void arc_emac_rx_stall_check(struct net_device *ndev,
+ int budget, unsigned int work_done)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct arc_emac_bd *rxbd;
+
+ if (work_done)
+ priv->rx_missed_errors = 0;
+
+ if (priv->rx_missed_errors && budget) {
+ rxbd = &priv->rxbd[priv->last_rx_bd];
+ if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
+ arc_emac_restart(ndev);
+ priv->rx_missed_errors = 0;
+ }
+ }
+}
+
+/**
* arc_emac_poll - NAPI poll handler.
* @napi: Pointer to napi_struct structure.
* @budget: How many BDs to process on 1 call.
@@ -272,6 +321,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
unsigned int work_done;
arc_emac_tx_clean(ndev);
+ arc_emac_rx_miss_handle(ndev);
work_done = arc_emac_rx(ndev, budget);
if (work_done < budget) {
@@ -279,6 +329,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
}
+ arc_emac_rx_stall_check(ndev, budget, work_done);
+
return work_done;
}
@@ -320,6 +372,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
if (status & MSER_MASK) {
stats->rx_missed_errors += 0x100;
stats->rx_errors += 0x100;
+ priv->rx_missed_errors += 0x100;
+ napi_schedule(&priv->napi);
}
if (status & RXCR_MASK) {
@@ -720,6 +774,63 @@ static int arc_emac_set_address(struct net_device *ndev, void *p)
return 0;
}
+/**
+ * arc_emac_restart - Restart EMAC
+ * @ndev: Pointer to net_device structure.
+ *
+ * This function do hardware reset of EMAC in order to restore
+ * network packets reception.
+ */
+static void arc_emac_restart(struct net_device *ndev)
+{
+ struct arc_emac_priv *priv = netdev_priv(ndev);
+ struct net_device_stats *stats = &ndev->stats;
+ int i;
+
+ if (net_ratelimit())
+ netdev_warn(ndev, "restarting stalled EMAC\n");
+
+ netif_stop_queue(ndev);
+
+ /* Disable interrupts */
+ arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+ /* Disable EMAC */
+ arc_reg_clr(priv, R_CTRL, EN_MASK);
+
+ /* Return the sk_buff to system */
+ arc_free_tx_queue(ndev);
+
+ /* Clean Tx BD's */
+ priv->txbd_curr = 0;
+ priv->txbd_dirty = 0;
+ memset(priv->txbd, 0, TX_RING_SZ);
+
+ for (i = 0; i < RX_BD_NUM; i++) {
+ struct arc_emac_bd *rxbd = &priv->rxbd[i];
+ unsigned int info = le32_to_cpu(rxbd->info);
+
+ if (!(info & FOR_EMAC)) {
+ stats->rx_errors++;
+ stats->rx_dropped++;
+ }
+ /* Return ownership to EMAC */
+ rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
+ }
+ priv->last_rx_bd = 0;
+
+ /* Make sure info is visible to EMAC before enable */
+ wmb();
+
+ /* Enable interrupts */
+ arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);
+
+ /* Enable EMAC */
+ arc_reg_or(priv, R_CTRL, EN_MASK);
+
+ netif_start_queue(ndev);
+}
+
static const struct net_device_ops arc_emac_netdev_ops = {
.ndo_open = arc_emac_open,
.ndo_stop = arc_emac_stop,
--
1.7.9.5
From: Alexander Kochetkov <[email protected]>
Date: Fri, 15 Dec 2017 14:12:51 +0300
> Under certain conditions EMAC stop reception of incoming packets and
> continuously increment R_MISS register instead of saving data into
> provided buffer. The commit implement workaround for such situation.
> Then the stall detected EMAC will be restarted.
>
> On device the stall looks like the device lost it's dynamic IP address.
> ifconfig shows that interface error counter rapidly increments.
> At the same time on the DHCP server we can see continues DHCP-requests
> from device.
>
> In real network stalls happen really rarely. To make them frequent the
> broadcast storm[1] should be simulated. For simulation it is necessary
> to make following connections:
> 1. connect radxarock to 1st port of switch
> 2. connect some PC to 2nd port of switch
> 3. connect two other free ports together using standard ethernet cable,
> in order to make a switching loop.
>
> After that, is necessary to make a broadcast storm. For example, running on
> PC 'ping' to some IP address triggers ARP-request storm. After some
> time (~10sec), EMAC on rk3188 will stall.
>
> Observed and tested on rk3188 radxarock.
>
> [1] https://en.wikipedia.org/wiki/Broadcast_radiation
>
> Signed-off-by: Alexander Kochetkov <[email protected]>
This patch doesn't apply cleanly to any of my trees.