This patch adds bcmgenet_tx_poll for all active tx_rings. It reduces
the interrupt load and lets the upper network stack submit new xmit
work on time.

Previously, bcmgenet_tx_reclaim on tx_ring[{0,1,2,3}] ran only once
fewer than 18 (MAX_SKB_FRAGS + 1) descriptors were left free, so
transmitted skbs were reclaimed too late. This caused the xmit
performance degradation seen since 605ad7f ("tcp: refine TSO
autosizing").
Signed-off-by: Jaedon Shin <[email protected]>
---
drivers/net/ethernet/broadcom/genet/bcmgenet.c | 111 +++++++++++++++++++------
drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +
2 files changed, 87 insertions(+), 26 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index ff83c46b..d18bea1 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -971,13 +971,14 @@ static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv,
}
/* Unlocked version of the reclaim routine */
-static void __bcmgenet_tx_reclaim(struct net_device *dev,
+static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
struct bcmgenet_tx_ring *ring)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
int last_tx_cn, last_c_index, num_tx_bds;
struct enet_cb *tx_cb_ptr;
struct netdev_queue *txq;
+ unsigned int pkts_compl = 0;
unsigned int bds_compl;
unsigned int c_index;
@@ -1005,6 +1006,7 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev,
tx_cb_ptr = ring->cbs + last_c_index;
bds_compl = 0;
if (tx_cb_ptr->skb) {
+ pkts_compl++;
bds_compl = skb_shinfo(tx_cb_ptr->skb)->nr_frags + 1;
dev->stats.tx_bytes += tx_cb_ptr->skb->len;
dma_unmap_single(&dev->dev,
@@ -1028,23 +1030,45 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev,
last_c_index &= (num_tx_bds - 1);
}
- if (ring->free_bds > (MAX_SKB_FRAGS + 1))
- ring->int_disable(priv, ring);
-
- if (netif_tx_queue_stopped(txq))
- netif_tx_wake_queue(txq);
+ if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
+ if (netif_tx_queue_stopped(txq))
+ netif_tx_wake_queue(txq);
+ }
ring->c_index = c_index;
+
+ return pkts_compl;
}
-static void bcmgenet_tx_reclaim(struct net_device *dev,
+static unsigned int bcmgenet_tx_reclaim(struct net_device *dev,
struct bcmgenet_tx_ring *ring)
{
+ unsigned int released;
unsigned long flags;
spin_lock_irqsave(&ring->lock, flags);
- __bcmgenet_tx_reclaim(dev, ring);
+ released = __bcmgenet_tx_reclaim(dev, ring);
spin_unlock_irqrestore(&ring->lock, flags);
+
+ return released;
+}
+
+static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
+{
+ struct bcmgenet_tx_ring *ring =
+ container_of(napi, struct bcmgenet_tx_ring, napi);
+ unsigned int work_done = 0;
+
+ work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
+
+ if (work_done == 0) {
+ napi_complete(napi);
+ ring->int_enable(ring->priv, ring);
+
+ return 0;
+ }
+
+ return budget;
}
static void bcmgenet_tx_reclaim_all(struct net_device *dev)
@@ -1302,10 +1326,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
bcmgenet_tdma_ring_writel(priv, ring->index,
ring->prod_index, TDMA_PROD_INDEX);
- if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) {
+ if (ring->free_bds <= (MAX_SKB_FRAGS + 1))
netif_tx_stop_queue(txq);
- ring->int_enable(priv, ring);
- }
out:
spin_unlock_irqrestore(&ring->lock, flags);
@@ -1647,7 +1669,7 @@ static int init_umac(struct bcmgenet_priv *priv)
bcmgenet_intr_disable(priv);
- cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE;
+ cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE;
dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__);
@@ -1693,6 +1715,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
unsigned int first_bd;
spin_lock_init(&ring->lock);
+ ring->priv = priv;
+ netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
ring->index = index;
if (index == DESC_INDEX) {
ring->queue = 0;
@@ -1738,6 +1762,17 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
TDMA_WRITE_PTR);
bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1,
DMA_END_ADDR);
+
+ napi_enable(&ring->napi);
+}
+
+static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv,
+ unsigned int index)
+{
+ struct bcmgenet_tx_ring *ring = &priv->tx_rings[index];
+
+ napi_disable(&ring->napi);
+ netif_napi_del(&ring->napi);
}
/* Initialize a RDMA ring */
@@ -1907,7 +1942,7 @@ static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv)
return ret;
}
-static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
+static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv)
{
int i;
@@ -1926,6 +1961,18 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
kfree(priv->tx_cbs);
}
+static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
+{
+ int i;
+
+ bcmgenet_fini_tx_ring(priv, DESC_INDEX);
+
+ for (i = 0; i < priv->hw_params->tx_queues; i++)
+ bcmgenet_fini_tx_ring(priv, i);
+
+ __bcmgenet_fini_dma(priv);
+}
+
/* init_edma: Initialize DMA control register */
static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
{
@@ -1952,7 +1999,7 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb),
GFP_KERNEL);
if (!priv->tx_cbs) {
- bcmgenet_fini_dma(priv);
+ __bcmgenet_fini_dma(priv);
return -ENOMEM;
}
@@ -1975,9 +2022,6 @@ static int bcmgenet_poll(struct napi_struct *napi, int budget)
struct bcmgenet_priv, napi);
unsigned int work_done;
- /* tx reclaim */
- bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]);
-
work_done = bcmgenet_desc_rx(priv, budget);
/* Advancing our consumer index*/
@@ -2022,28 +2066,34 @@ static void bcmgenet_irq_task(struct work_struct *work)
static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
{
struct bcmgenet_priv *priv = dev_id;
+ struct bcmgenet_tx_ring *ring;
unsigned int index;
/* Save irq status for bottom-half processing. */
priv->irq1_stat =
bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
- ~priv->int1_mask;
+ ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
/* clear interrupts */
bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
netif_dbg(priv, intr, priv->dev,
"%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
+
/* Check the MBDONE interrupts.
* packet is done, reclaim descriptors
*/
- if (priv->irq1_stat & 0x0000ffff) {
- index = 0;
- for (index = 0; index < 16; index++) {
- if (priv->irq1_stat & (1 << index))
- bcmgenet_tx_reclaim(priv->dev,
- &priv->tx_rings[index]);
+ for (index = 0; index < priv->hw_params->tx_queues; index++) {
+ if (!(priv->irq1_stat & BIT(index)))
+ continue;
+
+ ring = &priv->tx_rings[index];
+
+ if (likely(napi_schedule_prep(&ring->napi))) {
+ ring->int_disable(priv, ring);
+ __napi_schedule(&ring->napi);
}
}
+
return IRQ_HANDLED;
}
@@ -2075,8 +2125,12 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
}
if (priv->irq0_stat &
(UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) {
- /* Tx reclaim */
- bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]);
+ struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX];
+
+ if (likely(napi_schedule_prep(&ring->napi))) {
+ ring->int_disable(priv, ring);
+ __napi_schedule(&ring->napi);
+ }
}
if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
UMAC_IRQ_PHY_DET_F |
@@ -2168,10 +2222,15 @@ static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl)
static void bcmgenet_netif_start(struct net_device *dev)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
+ int index;
/* Start the network engine */
napi_enable(&priv->napi);
+ for (index = 0; index < priv->hw_params->tx_queues; index++)
+ bcmgenet_intrl2_1_writel(priv, (1 << index),
+ INTRL2_CPU_MASK_CLEAR);
+
umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true);
if (phy_is_internal(priv->phydev))
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index b36ddec..0d370d1 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -520,6 +520,7 @@ struct bcmgenet_hw_params {
struct bcmgenet_tx_ring {
spinlock_t lock; /* ring lock */
+ struct napi_struct napi; /* NAPI per tx queue */
unsigned int index; /* ring index */
unsigned int queue; /* queue index */
struct enet_cb *cbs; /* tx ring buffer control block*/
@@ -534,6 +535,7 @@ struct bcmgenet_tx_ring {
struct bcmgenet_tx_ring *);
void (*int_disable)(struct bcmgenet_priv *priv,
struct bcmgenet_tx_ring *);
+ struct bcmgenet_priv *priv;
};
/* device context */
--
2.3.0
On Fri, 2015-02-27 at 23:27 +0900, Jaedon Shin wrote:
> This patch adds bcmgenet_tx_poll for all active tx_rings. It reduces
> the interrupt load and lets the upper network stack submit new xmit
> work on time.
>
> Previously, bcmgenet_tx_reclaim on tx_ring[{0,1,2,3}] ran only once
> fewer than 18 (MAX_SKB_FRAGS + 1) descriptors were left free, so
> transmitted skbs were reclaimed too late. This caused the xmit
> performance degradation seen since 605ad7f ("tcp: refine TSO
> autosizing").
>
> Signed-off-by: Jaedon Shin <[email protected]>
> ---
> drivers/net/ethernet/broadcom/genet/bcmgenet.c | 111 +++++++++++++++++++------
> drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +
> 2 files changed, 87 insertions(+), 26 deletions(-)
Oh I only saw your patch now. It looks better than the quick fix ;)
On 27/02/15 06:27, Jaedon Shin wrote:
> This patch adds bcmgenet_tx_poll for all active tx_rings. It reduces
> the interrupt load and lets the upper network stack submit new xmit
> work on time.
>
> Previously, bcmgenet_tx_reclaim on tx_ring[{0,1,2,3}] ran only once
> fewer than 18 (MAX_SKB_FRAGS + 1) descriptors were left free, so
> transmitted skbs were reclaimed too late. This caused the xmit
> performance degradation seen since 605ad7f ("tcp: refine TSO
> autosizing").
This looks very similar to my previous attempts at using NAPI for TX
completion, thanks for doing this.
One thing you are not mentioning in your commit message is that ring16
used to be reclaimed/cleaned as part of the shared RX/TX NAPI context
(bcmgenet_poll), while you are now dedicating a NAPI context to it and
using bcmgenet_tx_poll() for reclaim. This is a big enough change to
the driver structure that it deserves to be reflected in the commit
message.
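
To spell it out for the commit message, ring 16 now follows the same
per-ring pattern as rings 0-3 (condensed from the hunks above, not a
complete listing):

	/* ISR side (isr0 for ring 16, isr1 for rings 0-3): mask the
	 * ring's interrupt and hand off to its dedicated NAPI context.
	 */
	if (likely(napi_schedule_prep(&ring->napi))) {
		ring->int_disable(priv, ring);
		__napi_schedule(&ring->napi);
	}

	/* NAPI side: reclaim completed descriptors; once a poll pass
	 * finds no completed packets, stop polling and unmask the
	 * ring's interrupt again.
	 */
	static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
	{
		struct bcmgenet_tx_ring *ring =
			container_of(napi, struct bcmgenet_tx_ring, napi);
		unsigned int work_done;

		work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
		if (work_done == 0) {
			napi_complete(napi);
			ring->int_enable(ring->priv, ring);
			return 0;
		}
		return budget;
	}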
>
> Signed-off-by: Jaedon Shin <[email protected]>
Signed-off-by: Florian Fainelli <[email protected]>
> ---
> drivers/net/ethernet/broadcom/genet/bcmgenet.c | 111 +++++++++++++++++++------
> drivers/net/ethernet/broadcom/genet/bcmgenet.h | 2 +
> 2 files changed, 87 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> index ff83c46b..d18bea1 100644
> --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
> @@ -971,13 +971,14 @@ static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv,
> }
>
> /* Unlocked version of the reclaim routine */
> -static void __bcmgenet_tx_reclaim(struct net_device *dev,
> +static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
> struct bcmgenet_tx_ring *ring)
> {
> struct bcmgenet_priv *priv = netdev_priv(dev);
> int last_tx_cn, last_c_index, num_tx_bds;
> struct enet_cb *tx_cb_ptr;
> struct netdev_queue *txq;
> + unsigned int pkts_compl = 0;
> unsigned int bds_compl;
> unsigned int c_index;
>
> @@ -1005,6 +1006,7 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev,
> tx_cb_ptr = ring->cbs + last_c_index;
> bds_compl = 0;
> if (tx_cb_ptr->skb) {
> + pkts_compl++;
> bds_compl = skb_shinfo(tx_cb_ptr->skb)->nr_frags + 1;
> dev->stats.tx_bytes += tx_cb_ptr->skb->len;
> dma_unmap_single(&dev->dev,
> @@ -1028,23 +1030,45 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev,
> last_c_index &= (num_tx_bds - 1);
> }
>
> - if (ring->free_bds > (MAX_SKB_FRAGS + 1))
> - ring->int_disable(priv, ring);
> -
> - if (netif_tx_queue_stopped(txq))
> - netif_tx_wake_queue(txq);
> + if (ring->free_bds > (MAX_SKB_FRAGS + 1)) {
> + if (netif_tx_queue_stopped(txq))
> + netif_tx_wake_queue(txq);
> + }
>
> ring->c_index = c_index;
> +
> + return pkts_compl;
> }
>
> -static void bcmgenet_tx_reclaim(struct net_device *dev,
> +static unsigned int bcmgenet_tx_reclaim(struct net_device *dev,
> struct bcmgenet_tx_ring *ring)
> {
> + unsigned int released;
> unsigned long flags;
>
> spin_lock_irqsave(&ring->lock, flags);
> - __bcmgenet_tx_reclaim(dev, ring);
> + released = __bcmgenet_tx_reclaim(dev, ring);
> spin_unlock_irqrestore(&ring->lock, flags);
> +
> + return released;
> +}
> +
> +static int bcmgenet_tx_poll(struct napi_struct *napi, int budget)
> +{
> + struct bcmgenet_tx_ring *ring =
> + container_of(napi, struct bcmgenet_tx_ring, napi);
> + unsigned int work_done = 0;
> +
> + work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring);
> +
> + if (work_done == 0) {
> + napi_complete(napi);
> + ring->int_enable(ring->priv, ring);
> +
> + return 0;
> + }
> +
> + return budget;
> }
>
> static void bcmgenet_tx_reclaim_all(struct net_device *dev)
> @@ -1302,10 +1326,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev)
> bcmgenet_tdma_ring_writel(priv, ring->index,
> ring->prod_index, TDMA_PROD_INDEX);
>
> - if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) {
> + if (ring->free_bds <= (MAX_SKB_FRAGS + 1))
> netif_tx_stop_queue(txq);
> - ring->int_enable(priv, ring);
> - }
>
> out:
> spin_unlock_irqrestore(&ring->lock, flags);
> @@ -1647,7 +1669,7 @@ static int init_umac(struct bcmgenet_priv *priv)
>
> bcmgenet_intr_disable(priv);
>
> - cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE;
> + cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE;
>
> dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__);
>
> @@ -1693,6 +1715,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
> unsigned int first_bd;
>
> spin_lock_init(&ring->lock);
> + ring->priv = priv;
> + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64);
> ring->index = index;
> if (index == DESC_INDEX) {
> ring->queue = 0;
> @@ -1738,6 +1762,17 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv,
> TDMA_WRITE_PTR);
> bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1,
> DMA_END_ADDR);
> +
> + napi_enable(&ring->napi);
> +}
> +
> +static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv,
> + unsigned int index)
> +{
> + struct bcmgenet_tx_ring *ring = &priv->tx_rings[index];
> +
> + napi_disable(&ring->napi);
> + netif_napi_del(&ring->napi);
> }
>
> /* Initialize a RDMA ring */
> @@ -1907,7 +1942,7 @@ static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv)
> return ret;
> }
>
> -static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
> +static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv)
> {
> int i;
>
> @@ -1926,6 +1961,18 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
> kfree(priv->tx_cbs);
> }
>
> +static void bcmgenet_fini_dma(struct bcmgenet_priv *priv)
> +{
> + int i;
> +
> + bcmgenet_fini_tx_ring(priv, DESC_INDEX);
> +
> + for (i = 0; i < priv->hw_params->tx_queues; i++)
> + bcmgenet_fini_tx_ring(priv, i);
> +
> + __bcmgenet_fini_dma(priv);
> +}
> +
> /* init_edma: Initialize DMA control register */
> static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
> {
> @@ -1952,7 +1999,7 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv)
> priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb),
> GFP_KERNEL);
> if (!priv->tx_cbs) {
> - bcmgenet_fini_dma(priv);
> + __bcmgenet_fini_dma(priv);
> return -ENOMEM;
> }
>
> @@ -1975,9 +2022,6 @@ static int bcmgenet_poll(struct napi_struct *napi, int budget)
> struct bcmgenet_priv, napi);
> unsigned int work_done;
>
> - /* tx reclaim */
> - bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]);
> -
> work_done = bcmgenet_desc_rx(priv, budget);
>
> /* Advancing our consumer index*/
> @@ -2022,28 +2066,34 @@ static void bcmgenet_irq_task(struct work_struct *work)
> static irqreturn_t bcmgenet_isr1(int irq, void *dev_id)
> {
> struct bcmgenet_priv *priv = dev_id;
> + struct bcmgenet_tx_ring *ring;
> unsigned int index;
>
> /* Save irq status for bottom-half processing. */
> priv->irq1_stat =
> bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) &
> - ~priv->int1_mask;
> + ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS);
> /* clear interrupts */
> bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR);
>
> netif_dbg(priv, intr, priv->dev,
> "%s: IRQ=0x%x\n", __func__, priv->irq1_stat);
> +
> /* Check the MBDONE interrupts.
> * packet is done, reclaim descriptors
> */
> - if (priv->irq1_stat & 0x0000ffff) {
> - index = 0;
> - for (index = 0; index < 16; index++) {
> - if (priv->irq1_stat & (1 << index))
> - bcmgenet_tx_reclaim(priv->dev,
> - &priv->tx_rings[index]);
> + for (index = 0; index < priv->hw_params->tx_queues; index++) {
> + if (!(priv->irq1_stat & BIT(index)))
> + continue;
> +
> + ring = &priv->tx_rings[index];
> +
> + if (likely(napi_schedule_prep(&ring->napi))) {
> + ring->int_disable(priv, ring);
> + __napi_schedule(&ring->napi);
> }
> }
> +
> return IRQ_HANDLED;
> }
>
> @@ -2075,8 +2125,12 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
> }
> if (priv->irq0_stat &
> (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) {
> - /* Tx reclaim */
> - bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]);
> + struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX];
> +
> + if (likely(napi_schedule_prep(&ring->napi))) {
> + ring->int_disable(priv, ring);
> + __napi_schedule(&ring->napi);
> + }
> }
> if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R |
> UMAC_IRQ_PHY_DET_F |
> @@ -2168,10 +2222,15 @@ static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl)
> static void bcmgenet_netif_start(struct net_device *dev)
> {
> struct bcmgenet_priv *priv = netdev_priv(dev);
> + int index;
>
> /* Start the network engine */
> napi_enable(&priv->napi);
>
> + for (index = 0; index < priv->hw_params->tx_queues; index++)
> + bcmgenet_intrl2_1_writel(priv, (1 << index),
> + INTRL2_CPU_MASK_CLEAR);
> +
For consistency, we would probably want to do this in init_umac()?
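Something along these lines, next to the existing cpu_mask_clear
handling (untested sketch; it reuses the intrl2 accessors and the
hw_params->tx_queues bound already used elsewhere in this patch):

	/* in init_umac(), after setting up the intrl2_0 sources */
	cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE;
	bcmgenet_intrl2_0_writel(priv, cpu_mask_clear,
				 INTRL2_CPU_MASK_CLEAR);

	/* unmask one TXDMA MBDONE bit per active TX queue on intrl2_1 */
	for (i = 0; i < priv->hw_params->tx_queues; i++)
		bcmgenet_intrl2_1_writel(priv, 1 << i,
					 INTRL2_CPU_MASK_CLEAR);

That would also keep bcmgenet_netif_start() free of interrupt mask
details.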
> umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true);
>
> if (phy_is_internal(priv->phydev))
> diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
> index b36ddec..0d370d1 100644
> --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
> +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
> @@ -520,6 +520,7 @@ struct bcmgenet_hw_params {
>
> struct bcmgenet_tx_ring {
> spinlock_t lock; /* ring lock */
> + struct napi_struct napi; /* NAPI per tx queue */
> unsigned int index; /* ring index */
> unsigned int queue; /* queue index */
> struct enet_cb *cbs; /* tx ring buffer control block*/
> @@ -534,6 +535,7 @@ struct bcmgenet_tx_ring {
> struct bcmgenet_tx_ring *);
> void (*int_disable)(struct bcmgenet_priv *priv,
> struct bcmgenet_tx_ring *);
> + struct bcmgenet_priv *priv;
> };
>
> /* device context */
>
--
Florian
From: Florian Fainelli <[email protected]>
Date: Fri, 27 Feb 2015 09:38:12 -0800
> On 27/02/15 06:27, Jaedon Shin wrote:
>> This patch adds bcmgenet_tx_poll for all active tx_rings. It reduces
>> the interrupt load and lets the upper network stack submit new xmit
>> work on time.
>>
>> Previously, bcmgenet_tx_reclaim on tx_ring[{0,1,2,3}] ran only once
>> fewer than 18 (MAX_SKB_FRAGS + 1) descriptors were left free, so
>> transmitted skbs were reclaimed too late. This caused the xmit
>> performance degradation seen since 605ad7f ("tcp: refine TSO
>> autosizing").
>
> This looks very similar to my previous attempts at using NAPI for TX
> completion, thanks for doing this.
>
> One thing you are not mentioning in your commit message is that ring16
> used to be reclaimed/cleaned as part of the shared RX/TX NAPI context
> (bcmgenet_poll), while you are now dedicating a NAPI context to it and
> using bcmgenet_tx_poll() for reclaim. This is a big enough change to
> the driver structure that it deserves to be reflected in the commit
> message.
>
>>
>> Signed-off-by: Jaedon Shin <[email protected]>
>
> Signed-off-by: Florian Fainelli <[email protected]>
Besides needing an updated commit message, this overlaps with a
cleanup patch in my queue from Petri Gynther that touches the same
exact code.
http://patchwork.ozlabs.org/patch/443604/
How would you guys like me to sort this all out? Drop Petri's
change for now?
On 27/02/15 14:10, David Miller wrote:
> From: Florian Fainelli <[email protected]>
> Date: Fri, 27 Feb 2015 09:38:12 -0800
>
>> On 27/02/15 06:27, Jaedon Shin wrote:
>>> This patch adds bcmgenet_tx_poll for all active tx_rings. It reduces
>>> the interrupt load and lets the upper network stack submit new xmit
>>> work on time.
>>>
>>> Previously, bcmgenet_tx_reclaim on tx_ring[{0,1,2,3}] ran only once
>>> fewer than 18 (MAX_SKB_FRAGS + 1) descriptors were left free, so
>>> transmitted skbs were reclaimed too late. This caused the xmit
>>> performance degradation seen since 605ad7f ("tcp: refine TSO
>>> autosizing").
>>
>> This looks very similar to my previous attempts at using NAPI for TX
>> completion, thanks for doing this.
>>
>> One thing you are not mentioning in your commit message is that ring16
>> used to be reclaimed/cleaned as part of the shared RX/TX NAPI context
>> (bcmgenet_poll), while you are now dedicating a NAPI context to it and
>> using bcmgenet_tx_poll() for reclaim. This is a big enough change to
>> the driver structure that it deserves to be reflected in the commit
>> message.
>>
>>>
>>> Signed-off-by: Jaedon Shin <[email protected]>
>>
>> Signed-off-by: Florian Fainelli <[email protected]>
>
> Besides needing an updated commit message, this overlaps with a
> cleanup patch in my queue from Petri Gynther that touches the same
> exact code.
>
> http://patchwork.ozlabs.org/patch/443604/
>
> How would you guys like me to sort this all out? Drop Petri's
> change for now?
If you can take Petri's change now and Jaedon then resubmits on top of
that change, would that be acceptable?
--
Florian
From: Florian Fainelli <[email protected]>
Date: Fri, 27 Feb 2015 14:19:02 -0800
> If you can take Petri's change now and Jaedon then resubmits on top of
> that change, would that be acceptable?
I think it should go the other way around.
The bug should be fixed in 'net'.
Then Petri can resubmit the cleanup relative to that once I merge
'net' into 'net-next'. It's a cleanup so it's a net-next change.
On 27/02/15 14:38, David Miller wrote:
> From: Florian Fainelli <[email protected]>
> Date: Fri, 27 Feb 2015 14:19:02 -0800
>
>> If you can take Petri's change now and Jaedon then resubmits on top of
>> that change, would that be acceptable?
>
> I think it should go the other way around.
I was not quite sure whether Jaedon's change would be suitable for 'net'
considering how many lines of code it changes, but OK, it's better that
way.
>
> The bug should be fixed in 'net'.
>
> Then Petri can resubmit the cleanup relative to that once I merge
> 'net' into 'net-next'. It's a cleanup so it's a net-next change.
>
Sure.
--
Florian