2018-03-20 02:45:16

by Sinan Kaya

[permalink] [raw]
Subject: [PATCH v4 17/17] net: ena: Eliminate duplicate barriers on weakly-ordered archs

Code includes a memory barrier (wmb() or mb()) followed by writel().
writel() already has a barrier on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Create a new wrapper function with a relaxed write operator. Use the new
wrapper when a write follows a memory barrier.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <[email protected]>
---
drivers/net/ethernet/amazon/ena/ena_com.c | 6 ++++--
drivers/net/ethernet/amazon/ena/ena_eth_com.h | 22 ++++++++++++++++++++--
drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++--
3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index bf2de52..b6e628f 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -631,7 +631,8 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
*/
wmb();

- writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+ writel_relaxed(mmio_read_reg,
+ ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);

for (i = 0; i < timeout; i++) {
if (read_resp->req_id == mmio_read->seq_num)
@@ -1826,7 +1827,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)

/* write the aenq doorbell after all AENQ descriptors were read */
mb();
- writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+ writel_relaxed((u32)aenq->head,
+ dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
}

int ena_com_dev_reset(struct ena_com_dev *ena_dev,
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 2f76572..09ef7cd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -107,7 +107,8 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
return io_sq->q_depth - 1 - cnt;
}

-static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq,
+ bool relaxed)
{
u16 tail;

@@ -116,7 +117,24 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
io_sq->qid, tail);

- writel(tail, io_sq->db_addr);
+ if (relaxed)
+ writel_relaxed(tail, io_sq->db_addr);
+ else
+ writel(tail, io_sq->db_addr);
+
+ return 0;
+}
+
+static inline int ena_com_write_sq_doorbell_rel(struct ena_com_io_sq *io_sq)
+{
+ u16 tail;
+
+ tail = io_sq->tail;
+
+ pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
+ io_sq->qid, tail);
+
+ writel_relaxed(tail, io_sq->db_addr);

return 0;
}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 6975150..0530201 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -556,7 +556,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
* issue a doorbell
*/
wmb();
- ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+ ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
}

rx_ring->next_to_use = next_to_use;
@@ -2151,7 +2151,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)

if (netif_xmit_stopped(txq) || !skb->xmit_more) {
/* trigger the dma engine */
- ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+ ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
u64_stats_update_begin(&tx_ring->syncp);
tx_ring->tx_stats.doorbells++;
u64_stats_update_end(&tx_ring->syncp);
--
2.7.4



2018-03-25 12:08:08

by Belgazal, Netanel

[permalink] [raw]
Subject: Re: [PATCH v4 17/17] net: ena: Eliminate duplicate barriers on weakly-ordered archs

I think you should either add a parameter to ena_com_write_sq_doorbell() or add ena_com_write_sq_doorbell_rel().
Right now, you have an unused function.

On 3/20/18, 4:43 AM, "Sinan Kaya" <[email protected]> wrote:

Code includes barrier() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up CPU observing two barriers back to back before executing the
register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a barrier().

Since code already has an explicit barrier call, changing writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <[email protected]>
---
drivers/net/ethernet/amazon/ena/ena_com.c | 6 ++++--
drivers/net/ethernet/amazon/ena/ena_eth_com.h | 22 ++++++++++++++++++++--
drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++--
3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index bf2de52..b6e628f 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -631,7 +631,8 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
*/
wmb();

- writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+ writel_relaxed(mmio_read_reg,
+ ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);

for (i = 0; i < timeout; i++) {
if (read_resp->req_id == mmio_read->seq_num)
@@ -1826,7 +1827,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)

/* write the aenq doorbell after all AENQ descriptors were read */
mb();
- writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+ writel_relaxed((u32)aenq->head,
+ dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
}

int ena_com_dev_reset(struct ena_com_dev *ena_dev,
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 2f76572..09ef7cd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -107,7 +107,8 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
return io_sq->q_depth - 1 - cnt;
}

-static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq,
+ bool relaxed)
{
u16 tail;

@@ -116,7 +117,24 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
io_sq->qid, tail);

- writel(tail, io_sq->db_addr);
+ if (relaxed)
+ writel_relaxed(tail, io_sq->db_addr);
+ else
+ writel(tail, io_sq->db_addr);
+
+ return 0;
+}
+
+static inline int ena_com_write_sq_doorbell_rel(struct ena_com_io_sq *io_sq)
+{
+ u16 tail;
+
+ tail = io_sq->tail;
+
+ pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
+ io_sq->qid, tail);
+
+ writel_relaxed(tail, io_sq->db_addr);

return 0;
}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 6975150..0530201 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -556,7 +556,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
* issue a doorbell
*/
wmb();
- ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+ ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
}

rx_ring->next_to_use = next_to_use;
@@ -2151,7 +2151,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)

if (netif_xmit_stopped(txq) || !skb->xmit_more) {
/* trigger the dma engine */
- ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+ ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
u64_stats_update_begin(&tx_ring->syncp);
tx_ring->tx_stats.doorbells++;
u64_stats_update_end(&tx_ring->syncp);
--
2.7.4




2018-03-25 13:34:52

by Sinan Kaya

[permalink] [raw]
Subject: Re: [PATCH v4 17/17] net: ena: Eliminate duplicate barriers on weakly-ordered archs

On 2018-03-25 08:06, Belgazal, Netanel wrote:
> I think you should either add a parameter to
> ena_com_write_sq_doorbell() or add ena_com_write_sq_doorbell_rel().
> Right now, you have unused function.

That is true. I got rid of ena_com_write_sq_doorbell_rel.

>
> On 3/20/18, 4:43 AM, "Sinan Kaya" <[email protected]> wrote:
>
> Code includes barrier() followed by writel(). writel() already has a
> barrier on some architectures like arm64.
>
> This ends up CPU observing two barriers back to back before
> executing the
> register write.
>
> Create a new wrapper function with relaxed write operator. Use the
> new
> wrapper when a write is following a barrier().
>
> Since code already has an explicit barrier call, changing writel()
> to
> writel_relaxed().
>
> Signed-off-by: Sinan Kaya <[email protected]>
> ---
> drivers/net/ethernet/amazon/ena/ena_com.c | 6 ++++--
> drivers/net/ethernet/amazon/ena/ena_eth_com.h | 22
> ++++++++++++++++++++--
> drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++--
> 3 files changed, 26 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c
> b/drivers/net/ethernet/amazon/ena/ena_com.c
> index bf2de52..b6e628f 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_com.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_com.c
> @@ -631,7 +631,8 @@ static u32 ena_com_reg_bar_read32(struct
> ena_com_dev *ena_dev, u16 offset)
> */
> wmb();
>
> - writel(mmio_read_reg, ena_dev->reg_bar +
> ENA_REGS_MMIO_REG_READ_OFF);
> + writel_relaxed(mmio_read_reg,
> + ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
>
> for (i = 0; i < timeout; i++) {
> if (read_resp->req_id == mmio_read->seq_num)
> @@ -1826,7 +1827,8 @@ void ena_com_aenq_intr_handler(struct
> ena_com_dev *dev, void *data)
>
> /* write the aenq doorbell after all AENQ descriptors were read
> */
> mb();
> - writel((u32)aenq->head, dev->reg_bar +
> ENA_REGS_AENQ_HEAD_DB_OFF);
> + writel_relaxed((u32)aenq->head,
> + dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
> }
>
> int ena_com_dev_reset(struct ena_com_dev *ena_dev,
> diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
> b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
> index 2f76572..09ef7cd 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
> +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
> @@ -107,7 +107,8 @@ static inline int
> ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
> return io_sq->q_depth - 1 - cnt;
> }
>
> -static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq
> *io_sq)
> +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq
> *io_sq,
> + bool relaxed)
> {
> u16 tail;
>
> @@ -116,7 +117,24 @@ static inline int
> ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
> pr_debug("write submission queue doorbell for queue: %d tail:
> %d\n",
> io_sq->qid, tail);
>
> - writel(tail, io_sq->db_addr);
> + if (relaxed)
> + writel_relaxed(tail, io_sq->db_addr);
> + else
> + writel(tail, io_sq->db_addr);
> +
> + return 0;
> +}
> +
> +static inline int ena_com_write_sq_doorbell_rel(struct
> ena_com_io_sq *io_sq)
> +{
> + u16 tail;
> +
> + tail = io_sq->tail;
> +
> + pr_debug("write submission queue doorbell for queue: %d tail:
> %d\n",
> + io_sq->qid, tail);
> +
> + writel_relaxed(tail, io_sq->db_addr);
>
> return 0;
> }
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index 6975150..0530201 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -556,7 +556,7 @@ static int ena_refill_rx_bufs(struct ena_ring
> *rx_ring, u32 num)
> * issue a doorbell
> */
> wmb();
> - ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
> + ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
> }
>
> rx_ring->next_to_use = next_to_use;
> @@ -2151,7 +2151,7 @@ static netdev_tx_t ena_start_xmit(struct
> sk_buff *skb, struct net_device *dev)
>
> if (netif_xmit_stopped(txq) || !skb->xmit_more) {
> /* trigger the dma engine */
> - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
> + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
> u64_stats_update_begin(&tx_ring->syncp);
> tx_ring->tx_stats.doorbells++;
> u64_stats_update_end(&tx_ring->syncp);
> --
> 2.7.4