2023-10-23 11:45:43

by Shinas Rasheed

[permalink] [raw]
Subject: [PATCH net-next 0/3] Cleanup and optimizations to transmit code

Clean up DMA sync calls, add xmit_more functionality, and then
remove atomic variable usage from the Tx data path.

Shinas Rasheed (3):
octeon_ep: remove dma sync in transmit path
octeon_ep: implement xmit_more in transmit
octeon_ep: remove atomic variable usage in Tx data path

.../ethernet/marvell/octeon_ep/octep_config.h | 3 +-
.../ethernet/marvell/octeon_ep/octep_main.c | 35 ++++++++++---------
.../ethernet/marvell/octeon_ep/octep_main.h | 9 +++++
.../net/ethernet/marvell/octeon_ep/octep_tx.c | 5 +--
.../net/ethernet/marvell/octeon_ep/octep_tx.h | 3 --
5 files changed, 30 insertions(+), 25 deletions(-)

--
2.25.1


2023-10-23 11:46:11

by Shinas Rasheed

[permalink] [raw]
Subject: [PATCH net-next 2/3] octeon_ep: implement xmit_more in transmit

Adds xmit_more handling in tx datapath for octeon_ep pf.

Signed-off-by: Shinas Rasheed <[email protected]>
---
.../ethernet/marvell/octeon_ep/octep_config.h | 2 +-
.../ethernet/marvell/octeon_ep/octep_main.c | 19 +++++++++++++++----
2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
index 1622a6ebf036..ed8b1ace56b9 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
@@ -15,7 +15,7 @@
/* Tx Queue: maximum descriptors per ring */
#define OCTEP_IQ_MAX_DESCRIPTORS 1024
/* Minimum input (Tx) requests to be enqueued to ring doorbell */
-#define OCTEP_DB_MIN 1
+#define OCTEP_DB_MIN 8
/* Packet threshold for Tx queue interrupt */
#define OCTEP_IQ_INTR_THRESHOLD 0x0

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index bf1e376a4232..730443ba2f5b 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -818,6 +818,7 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
struct octep_iq *iq;
skb_frag_t *frag;
u16 nr_frags, si;
+ int xmit_more;
u16 q_no, wi;

q_no = skb_get_queue_mapping(skb);
@@ -892,18 +893,28 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
}

netdev_tx_sent_queue(iq->netdev_q, skb->len);
+
+ xmit_more = netdev_xmit_more();
+
skb_tx_timestamp(skb);
atomic_inc(&iq->instr_pending);
+ iq->fill_cnt++;
wi++;
if (wi == iq->max_count)
wi = 0;
iq->host_write_index = wi;
+ if (xmit_more &&
+ (atomic_read(&iq->instr_pending) <
+ (iq->max_count - OCTEP_WAKE_QUEUE_THRESHOLD)) &&
+ iq->fill_cnt < iq->fill_threshold)
+ return NETDEV_TX_OK;
+
/* Flush the hw descriptor before writing to doorbell */
wmb();
-
- /* Ring Doorbell to notify the NIC there is a new packet */
- writel(1, iq->doorbell_reg);
- iq->stats.instr_posted++;
+ /* Ring Doorbell to notify the NIC of new packets */
+ writel(iq->fill_cnt, iq->doorbell_reg);
+ iq->stats.instr_posted += iq->fill_cnt;
+ iq->fill_cnt = 0;
return NETDEV_TX_OK;

dma_map_sg_err:
--
2.25.1

2023-10-23 11:46:20

by Shinas Rasheed

[permalink] [raw]
Subject: [PATCH net-next 1/3] octeon_ep: remove dma sync in transmit path

Clean up DMA sync calls for scatter-gather
mappings in the transmit path

Signed-off-by: Shinas Rasheed <[email protected]>
---
drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 7 -------
1 file changed, 7 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 552970c7dec0..bf1e376a4232 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -869,9 +869,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
if (dma_mapping_error(iq->dev, dma))
goto dma_map_err;

- dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma,
- OCTEP_SGLIST_SIZE_PER_PKT,
- DMA_TO_DEVICE);
memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT);
sglist[0].len[3] = len;
sglist[0].dma_ptr[0] = dma;
@@ -891,10 +888,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
frag++;
si++;
}
- dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma,
- OCTEP_SGLIST_SIZE_PER_PKT,
- DMA_TO_DEVICE);
-
hw_desc->dptr = tx_buffer->sglist_dma;
}

--
2.25.1

2023-10-23 11:47:02

by Shinas Rasheed

[permalink] [raw]
Subject: [PATCH net-next 3/3] octeon_ep: remove atomic variable usage in Tx data path

Replace the atomic variable "instr_pending", which represents the number of
posted Tx instructions pending completion, with the host_write_index and
flush_index variables in the xmit and completion processing respectively.

Signed-off-by: Shinas Rasheed <[email protected]>
---
drivers/net/ethernet/marvell/octeon_ep/octep_config.h | 1 +
drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 11 ++++-------
drivers/net/ethernet/marvell/octeon_ep/octep_main.h | 9 +++++++++
drivers/net/ethernet/marvell/octeon_ep/octep_tx.c | 5 +----
drivers/net/ethernet/marvell/octeon_ep/octep_tx.h | 3 ---
5 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
index ed8b1ace56b9..91cfa19c65b9 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
@@ -13,6 +13,7 @@
#define OCTEP_64BYTE_INSTR 64

/* Tx Queue: maximum descriptors per ring */
+/* This needs to be a power of 2 */
#define OCTEP_IQ_MAX_DESCRIPTORS 1024
/* Minimum input (Tx) requests to be enqueued to ring doorbell */
#define OCTEP_DB_MIN 8
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 730443ba2f5b..d7498a864385 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -777,7 +777,7 @@ static int octep_stop(struct net_device *netdev)
*/
static inline int octep_iq_full_check(struct octep_iq *iq)
{
- if (likely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+ if (likely((IQ_INSTR_SPACE(iq)) >
OCTEP_WAKE_QUEUE_THRESHOLD))
return 0;

@@ -787,7 +787,7 @@ static inline int octep_iq_full_check(struct octep_iq *iq)
/* check again and restart the queue, in case NAPI has just freed
* enough Tx ring entries.
*/
- if (unlikely((iq->max_count - atomic_read(&iq->instr_pending)) >=
+ if (unlikely(IQ_INSTR_SPACE(iq) >
OCTEP_WAKE_QUEUE_THRESHOLD)) {
netif_start_subqueue(iq->netdev, iq->q_no);
iq->stats.restart_cnt++;
@@ -897,14 +897,11 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
xmit_more = netdev_xmit_more();

skb_tx_timestamp(skb);
- atomic_inc(&iq->instr_pending);
iq->fill_cnt++;
wi++;
- if (wi == iq->max_count)
- wi = 0;
- iq->host_write_index = wi;
+ iq->host_write_index = wi & iq->ring_size_mask;
if (xmit_more &&
- (atomic_read(&iq->instr_pending) <
+ (IQ_INSTR_PENDING(iq) <
(iq->max_count - OCTEP_WAKE_QUEUE_THRESHOLD)) &&
iq->fill_cnt < iq->fill_threshold)
return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
index 6df902ebb7f3..c33e046b69a4 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h
@@ -40,6 +40,15 @@
#define OCTEP_OQ_INTR_RESEND_BIT 59

#define OCTEP_MMIO_REGIONS 3
+
+#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \
+ ((iq__)->host_write_index - (iq__)->flush_index) & \
+ (iq__)->ring_size_mask; \
+ })
+#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \
+ (iq_)->max_count - IQ_INSTR_PENDING(iq_); \
+ })
+
/* PCI address space mapping information.
* Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of
* Octeon gets mapped to different physical address spaces in
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
index d0adb82d65c3..06851b78aa28 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.c
@@ -21,7 +21,6 @@ static void octep_iq_reset_indices(struct octep_iq *iq)
iq->flush_index = 0;
iq->pkts_processed = 0;
iq->pkt_in_done = 0;
- atomic_set(&iq->instr_pending, 0);
}

/**
@@ -82,7 +81,6 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
}

iq->pkts_processed += compl_pkts;
- atomic_sub(compl_pkts, &iq->instr_pending);
iq->stats.instr_completed += compl_pkts;
iq->stats.bytes_sent += compl_bytes;
iq->stats.sgentry_sent += compl_sg;
@@ -91,7 +89,7 @@ int octep_iq_process_completions(struct octep_iq *iq, u16 budget)
netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes);

if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) &&
- ((iq->max_count - atomic_read(&iq->instr_pending)) >
+ (IQ_INSTR_SPACE(iq) >
OCTEP_WAKE_QUEUE_THRESHOLD))
netif_wake_subqueue(iq->netdev, iq->q_no);
return !budget;
@@ -144,7 +142,6 @@ static void octep_iq_free_pending(struct octep_iq *iq)
dev_kfree_skb_any(skb);
}

- atomic_set(&iq->instr_pending, 0);
iq->flush_index = fi;
netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no));
}
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
index 86c98b13fc44..1ba4ff65e54d 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_tx.h
@@ -172,9 +172,6 @@ struct octep_iq {
/* Statistics for this input queue. */
struct octep_iq_stats stats;

- /* This field keeps track of the instructions pending in this queue. */
- atomic_t instr_pending;
-
/* Pointer to the Virtual Base addr of the input ring. */
struct octep_tx_desc_hw *desc_ring;

--
2.25.1

2023-10-23 12:02:53

by Wojciech Drewek

[permalink] [raw]
Subject: RE: [PATCH net-next 1/3] octeon_ep: remove dma sync in transmit path



> -----Original Message-----
> From: Shinas Rasheed <[email protected]>
> Sent: Monday, October 23, 2023 1:45 PM
> To: [email protected]; [email protected]
> Cc: [email protected]; [email protected]; Gallen, Erwan <[email protected]>; mschmidt <[email protected]>;
> [email protected]; [email protected]; [email protected]; [email protected]; Shinas Rasheed <[email protected]>;
> Veerasenareddy Burru <[email protected]>; Sathesh Edara <[email protected]>; Eric Dumazet <[email protected]>
> Subject: [PATCH net-next 1/3] octeon_ep: remove dma sync in transmit path
>
> Clean up DMA sync calls for scatter-gather
> mappings in the transmit path
>
> Signed-off-by: Shinas Rasheed <[email protected]>

Hi Shinas,

The commit msg only says what changed without justification.
What is the reasoning behind those changes?

> ---
> drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 7 -------
> 1 file changed, 7 deletions(-)
>
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> index 552970c7dec0..bf1e376a4232 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> @@ -869,9 +869,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
> if (dma_mapping_error(iq->dev, dma))
> goto dma_map_err;
>
> - dma_sync_single_for_cpu(iq->dev, tx_buffer->sglist_dma,
> - OCTEP_SGLIST_SIZE_PER_PKT,
> - DMA_TO_DEVICE);
> memset(sglist, 0, OCTEP_SGLIST_SIZE_PER_PKT);
> sglist[0].len[3] = len;
> sglist[0].dma_ptr[0] = dma;
> @@ -891,10 +888,6 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
> frag++;
> si++;
> }
> - dma_sync_single_for_device(iq->dev, tx_buffer->sglist_dma,
> - OCTEP_SGLIST_SIZE_PER_PKT,
> - DMA_TO_DEVICE);
> -
> hw_desc->dptr = tx_buffer->sglist_dma;
> }
>
> --
> 2.25.1
>

2023-10-23 12:14:07

by Wojciech Drewek

[permalink] [raw]
Subject: RE: [PATCH net-next 2/3] octeon_ep: implement xmit_more in transmit



> -----Original Message-----
> From: Shinas Rasheed <[email protected]>
> Sent: Monday, October 23, 2023 1:45 PM
> To: [email protected]; [email protected]
> Cc: [email protected]; [email protected]; Gallen, Erwan <[email protected]>; mschmidt <[email protected]>;
> [email protected]; [email protected]; [email protected]; [email protected]; Shinas Rasheed <[email protected]>;
> Veerasenareddy Burru <[email protected]>; Sathesh Edara <[email protected]>; Eric Dumazet <[email protected]>
> Subject: [PATCH net-next 2/3] octeon_ep: implement xmit_more in transmit
>
> Adds xmit_more handling in tx datapath for octeon_ep pf.

Imperative mode is preferred :)
Besides that:
Reviewed-by: Wojciech Drewek <[email protected]>

>
> Signed-off-by: Shinas Rasheed <[email protected]>
> ---
> .../ethernet/marvell/octeon_ep/octep_config.h | 2 +-
> .../ethernet/marvell/octeon_ep/octep_main.c | 19 +++++++++++++++----
> 2 files changed, 16 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
> index 1622a6ebf036..ed8b1ace56b9 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_config.h
> @@ -15,7 +15,7 @@
> /* Tx Queue: maximum descriptors per ring */
> #define OCTEP_IQ_MAX_DESCRIPTORS 1024
> /* Minimum input (Tx) requests to be enqueued to ring doorbell */
> -#define OCTEP_DB_MIN 1
> +#define OCTEP_DB_MIN 8
> /* Packet threshold for Tx queue interrupt */
> #define OCTEP_IQ_INTR_THRESHOLD 0x0
>
> diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> index bf1e376a4232..730443ba2f5b 100644
> --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
> @@ -818,6 +818,7 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
> struct octep_iq *iq;
> skb_frag_t *frag;
> u16 nr_frags, si;
> + int xmit_more;
> u16 q_no, wi;
>
> q_no = skb_get_queue_mapping(skb);
> @@ -892,18 +893,28 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb,
> }
>
> netdev_tx_sent_queue(iq->netdev_q, skb->len);
> +
> + xmit_more = netdev_xmit_more();
> +
> skb_tx_timestamp(skb);
> atomic_inc(&iq->instr_pending);
> + iq->fill_cnt++;
> wi++;
> if (wi == iq->max_count)
> wi = 0;
> iq->host_write_index = wi;
> + if (xmit_more &&
> + (atomic_read(&iq->instr_pending) <
> + (iq->max_count - OCTEP_WAKE_QUEUE_THRESHOLD)) &&
> + iq->fill_cnt < iq->fill_threshold)
> + return NETDEV_TX_OK;
> +
> /* Flush the hw descriptor before writing to doorbell */
> wmb();
> -
> - /* Ring Doorbell to notify the NIC there is a new packet */
> - writel(1, iq->doorbell_reg);
> - iq->stats.instr_posted++;
> + /* Ring Doorbell to notify the NIC of new packets */
> + writel(iq->fill_cnt, iq->doorbell_reg);
> + iq->stats.instr_posted += iq->fill_cnt;
> + iq->fill_cnt = 0;
> return NETDEV_TX_OK;
>
> dma_map_sg_err:
> --
> 2.25.1
>

2023-10-23 16:09:05

by Shinas Rasheed

[permalink] [raw]
Subject: Re: [PATCH net-next 2/3] octeon_ep: implement xmit_more in transmit

Sure, will update it since the first patch changelog requires more explanation as well