2023-04-13 03:30:25

by Song, Yoong Siang

[permalink] [raw]
Subject: [PATCH net-next v4 1/3] net: stmmac: introduce wrapper for struct xdp_buff

Introduce struct stmmac_xdp_buff as a preparation to support XDP Rx
metadata via kfuncs.

Reviewed-by: Jacob Keller <[email protected]>
Signed-off-by: Song Yoong Siang <[email protected]>
---
drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 ++++
.../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++---------
2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 3d15e1e92e18..ac8ccf851708 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -92,6 +92,10 @@ struct stmmac_rx_buffer {
dma_addr_t sec_addr;
};

+struct stmmac_xdp_buff {
+ struct xdp_buff xdp;
+};
+
struct stmmac_rx_queue {
u32 rx_count_frames;
u32 queue_index;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d7fcab057032..6ffce52ca837 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5188,9 +5188,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
int status = 0, coe = priv->hw->rx_csum;
unsigned int next_entry = rx_q->cur_rx;
enum dma_data_direction dma_dir;
+ struct stmmac_xdp_buff ctx = {};
unsigned int desc_size;
struct sk_buff *skb = NULL;
- struct xdp_buff xdp;
int xdp_status = 0;
int buf_sz;

@@ -5311,17 +5311,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
dma_sync_single_for_cpu(priv->device, buf->addr,
buf1_len, dma_dir);

- xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq);
- xdp_prepare_buff(&xdp, page_address(buf->page),
+ xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq);
+ xdp_prepare_buff(&ctx.xdp, page_address(buf->page),
buf->page_offset, buf1_len, false);

- pre_len = xdp.data_end - xdp.data_hard_start -
+ pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
buf->page_offset;
- skb = stmmac_xdp_run_prog(priv, &xdp);
+ skb = stmmac_xdp_run_prog(priv, &ctx.xdp);
/* Due xdp_adjust_tail: DMA sync for_device
* cover max len CPU touch
*/
- sync_len = xdp.data_end - xdp.data_hard_start -
+ sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
buf->page_offset;
sync_len = max(sync_len, pre_len);

@@ -5331,7 +5331,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)

if (xdp_res & STMMAC_XDP_CONSUMED) {
page_pool_put_page(rx_q->page_pool,
- virt_to_head_page(xdp.data),
+ virt_to_head_page(ctx.xdp.data),
sync_len, true);
buf->page = NULL;
priv->dev->stats.rx_dropped++;
@@ -5359,7 +5359,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)

if (!skb) {
/* XDP program may expand or reduce tail */
- buf1_len = xdp.data_end - xdp.data;
+ buf1_len = ctx.xdp.data_end - ctx.xdp.data;

skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
if (!skb) {
@@ -5369,7 +5369,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
}

/* XDP program may adjust header */
- skb_copy_to_linear_data(skb, xdp.data, buf1_len);
+ skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
skb_put(skb, buf1_len);

/* Data payload copied into SKB, page ready for recycle */
--
2.34.1


2023-04-13 17:12:34

by Jesper Dangaard Brouer

[permalink] [raw]
Subject: Re: [PATCH net-next v4 1/3] net: stmmac: introduce wrapper for struct xdp_buff


On 13/04/2023 05.25, Song Yoong Siang wrote:
> Introduce struct stmmac_xdp_buff as a preparation to support XDP Rx
> metadata via kfuncs.
>
> Reviewed-by: Jacob Keller <[email protected]>
> Signed-off-by: Song Yoong Siang <[email protected]>
> ---
> drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 ++++
> .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++---------
> 2 files changed, 13 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> index 3d15e1e92e18..ac8ccf851708 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
> @@ -92,6 +92,10 @@ struct stmmac_rx_buffer {
> dma_addr_t sec_addr;
> };
>
> +struct stmmac_xdp_buff {
> + struct xdp_buff xdp;
> +};
> +
> struct stmmac_rx_queue {
> u32 rx_count_frames;
> u32 queue_index;
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index d7fcab057032..6ffce52ca837 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -5188,9 +5188,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
> int status = 0, coe = priv->hw->rx_csum;
> unsigned int next_entry = rx_q->cur_rx;
> enum dma_data_direction dma_dir;
> + struct stmmac_xdp_buff ctx = {};

This code trick {} will zero out the struct.

Is this done purpose and really needed?

On x86_64 this unfortunately generates an asm instruction: rep stos

A repeated store string operation, for zeroing out memory, which is
slow. (Because[1] it supports be suspended by an exception or interrupt,
which requires it to store/restore CPU flags).

[1] https://www.felixcloutier.com/x86/rep:repe:repz:repne:repnz#tbl-4-22


> unsigned int desc_size;
> struct sk_buff *skb = NULL;
> - struct xdp_buff xdp;
> int xdp_status = 0;
> int buf_sz;
>
> @@ -5311,17 +5311,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
> dma_sync_single_for_cpu(priv->device, buf->addr,
> buf1_len, dma_dir);
>
> - xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq);
> - xdp_prepare_buff(&xdp, page_address(buf->page),
> + xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq);
> + xdp_prepare_buff(&ctx.xdp, page_address(buf->page),
> buf->page_offset, buf1_len, false);
>
> - pre_len = xdp.data_end - xdp.data_hard_start -
> + pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
> buf->page_offset;
> - skb = stmmac_xdp_run_prog(priv, &xdp);
> + skb = stmmac_xdp_run_prog(priv, &ctx.xdp);
> /* Due xdp_adjust_tail: DMA sync for_device
> * cover max len CPU touch
> */
> - sync_len = xdp.data_end - xdp.data_hard_start -
> + sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
> buf->page_offset;
> sync_len = max(sync_len, pre_len);
>
> @@ -5331,7 +5331,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>
> if (xdp_res & STMMAC_XDP_CONSUMED) {
> page_pool_put_page(rx_q->page_pool,
> - virt_to_head_page(xdp.data),
> + virt_to_head_page(ctx.xdp.data),
> sync_len, true);
> buf->page = NULL;
> priv->dev->stats.rx_dropped++;
> @@ -5359,7 +5359,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
>
> if (!skb) {
> /* XDP program may expand or reduce tail */
> - buf1_len = xdp.data_end - xdp.data;
> + buf1_len = ctx.xdp.data_end - ctx.xdp.data;
>
> skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
> if (!skb) {
> @@ -5369,7 +5369,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
> }
>
> /* XDP program may adjust header */
> - skb_copy_to_linear_data(skb, xdp.data, buf1_len);
> + skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
> skb_put(skb, buf1_len);
>
> /* Data payload copied into SKB, page ready for recycle */

2023-04-14 01:18:19

by Song, Yoong Siang

[permalink] [raw]
Subject: RE: [PATCH net-next v4 1/3] net: stmmac: introduce wrapper for struct xdp_buff

On Friday, April 14, 2023 1:10 AM , Jesper Dangaard Brouer <[email protected]> wrote:
>On 13/04/2023 05.25, Song Yoong Siang wrote:
>> Introduce struct stmmac_xdp_buff as a preparation to support XDP Rx
>> metadata via kfuncs.
>>
>> Reviewed-by: Jacob Keller <[email protected]>
>> Signed-off-by: Song Yoong Siang <[email protected]>
>> ---
>> drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 ++++
>> .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++---------
>> 2 files changed, 13 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> index 3d15e1e92e18..ac8ccf851708 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
>> @@ -92,6 +92,10 @@ struct stmmac_rx_buffer {
>> dma_addr_t sec_addr;
>> };
>>
>> +struct stmmac_xdp_buff {
>> + struct xdp_buff xdp;
>> +};
>> +
>> struct stmmac_rx_queue {
>> u32 rx_count_frames;
>> u32 queue_index;
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> index d7fcab057032..6ffce52ca837 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
>> @@ -5188,9 +5188,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int
>limit, u32 queue)
>> int status = 0, coe = priv->hw->rx_csum;
>> unsigned int next_entry = rx_q->cur_rx;
>> enum dma_data_direction dma_dir;
>> + struct stmmac_xdp_buff ctx = {};
>
>This code trick {} will zero out the struct.
>
>Is this done purpose and really needed?
>
>On x86_64 this unfortunately generates an asm instruction: rep stos
>
>A repeated store string operation, for zeroing out memory, which is slow.
>(Because[1] it supports be suspended by an exception or interrupt, which requires
>it to store/restore CPU flags).
>
>[1] https://www.felixcloutier.com/x86/rep:repe:repz:repne:repnz#tbl-4-22

Thanks for the useful information and link. I will submit v5 to remove {}.
>
>
>> unsigned int desc_size;
>> struct sk_buff *skb = NULL;
>> - struct xdp_buff xdp;
>> int xdp_status = 0;
>> int buf_sz;
>>
>> @@ -5311,17 +5311,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int
>limit, u32 queue)
>> dma_sync_single_for_cpu(priv->device, buf->addr,
>> buf1_len, dma_dir);
>>
>> - xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq);
>> - xdp_prepare_buff(&xdp, page_address(buf->page),
>> + xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq);
>> + xdp_prepare_buff(&ctx.xdp, page_address(buf->page),
>> buf->page_offset, buf1_len, false);
>>
>> - pre_len = xdp.data_end - xdp.data_hard_start -
>> + pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
>> buf->page_offset;
>> - skb = stmmac_xdp_run_prog(priv, &xdp);
>> + skb = stmmac_xdp_run_prog(priv, &ctx.xdp);
>> /* Due xdp_adjust_tail: DMA sync for_device
>> * cover max len CPU touch
>> */
>> - sync_len = xdp.data_end - xdp.data_hard_start -
>> + sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start -
>> buf->page_offset;
>> sync_len = max(sync_len, pre_len);
>>
>> @@ -5331,7 +5331,7 @@ static int stmmac_rx(struct stmmac_priv *priv,
>> int limit, u32 queue)
>>
>> if (xdp_res & STMMAC_XDP_CONSUMED) {
>> page_pool_put_page(rx_q->page_pool,
>> -
>virt_to_head_page(xdp.data),
>> +
>virt_to_head_page(ctx.xdp.data),
>> sync_len, true);
>> buf->page = NULL;
>> priv->dev->stats.rx_dropped++;
>> @@ -5359,7 +5359,7 @@ static int stmmac_rx(struct stmmac_priv *priv,
>> int limit, u32 queue)
>>
>> if (!skb) {
>> /* XDP program may expand or reduce tail */
>> - buf1_len = xdp.data_end - xdp.data;
>> + buf1_len = ctx.xdp.data_end - ctx.xdp.data;
>>
>> skb = napi_alloc_skb(&ch->rx_napi, buf1_len);
>> if (!skb) {
>> @@ -5369,7 +5369,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int
>limit, u32 queue)
>> }
>>
>> /* XDP program may adjust header */
>> - skb_copy_to_linear_data(skb, xdp.data, buf1_len);
>> + skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len);
>> skb_put(skb, buf1_len);
>>
>> /* Data payload copied into SKB, page ready for recycle
>*/