Add page pool for RX buffers for faster buffer cycle and reduce CPU
usage.
The standard page pool API is used.
Signed-off-by: Haiyang Zhang <[email protected]>
---
V2:
Use the standard page pool API as suggested by Jesper Dangaard Brouer
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 101 +++++++++++++++---
include/net/mana/mana.h | 3 +
2 files changed, 89 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a499e460594b..0b557b70cd45 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1414,8 +1414,8 @@ static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
return skb;
}
-static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
- struct mana_rxq *rxq)
+static void mana_rx_skb(void *buf_va, bool from_pool,
+ struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq)
{
struct mana_stats_rx *rx_stats = &rxq->stats;
struct net_device *ndev = rxq->ndev;
@@ -1437,8 +1437,12 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
- if (act == XDP_REDIRECT && !rxq->xdp_rc)
+ if (act == XDP_REDIRECT && !rxq->xdp_rc) {
+ if (from_pool)
+ page_pool_release_page(rxq->page_pool,
+ virt_to_head_page(buf_va));
return;
+ }
if (act != XDP_PASS && act != XDP_TX)
goto drop_xdp;
@@ -1448,6 +1452,9 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
if (!skb)
goto drop;
+ if (from_pool)
+ skb_mark_for_recycle(skb);
+
skb->dev = napi->dev;
skb->protocol = eth_type_trans(skb, ndev);
@@ -1498,9 +1505,14 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
u64_stats_update_end(&rx_stats->syncp);
drop:
- WARN_ON_ONCE(rxq->xdp_save_va);
- /* Save for reuse */
- rxq->xdp_save_va = buf_va;
+ if (from_pool) {
+ page_pool_recycle_direct(rxq->page_pool,
+ virt_to_head_page(buf_va));
+ } else {
+ WARN_ON_ONCE(rxq->xdp_save_va);
+ /* Save for reuse */
+ rxq->xdp_save_va = buf_va;
+ }
++ndev->stats.rx_dropped;
@@ -1508,11 +1520,13 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
}
static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
- dma_addr_t *da, bool is_napi)
+ dma_addr_t *da, bool *from_pool, bool is_napi)
{
struct page *page;
void *va;
+ *from_pool = false;
+
/* Reuse XDP dropped page if available */
if (rxq->xdp_save_va) {
va = rxq->xdp_save_va;
@@ -1533,7 +1547,13 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
return NULL;
}
} else {
- page = dev_alloc_page();
+ if (is_napi) {
+ page = page_pool_dev_alloc_pages(rxq->page_pool);
+ *from_pool = true;
+ } else {
+ page = dev_alloc_page();
+ }
+
if (!page)
return NULL;
@@ -1543,7 +1563,11 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
DMA_FROM_DEVICE);
if (dma_mapping_error(dev, *da)) {
- put_page(virt_to_head_page(va));
+ if (*from_pool)
+ page_pool_put_full_page(rxq->page_pool, page, true);
+ else
+ put_page(virt_to_head_page(va));
+
return NULL;
}
@@ -1552,21 +1576,25 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
/* Allocate frag for rx buffer, and save the old buf */
static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
- struct mana_recv_buf_oob *rxoob, void **old_buf)
+ struct mana_recv_buf_oob *rxoob, void **old_buf,
+ bool *old_fp)
{
+ bool from_pool;
dma_addr_t da;
void *va;
- va = mana_get_rxfrag(rxq, dev, &da, true);
+ va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true);
if (!va)
return;
dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
DMA_FROM_DEVICE);
*old_buf = rxoob->buf_va;
+ *old_fp = rxoob->from_pool;
rxoob->buf_va = va;
rxoob->sgl[0].address = da;
+ rxoob->from_pool = from_pool;
}
static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
@@ -1580,6 +1608,7 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
struct device *dev = gc->dev;
void *old_buf = NULL;
u32 curr, pktlen;
+ bool old_fp;
apc = netdev_priv(ndev);
@@ -1622,12 +1651,12 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
rxbuf_oob = &rxq->rx_oobs[curr];
WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);
- mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf);
+ mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp);
/* Unsuccessful refill will have old_buf == NULL.
* In this case, mana_rx_skb() will drop the packet.
*/
- mana_rx_skb(old_buf, oob, rxq);
+ mana_rx_skb(old_buf, old_fp, oob, rxq);
drop:
mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);
@@ -1659,6 +1688,8 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
if (rxq->xdp_flush)
xdp_do_flush();
+
+ page_pool_nid_changed(rxq->page_pool, numa_mem_id());
}
static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
@@ -1881,6 +1912,7 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
struct mana_recv_buf_oob *rx_oob;
struct device *dev = gc->dev;
struct napi_struct *napi;
+ struct page *page;
int i;
if (!rxq)
@@ -1913,10 +1945,18 @@ static void mana_destroy_rxq(struct mana_port_context *apc,
dma_unmap_single(dev, rx_oob->sgl[0].address,
rx_oob->sgl[0].size, DMA_FROM_DEVICE);
- put_page(virt_to_head_page(rx_oob->buf_va));
+ page = virt_to_head_page(rx_oob->buf_va);
+
+ if (rx_oob->from_pool)
+ page_pool_put_full_page(rxq->page_pool, page, false);
+ else
+ put_page(page);
+
rx_oob->buf_va = NULL;
}
+ page_pool_destroy(rxq->page_pool);
+
if (rxq->gdma_rq)
mana_gd_destroy_queue(gc, rxq->gdma_rq);
@@ -1927,18 +1967,20 @@ static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
struct mana_rxq *rxq, struct device *dev)
{
struct mana_port_context *mpc = netdev_priv(rxq->ndev);
+ bool from_pool = false;
dma_addr_t da;
void *va;
if (mpc->rxbufs_pre)
va = mana_get_rxbuf_pre(rxq, &da);
else
- va = mana_get_rxfrag(rxq, dev, &da, false);
+ va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false);
if (!va)
return -ENOMEM;
rx_oob->buf_va = va;
+ rx_oob->from_pool = from_pool;
rx_oob->sgl[0].address = da;
rx_oob->sgl[0].size = rxq->datasize;
@@ -2008,6 +2050,28 @@ static int mana_push_wqe(struct mana_rxq *rxq)
return 0;
}
+static int mana_create_page_pool(struct gdma_context *gc, struct mana_cq *cq,
+ struct mana_rxq *rxq)
+{
+ struct page_pool_params pprm = {};
+ int ret;
+
+ pprm.pool_size = RX_BUFFERS_PER_QUEUE;
+ pprm.napi = &cq->napi;
+ pprm.dev = gc->dev;
+ pprm.dma_dir = DMA_FROM_DEVICE;
+
+ rxq->page_pool = page_pool_create(&pprm);
+
+ if (IS_ERR(rxq->page_pool)) {
+ ret = PTR_ERR(rxq->page_pool);
+ rxq->page_pool = NULL;
+ return ret;
+ }
+
+ return 0;
+}
+
static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
u32 rxq_idx, struct mana_eq *eq,
struct net_device *ndev)
@@ -2106,6 +2170,13 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1);
+ /* Create page pool for RX queue */
+ err = mana_create_page_pool(gc, cq, rxq);
+ if (err) {
+ netdev_err(ndev, "Create page pool err:%d\n", err);
+ goto out;
+ }
+
WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
cq->napi.napi_id));
WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 024ad8ddb27e..b12859511839 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -280,6 +280,7 @@ struct mana_recv_buf_oob {
struct gdma_wqe_request wqe_req;
void *buf_va;
+ bool from_pool; /* allocated from a page pool */
/* SGL of the buffer going to be sent has part of the work request. */
u32 num_sge;
@@ -330,6 +331,8 @@ struct mana_rxq {
bool xdp_flush;
int xdp_rc; /* XDP redirect return code */
+ struct page_pool *page_pool;
+
/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
*/
--
2.25.1
On Tue, 18 Jul 2023 21:48:01 +0000 Haiyang Zhang wrote:
> Add page pool for RX buffers for faster buffer cycle and reduce CPU
> usage.
>
> The standard page pool API is used.
> @@ -1437,8 +1437,12 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
>
> act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
>
> - if (act == XDP_REDIRECT && !rxq->xdp_rc)
> + if (act == XDP_REDIRECT && !rxq->xdp_rc) {
> + if (from_pool)
> + page_pool_release_page(rxq->page_pool,
> + virt_to_head_page(buf_va));
IIUC you should pass the page_pool as the last argument to
xdp_rxq_info_reg_mem_model() and then the page will be recycled
by the core, you shouldn't release it.
Not to mention the potential race in releasing the page _after_
giving its ownership to someone else.
> - page = dev_alloc_page();
> + if (is_napi) {
> + page = page_pool_dev_alloc_pages(rxq->page_pool);
> + *from_pool = true;
> + } else {
> + page = dev_alloc_page();
FWIW if you're only calling this outside NAPI during init, when NAPI
can't yet run, I _think_ it's okay to use page_pool_dev_alloc..
> + pprm.pool_size = RX_BUFFERS_PER_QUEUE;
> + pprm.napi = &cq->napi;
> + pprm.dev = gc->dev;
> + pprm.dma_dir = DMA_FROM_DEVICE;
If you're not setting PP_FLAG_DMA_MAP you don't have to fill in .dev
and .dma_dir
--
pw-bot: cr
> -----Original Message-----
> From: Jakub Kicinski <[email protected]>
> Sent: Thursday, July 20, 2023 12:30 AM
> To: Haiyang Zhang <[email protected]>
> Cc: [email protected]; [email protected]; Dexuan Cui
> <[email protected]>; KY Srinivasan <[email protected]>; Paul Rosswurm
> <[email protected]>; [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; Long Li <[email protected]>;
> [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> [email protected]; Ajay Sharma <[email protected]>; [email protected];
> [email protected]; [email protected]; linux-
> [email protected]
> Subject: Re: [PATCH V2,net-next] net: mana: Add page pool for RX buffers
>
> On Tue, 18 Jul 2023 21:48:01 +0000 Haiyang Zhang wrote:
> > Add page pool for RX buffers for faster buffer cycle and reduce CPU
> > usage.
> >
> > The standard page pool API is used.
>
> > @@ -1437,8 +1437,12 @@ static void mana_rx_skb(void *buf_va, struct
> mana_rxcomp_oob *cqe,
> >
> > act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);
> >
> > - if (act == XDP_REDIRECT && !rxq->xdp_rc)
> > + if (act == XDP_REDIRECT && !rxq->xdp_rc) {
> > + if (from_pool)
> > + page_pool_release_page(rxq->page_pool,
> > + virt_to_head_page(buf_va));
>
>
> IIUC you should pass the page_pool as the last argument to
> xdp_rxq_info_reg_mem_model() and then the page will be recycled
> by the core, you shouldn't release it.
>
> Not to mention the potential race in releasing the page _after_
> giving its ownership to someone else.
>
> > - page = dev_alloc_page();
> > + if (is_napi) {
> > + page = page_pool_dev_alloc_pages(rxq->page_pool);
> > + *from_pool = true;
> > + } else {
> > + page = dev_alloc_page();
>
> FWIW if you're only calling this outside NAPI during init, when NAPI
> can't yet run, I _think_ it's okay to use page_pool_dev_alloc..
>
> > + pprm.pool_size = RX_BUFFERS_PER_QUEUE;
> > + pprm.napi = &cq->napi;
> > + pprm.dev = gc->dev;
> > + pprm.dma_dir = DMA_FROM_DEVICE;
>
> If you're not setting PP_FLAG_DMA_MAP you don't have to fill in .dev
> and .dma_dir
Thank you for the comments.
I will update the patch.
- Haiyang