Extend the current support of XDP in lan966x with the action XDP_TX and
XDP_REDIRECT.
The first patch introduces a headroom for all the received frames. This
is needed when the XDP_TX and XDP_REDIRECT actions are added.
The next 2 patches just introduce some helper functions that can be
reused when it is needed to transmit back the frame.
The last 2 patches introduce the XDP actions XDP_TX and XDP_REDIRECT.
v1->v2:
- use skb_reserve instead of using skb_put and skb_pull
- make sure that data_len doesn't include XDP_PACKET_HEADROOM
Horatiu Vultur (5):
net: lan966x: Add XDP_PACKET_HEADROOM
net: lan966x: Introduce helper functions
net: lan966x: Add len field to lan966x_tx_dcb_buf
net: lan966x: Add support for XDP_TX
net: lan966x: Add support for XDP_REDIRECT
.../ethernet/microchip/lan966x/lan966x_fdma.c | 177 ++++++++++++++----
.../ethernet/microchip/lan966x/lan966x_main.c | 5 +-
.../ethernet/microchip/lan966x/lan966x_main.h | 15 ++
.../ethernet/microchip/lan966x/lan966x_xdp.c | 39 +++-
4 files changed, 195 insertions(+), 41 deletions(-)
--
2.38.0
Update the page_pool params to allocate XDP_PACKET_HEADROOM space as
headroom for all received frames.
This is needed for when the XDP_TX and XDP_REDIRECT are implemented.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdma.c | 16 +++++++++++-----
.../net/ethernet/microchip/lan966x/lan966x_xdp.c | 3 ++-
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index 5fbbd479cfb06..3055124b4dd79 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+
+#include <linux/bpf.h>
+
#include "lan966x_main.h"
static int lan966x_fdma_channel_active(struct lan966x *lan966x)
@@ -16,7 +18,7 @@ static struct page *lan966x_fdma_rx_alloc_page(struct lan966x_rx *rx,
if (unlikely(!page))
return NULL;
- db->dataptr = page_pool_get_dma_addr(page);
+ db->dataptr = page_pool_get_dma_addr(page) + XDP_PACKET_HEADROOM;
return page;
}
@@ -72,7 +74,7 @@ static int lan966x_fdma_rx_alloc_page_pool(struct lan966x_rx *rx)
.nid = NUMA_NO_NODE,
.dev = lan966x->dev,
.dma_dir = DMA_FROM_DEVICE,
- .offset = 0,
+ .offset = XDP_PACKET_HEADROOM,
.max_len = rx->max_mtu -
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
};
@@ -432,11 +434,13 @@ static int lan966x_fdma_rx_check_frame(struct lan966x_rx *rx, u64 *src_port)
if (unlikely(!page))
return FDMA_ERROR;
- dma_sync_single_for_cpu(lan966x->dev, (dma_addr_t)db->dataptr,
+ dma_sync_single_for_cpu(lan966x->dev,
+ (dma_addr_t)db->dataptr + XDP_PACKET_HEADROOM,
FDMA_DCB_STATUS_BLOCKL(db->status),
DMA_FROM_DEVICE);
- lan966x_ifh_get_src_port(page_address(page), src_port);
+ lan966x_ifh_get_src_port(page_address(page) + XDP_PACKET_HEADROOM,
+ src_port);
if (WARN_ON(*src_port >= lan966x->num_phys_ports))
return FDMA_ERROR;
@@ -466,6 +470,7 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx,
skb_mark_for_recycle(skb);
+ skb_reserve(skb, XDP_PACKET_HEADROOM);
skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status));
lan966x_ifh_get_timestamp(skb->data, &timestamp);
@@ -786,7 +791,8 @@ static int lan966x_fdma_get_max_frame(struct lan966x *lan966x)
return lan966x_fdma_get_max_mtu(lan966x) +
IFH_LEN_BYTES +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
- VLAN_HLEN * 2;
+ VLAN_HLEN * 2 +
+ XDP_PACKET_HEADROOM;
}
int lan966x_fdma_change_mtu(struct lan966x *lan966x)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
index e77d9f2aad2b4..8ebde1eb6a09c 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
@@ -44,7 +44,8 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
xdp_init_buff(&xdp, PAGE_SIZE << lan966x->rx.page_order,
&port->xdp_rxq);
- xdp_prepare_buff(&xdp, page_address(page), IFH_LEN_BYTES,
+ xdp_prepare_buff(&xdp, page_address(page),
+ IFH_LEN_BYTES + XDP_PACKET_HEADROOM,
data_len - IFH_LEN_BYTES, false);
act = bpf_prog_run_xdp(xdp_prog, &xdp);
switch (act) {
--
2.38.0
Extend lan966x XDP support with the action XDP_REDIRECT. This is similar
to the XDP_TX action, so a lot of functionality can be reused.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdma.c | 9 ++++++
.../ethernet/microchip/lan966x/lan966x_main.c | 1 +
.../ethernet/microchip/lan966x/lan966x_main.h | 6 ++++
.../ethernet/microchip/lan966x/lan966x_xdp.c | 28 +++++++++++++++++++
4 files changed, 44 insertions(+)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index c2e56233a8da5..b863a5b50d4de 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0+
#include <linux/bpf.h>
+#include <linux/filter.h>
#include "lan966x_main.h"
@@ -518,6 +519,7 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
int dcb_reload = rx->dcb_index;
struct lan966x_rx_dcb *old_dcb;
struct lan966x_db *db;
+ bool redirect = false;
struct sk_buff *skb;
struct page *page;
int counter = 0;
@@ -543,6 +545,10 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
case FDMA_TX:
lan966x_fdma_rx_advance_dcb(rx);
continue;
+ case FDMA_REDIRECT:
+ lan966x_fdma_rx_advance_dcb(rx);
+ redirect = true;
+ continue;
case FDMA_DROP:
lan966x_fdma_rx_free_page(rx);
lan966x_fdma_rx_advance_dcb(rx);
@@ -579,6 +585,9 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
if (counter < weight && napi_complete_done(napi, counter))
lan_wr(0xff, lan966x, FDMA_INTR_DB_ENA);
+ if (redirect)
+ xdp_do_flush();
+
return counter;
}
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 0b7707306da26..0aed244826d39 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -469,6 +469,7 @@ static const struct net_device_ops lan966x_port_netdev_ops = {
.ndo_eth_ioctl = lan966x_port_ioctl,
.ndo_setup_tc = lan966x_tc_setup,
.ndo_bpf = lan966x_xdp,
+ .ndo_xdp_xmit = lan966x_xdp_xmit,
};
bool lan966x_netdevice_check(const struct net_device *dev)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index df7fec361962b..b73c5a6cc0beb 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -106,12 +106,14 @@ enum macaccess_entry_type {
* FDMA_ERROR, something went wrong, stop getting more frames
* FDMA_DROP, frame is dropped, but continue to get more frames
* FDMA_TX, frame is given to TX, but continue to get more frames
+ * FDMA_REDIRECT, frame is given to TX, but continue to get more frames
*/
enum lan966x_fdma_action {
FDMA_PASS = 0,
FDMA_ERROR,
FDMA_DROP,
FDMA_TX,
+ FDMA_REDIRECT,
};
struct lan966x_port;
@@ -564,6 +566,10 @@ int lan966x_xdp(struct net_device *dev, struct netdev_bpf *xdp);
int lan966x_xdp_run(struct lan966x_port *port,
struct page *page,
u32 data_len);
+int lan966x_xdp_xmit(struct net_device *dev,
+ int n,
+ struct xdp_frame **frames,
+ u32 flags);
static inline bool lan966x_xdp_port_present(struct lan966x_port *port)
{
return !!port->xdp_prog;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
index 9b0ba3179df62..d2ecfe78382cf 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
@@ -35,6 +35,29 @@ int lan966x_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
+int lan966x_xdp_xmit(struct net_device *dev,
+ int n,
+ struct xdp_frame **frames,
+ u32 flags)
+{
+ struct lan966x_port *port = netdev_priv(dev);
+ int i, nxmit = 0;
+
+ for (i = 0; i < n; ++i) {
+ struct xdp_frame *xdpf = frames[i];
+ int err;
+
+ err = lan966x_fdma_xmit_xdpf(port, xdpf,
+ virt_to_head_page(xdpf->data));
+ if (err)
+ break;
+
+ nxmit++;
+ }
+
+ return nxmit;
+}
+
int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
{
struct bpf_prog *xdp_prog = port->xdp_prog;
@@ -59,6 +82,11 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
return lan966x_fdma_xmit_xdpf(port, xdpf, page) ?
FDMA_DROP : FDMA_TX;
+ case XDP_REDIRECT:
+ if (xdp_do_redirect(port->dev, &xdp, xdp_prog))
+ return FDMA_DROP;
+
+ return FDMA_REDIRECT;
default:
bpf_warn_invalid_xdp_action(port->dev, xdp_prog, act);
fallthrough;
--
2.38.0
Introduce lan966x_fdma_tx_setup_dcb and lan966x_fdma_tx_start functions
and use of them inside lan966x_fdma_xmit. There is no functional change
in here.
They are introduced to be used when XDP_TX/REDIRECT actions are
introduced.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdma.c | 71 ++++++++++++-------
1 file changed, 44 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index 3055124b4dd79..94c720e59caee 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -612,14 +612,53 @@ static int lan966x_fdma_get_next_dcb(struct lan966x_tx *tx)
return -1;
}
+static void lan966x_fdma_tx_setup_dcb(struct lan966x_tx *tx,
+ int next_to_use, int len,
+ dma_addr_t dma_addr)
+{
+ struct lan966x_tx_dcb *next_dcb;
+ struct lan966x_db *next_db;
+
+ next_dcb = &tx->dcbs[next_to_use];
+ next_dcb->nextptr = FDMA_DCB_INVALID_DATA;
+
+ next_db = &next_dcb->db[0];
+ next_db->dataptr = dma_addr;
+ next_db->status = FDMA_DCB_STATUS_SOF |
+ FDMA_DCB_STATUS_EOF |
+ FDMA_DCB_STATUS_INTR |
+ FDMA_DCB_STATUS_BLOCKO(0) |
+ FDMA_DCB_STATUS_BLOCKL(len);
+}
+
+static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use)
+{
+ struct lan966x *lan966x = tx->lan966x;
+ struct lan966x_tx_dcb *dcb;
+
+ if (likely(lan966x->tx.activated)) {
+ /* Connect current dcb to the next db */
+ dcb = &tx->dcbs[tx->last_in_use];
+ dcb->nextptr = tx->dma + (next_to_use *
+ sizeof(struct lan966x_tx_dcb));
+
+ lan966x_fdma_tx_reload(tx);
+ } else {
+ /* Because it is first time, then just activate */
+ lan966x->tx.activated = true;
+ lan966x_fdma_tx_activate(tx);
+ }
+
+ /* Move to next dcb because this last in use */
+ tx->last_in_use = next_to_use;
+}
+
int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
{
struct lan966x_port *port = netdev_priv(dev);
struct lan966x *lan966x = port->lan966x;
struct lan966x_tx_dcb_buf *next_dcb_buf;
- struct lan966x_tx_dcb *next_dcb, *dcb;
struct lan966x_tx *tx = &lan966x->tx;
- struct lan966x_db *next_db;
int needed_headroom;
int needed_tailroom;
dma_addr_t dma_addr;
@@ -665,16 +704,7 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
}
/* Setup next dcb */
- next_dcb = &tx->dcbs[next_to_use];
- next_dcb->nextptr = FDMA_DCB_INVALID_DATA;
-
- next_db = &next_dcb->db[0];
- next_db->dataptr = dma_addr;
- next_db->status = FDMA_DCB_STATUS_SOF |
- FDMA_DCB_STATUS_EOF |
- FDMA_DCB_STATUS_INTR |
- FDMA_DCB_STATUS_BLOCKO(0) |
- FDMA_DCB_STATUS_BLOCKL(skb->len);
+ lan966x_fdma_tx_setup_dcb(tx, next_to_use, skb->len, dma_addr);
/* Fill up the buffer */
next_dcb_buf = &tx->dcbs_buf[next_to_use];
@@ -688,21 +718,8 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
LAN966X_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP)
next_dcb_buf->ptp = true;
- if (likely(lan966x->tx.activated)) {
- /* Connect current dcb to the next db */
- dcb = &tx->dcbs[tx->last_in_use];
- dcb->nextptr = tx->dma + (next_to_use *
- sizeof(struct lan966x_tx_dcb));
-
- lan966x_fdma_tx_reload(tx);
- } else {
- /* Because it is first time, then just activate */
- lan966x->tx.activated = true;
- lan966x_fdma_tx_activate(tx);
- }
-
- /* Move to next dcb because this last in use */
- tx->last_in_use = next_to_use;
+ /* Start the transmission */
+ lan966x_fdma_tx_start(tx, next_to_use);
return NETDEV_TX_OK;
--
2.38.0
Extend lan966x XDP support with the action XDP_TX. In this case when the
received buffer needs to execute XDP_TX, the buffer will be moved to the
TX buffers. So a new RX buffer will be allocated.
When the TX finishes with the frame, it will completely release this
buffer.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdma.c | 78 +++++++++++++++++--
.../ethernet/microchip/lan966x/lan966x_main.c | 4 +-
.../ethernet/microchip/lan966x/lan966x_main.h | 8 ++
.../ethernet/microchip/lan966x/lan966x_xdp.c | 8 ++
4 files changed, 91 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index 384ed34197d58..c2e56233a8da5 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -394,13 +394,21 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
dcb_buf->dev->stats.tx_bytes += dcb_buf->len;
dcb_buf->used = false;
- dma_unmap_single(lan966x->dev,
- dcb_buf->dma_addr,
- dcb_buf->len,
- DMA_TO_DEVICE);
- if (!dcb_buf->ptp)
+ if (dcb_buf->skb)
+ dma_unmap_single(lan966x->dev,
+ dcb_buf->dma_addr,
+ dcb_buf->len,
+ DMA_TO_DEVICE);
+
+ if (dcb_buf->skb && !dcb_buf->ptp)
dev_kfree_skb_any(dcb_buf->skb);
+ if (dcb_buf->page) {
+ page_pool_release_page(lan966x->rx.page_pool,
+ dcb_buf->page);
+ put_page(dcb_buf->page);
+ }
+
clear = true;
}
@@ -532,6 +540,9 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
lan966x_fdma_rx_free_page(rx);
lan966x_fdma_rx_advance_dcb(rx);
goto allocate_new;
+ case FDMA_TX:
+ lan966x_fdma_rx_advance_dcb(rx);
+ continue;
case FDMA_DROP:
lan966x_fdma_rx_free_page(rx);
lan966x_fdma_rx_advance_dcb(rx);
@@ -653,6 +664,62 @@ static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use)
tx->last_in_use = next_to_use;
}
+int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
+ struct xdp_frame *xdpf,
+ struct page *page)
+{
+ struct lan966x *lan966x = port->lan966x;
+ struct lan966x_tx_dcb_buf *next_dcb_buf;
+ struct lan966x_tx *tx = &lan966x->tx;
+ dma_addr_t dma_addr;
+ int next_to_use;
+ __be32 *ifh;
+ int ret = 0;
+
+ spin_lock(&lan966x->tx_lock);
+
+ /* Get next index */
+ next_to_use = lan966x_fdma_get_next_dcb(tx);
+ if (next_to_use < 0) {
+ netif_stop_queue(port->dev);
+ ret = NETDEV_TX_BUSY;
+ goto out;
+ }
+
+ /* Generate new IFH */
+ ifh = page_address(page) + XDP_PACKET_HEADROOM;
+ memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
+ lan966x_ifh_set_bypass(ifh, 1);
+ lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port));
+
+ dma_addr = page_pool_get_dma_addr(page);
+ dma_sync_single_for_device(lan966x->dev, dma_addr + XDP_PACKET_HEADROOM,
+ xdpf->len + IFH_LEN_BYTES,
+ DMA_TO_DEVICE);
+
+ /* Setup next dcb */
+ lan966x_fdma_tx_setup_dcb(tx, next_to_use, xdpf->len + IFH_LEN_BYTES,
+ dma_addr + XDP_PACKET_HEADROOM);
+
+ /* Fill up the buffer */
+ next_dcb_buf = &tx->dcbs_buf[next_to_use];
+ next_dcb_buf->skb = NULL;
+ next_dcb_buf->page = page;
+ next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES;
+ next_dcb_buf->dma_addr = dma_addr;
+ next_dcb_buf->used = true;
+ next_dcb_buf->ptp = false;
+ next_dcb_buf->dev = port->dev;
+
+ /* Start the transmission */
+ lan966x_fdma_tx_start(tx, next_to_use);
+
+out:
+ spin_unlock(&lan966x->tx_lock);
+
+ return ret;
+}
+
int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
{
struct lan966x_port *port = netdev_priv(dev);
@@ -709,6 +776,7 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
/* Fill up the buffer */
next_dcb_buf = &tx->dcbs_buf[next_to_use];
next_dcb_buf->skb = skb;
+ next_dcb_buf->page = NULL;
next_dcb_buf->len = skb->len;
next_dcb_buf->dma_addr = dma_addr;
next_dcb_buf->used = true;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 42be5d0f1f015..0b7707306da26 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -302,13 +302,13 @@ static int lan966x_port_ifh_xmit(struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
-static void lan966x_ifh_set_bypass(void *ifh, u64 bypass)
+void lan966x_ifh_set_bypass(void *ifh, u64 bypass)
{
packing(ifh, &bypass, IFH_POS_BYPASS + IFH_WID_BYPASS - 1,
IFH_POS_BYPASS, IFH_LEN * 4, PACK, 0);
}
-static void lan966x_ifh_set_port(void *ifh, u64 bypass)
+void lan966x_ifh_set_port(void *ifh, u64 bypass)
{
packing(ifh, &bypass, IFH_POS_DSTS + IFH_WID_DSTS - 1,
IFH_POS_DSTS, IFH_LEN * 4, PACK, 0);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index 7bb9098496f60..df7fec361962b 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -105,11 +105,13 @@ enum macaccess_entry_type {
* FDMA_PASS, frame is valid and can be used
* FDMA_ERROR, something went wrong, stop getting more frames
* FDMA_DROP, frame is dropped, but continue to get more frames
+ * FDMA_TX, frame is given to TX, but continue to get more frames
*/
enum lan966x_fdma_action {
FDMA_PASS = 0,
FDMA_ERROR,
FDMA_DROP,
+ FDMA_TX,
};
struct lan966x_port;
@@ -175,6 +177,7 @@ struct lan966x_rx {
struct lan966x_tx_dcb_buf {
struct net_device *dev;
struct sk_buff *skb;
+ struct page *page;
int len;
dma_addr_t dma_addr;
bool used;
@@ -360,6 +363,8 @@ bool lan966x_hw_offload(struct lan966x *lan966x, u32 port, struct sk_buff *skb);
void lan966x_ifh_get_src_port(void *ifh, u64 *src_port);
void lan966x_ifh_get_timestamp(void *ifh, u64 *timestamp);
+void lan966x_ifh_set_bypass(void *ifh, u64 bypass);
+void lan966x_ifh_set_port(void *ifh, u64 bypass);
void lan966x_stats_get(struct net_device *dev,
struct rtnl_link_stats64 *stats);
@@ -460,6 +465,9 @@ u32 lan966x_ptp_get_period_ps(void);
int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev);
+int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
+ struct xdp_frame *frame,
+ struct page *page);
int lan966x_fdma_change_mtu(struct lan966x *lan966x);
void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev);
void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev);
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
index 8ebde1eb6a09c..9b0ba3179df62 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_xdp.c
@@ -39,6 +39,7 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
{
struct bpf_prog *xdp_prog = port->xdp_prog;
struct lan966x *lan966x = port->lan966x;
+ struct xdp_frame *xdpf;
struct xdp_buff xdp;
u32 act;
@@ -51,6 +52,13 @@ int lan966x_xdp_run(struct lan966x_port *port, struct page *page, u32 data_len)
switch (act) {
case XDP_PASS:
return FDMA_PASS;
+ case XDP_TX:
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ if (!xdpf)
+ return FDMA_DROP;
+
+ return lan966x_fdma_xmit_xdpf(port, xdpf, page) ?
+ FDMA_DROP : FDMA_TX;
default:
bpf_warn_invalid_xdp_action(port->dev, xdp_prog, act);
fallthrough;
--
2.38.0
Currently when a frame is transmitted, it is required to unmap the
frame that was transmitted. The length of the frame was taken from the
transmitted skb. In the future we might not have an skb, therefore store
the skb length directly in the lan966x_tx_dcb_buf and use this one to
unmap the frame.
Signed-off-by: Horatiu Vultur <[email protected]>
---
drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c | 5 +++--
drivers/net/ethernet/microchip/lan966x/lan966x_main.h | 1 +
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index 94c720e59caee..384ed34197d58 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -391,12 +391,12 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
continue;
dcb_buf->dev->stats.tx_packets++;
- dcb_buf->dev->stats.tx_bytes += dcb_buf->skb->len;
+ dcb_buf->dev->stats.tx_bytes += dcb_buf->len;
dcb_buf->used = false;
dma_unmap_single(lan966x->dev,
dcb_buf->dma_addr,
- dcb_buf->skb->len,
+ dcb_buf->len,
DMA_TO_DEVICE);
if (!dcb_buf->ptp)
dev_kfree_skb_any(dcb_buf->skb);
@@ -709,6 +709,7 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
/* Fill up the buffer */
next_dcb_buf = &tx->dcbs_buf[next_to_use];
next_dcb_buf->skb = skb;
+ next_dcb_buf->len = skb->len;
next_dcb_buf->dma_addr = dma_addr;
next_dcb_buf->used = true;
next_dcb_buf->ptp = false;
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index bc93051aa0798..7bb9098496f60 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -175,6 +175,7 @@ struct lan966x_rx {
struct lan966x_tx_dcb_buf {
struct net_device *dev;
struct sk_buff *skb;
+ int len;
dma_addr_t dma_addr;
bool used;
bool ptp;
--
2.38.0
From: Horatiu Vultur <[email protected]>
Date: Tue, 15 Nov 2022 22:44:55 +0100
Extend lan966x XDP support with the action XDP_TX. In this case when the
received buffer needs to execute XDP_TX, the buffer will be moved to the
TX buffers. So a new RX buffer will be allocated.
When the TX finishes with the frame, it will completely release this
buffer.
Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdma.c | 78 +++++++++++++++++--
.../ethernet/microchip/lan966x/lan966x_main.c | 4 +-
.../ethernet/microchip/lan966x/lan966x_main.h | 8 ++
.../ethernet/microchip/lan966x/lan966x_xdp.c | 8 ++
4 files changed, 91 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index 384ed34197d58..c2e56233a8da5 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -394,13 +394,21 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
dcb_buf->dev->stats.tx_bytes += dcb_buf->len;
dcb_buf->used = false;
- dma_unmap_single(lan966x->dev,
- dcb_buf->dma_addr,
- dcb_buf->len,
- DMA_TO_DEVICE);
- if (!dcb_buf->ptp)
+ if (dcb_buf->skb)
+ dma_unmap_single(lan966x->dev,
+ dcb_buf->dma_addr,
+ dcb_buf->len,
+ DMA_TO_DEVICE);
+
+ if (dcb_buf->skb && !dcb_buf->ptp)
dev_kfree_skb_any(dcb_buf->skb);
+ if (dcb_buf->page) {
+ page_pool_release_page(lan966x->rx.page_pool,
+ dcb_buf->page);
+ put_page(dcb_buf->page);
+ }
Hmm, that's not really correct.
For skb, you need to unmap + free, true (BTW, just use
napi_consume_skb()).
For %XDP_TX, as you use Page Pool, you don't need to unmap, but you
need to do xdp_return_frame{,_bulk}. Plus, as Tx is being done here
directly from an Rx NAPI polling cycle, xdp_return_frame_rx_napi()
is usually used. Anyway, each of xdp_return_frame()'s variants will
call page_pool_put_full_page() for you.
For %XDP_REDIRECT, as you don't know the source of the XDP frame,
you need to unmap it (as it was previously mapped in
::ndo_xdp_xmit()), plus call xdp_return_frame{,_bulk} to free the
XDP frame. Note that _rx_napi() variant is not applicable here.
That description might be confusing, so you can take a look at the
already existing code[0] to get the idea. I think this piece shows
the expected logics rather well.
+
clear = true;
}
@@ -532,6 +540,9 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
lan966x_fdma_rx_free_page(rx);
lan966x_fdma_rx_advance_dcb(rx);
goto allocate_new;
+ case FDMA_TX:
+ lan966x_fdma_rx_advance_dcb(rx);
+ continue;
case FDMA_DROP:
lan966x_fdma_rx_free_page(rx);
lan966x_fdma_rx_advance_dcb(rx);
@@ -653,6 +664,62 @@ static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use)
tx->last_in_use = next_to_use;
}
+int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
+ struct xdp_frame *xdpf,
+ struct page *page)
+{
+ struct lan966x *lan966x = port->lan966x;
+ struct lan966x_tx_dcb_buf *next_dcb_buf;
+ struct lan966x_tx *tx = &lan966x->tx;
+ dma_addr_t dma_addr;
+ int next_to_use;
+ __be32 *ifh;
+ int ret = 0;
+
+ spin_lock(&lan966x->tx_lock);
+
+ /* Get next index */
+ next_to_use = lan966x_fdma_get_next_dcb(tx);
+ if (next_to_use < 0) {
+ netif_stop_queue(port->dev);
+ ret = NETDEV_TX_BUSY;
+ goto out;
+ }
+
+ /* Generate new IFH */
+ ifh = page_address(page) + XDP_PACKET_HEADROOM;
+ memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
+ lan966x_ifh_set_bypass(ifh, 1);
+ lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port));
+
+ dma_addr = page_pool_get_dma_addr(page);
+ dma_sync_single_for_device(lan966x->dev, dma_addr + XDP_PACKET_HEADROOM,
+ xdpf->len + IFH_LEN_BYTES,
+ DMA_TO_DEVICE);
Also not correct. This page was mapped with %DMA_FROM_DEVICE in the
Rx code, now you sync it for the opposite.
Most drivers in case of XDP enabled create Page Pools with ::dma_dir
set to %DMA_BIDIRECTIONAL. Now you would need only to sync it here
with the same direction (bidir) and that's it.
+
+ /* Setup next dcb */
+ lan966x_fdma_tx_setup_dcb(tx, next_to_use, xdpf->len + IFH_LEN_BYTES,
+ dma_addr + XDP_PACKET_HEADROOM);
+
+ /* Fill up the buffer */
+ next_dcb_buf = &tx->dcbs_buf[next_to_use];
+ next_dcb_buf->skb = NULL;
+ next_dcb_buf->page = page;
+ next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES;
+ next_dcb_buf->dma_addr = dma_addr;
+ next_dcb_buf->used = true;
+ next_dcb_buf->ptp = false;
+ next_dcb_buf->dev = port->dev;
+
+ /* Start the transmission */
+ lan966x_fdma_tx_start(tx, next_to_use);
+
+out:
+ spin_unlock(&lan966x->tx_lock);
+
+ return ret;
+}
+
int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
{
struct lan966x_port *port = netdev_priv(dev);
@@ -709,6 +776,7 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
/* Fill up the buffer */
next_dcb_buf = &tx->dcbs_buf[next_to_use];
next_dcb_buf->skb = skb;
+ next_dcb_buf->page = NULL;
next_dcb_buf->len = skb->len;
next_dcb_buf->dma_addr = dma_addr;
next_dcb_buf->used = true;
[...]
--
2.38.0
Thanks,
Olek
From: Horatiu Vultur <[email protected]>
Date: Tue, 15 Nov 2022 22:44:52 +0100
> Update the page_pool params to allocate XDP_PACKET_HEADROOM space as
> headroom for all received frames.
> This is needed for when the XDP_TX and XDP_REDIRECT are implemented.
>
> Signed-off-by: Horatiu Vultur <[email protected]>
[...]
> @@ -466,6 +470,7 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx,
>
> skb_mark_for_recycle(skb);
>
> + skb_reserve(skb, XDP_PACKET_HEADROOM);
Oh, forgot to ask previously. Just curious, which platforms do
usually have this NIC? Do those platforms have
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS set?
If no, then adding %NET_SKB_PAD to the headroom can significantly
improve performance, as currently you have 28 bytes of IFH + 14
bytes of Eth header, so IP header is not aligned to 4 bytes
boundary. Kernel and other drivers often expect IP header to be
aligned. Adding %NET_SKB_PAD to the headroom addresses that.
...but be careful, I've just realized that you have IFH in front
of Eth header, that means that it will also become unaligned after
that change, so make sure you don't access it with words bigger
than 2 bytes. Just test all the variants and pick the best :D
> skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status));
>
> lan966x_ifh_get_timestamp(skb->data, &timestamp);
> @@ -786,7 +791,8 @@ static int lan966x_fdma_get_max_frame(struct lan966x *lan966x)
> return lan966x_fdma_get_max_mtu(lan966x) +
> IFH_LEN_BYTES +
> SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
> - VLAN_HLEN * 2;
> + VLAN_HLEN * 2 +
> + XDP_PACKET_HEADROOM;
> }
[...]
> --
> 2.38.0
Thanks,
Olek
The 11/16/2022 16:45, Alexander Lobakin wrote:
> [Some people who received this message don't often get email from [email protected]. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]
>
> From: Horatiu Vultur <[email protected]>
> Date: Tue, 15 Nov 2022 22:44:52 +0100
>
> > Update the page_pool params to allocate XDP_PACKET_HEADROOM space as
> > headroom for all received frames.
> > This is needed for when the XDP_TX and XDP_REDIRECT are implemented.
> >
> > Signed-off-by: Horatiu Vultur <[email protected]>
>
> [...]
>
> > @@ -466,6 +470,7 @@ static struct sk_buff *lan966x_fdma_rx_get_frame(struct lan966x_rx *rx,
> >
> > skb_mark_for_recycle(skb);
> >
> > + skb_reserve(skb, XDP_PACKET_HEADROOM);
>
> Oh, forgot to ask previously. Just curious, which platforms do
> usually have this NIC? Do those platforms have
> CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS set?
I am running on ARM and I can see that
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is set.
> If no, then adding %NET_SKB_PAD to the headroom can significantly
> improve performance, as currently you have 28 bytes of IFH + 14
> bytes of Eth header, so IP header is not aligned to 4 bytes
> boundary. Kernel and other drivers often expect IP header to be
> aligned. Adding %NET_SKB_PAD to the headroom addresses that.
> ...but be careful, I've just realized that you have IFH in front
> of Eth header, that means that it will also become unaligned after
> that change, so make sure you don't access it with words bigger
> than 2 bytes. Just test all the variants and pick the best :D
Thanks for a detailed explanation!
>
> > skb_put(skb, FDMA_DCB_STATUS_BLOCKL(db->status));
> >
> > lan966x_ifh_get_timestamp(skb->data, &timestamp);
> > @@ -786,7 +791,8 @@ static int lan966x_fdma_get_max_frame(struct lan966x *lan966x)
> > return lan966x_fdma_get_max_mtu(lan966x) +
> > IFH_LEN_BYTES +
> > SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
> > - VLAN_HLEN * 2;
> > + VLAN_HLEN * 2 +
> > + XDP_PACKET_HEADROOM;
> > }
>
> [...]
>
> > --
> > 2.38.0
>
> Thanks,
> Olek
--
/Horatiu
The 11/16/2022 16:34, Alexander Lobakin wrote:
>
> From: Horatiu Vultur <[email protected]>
> Date: Tue, 15 Nov 2022 22:44:55 +0100
Hi Olek,
>
> Extend lan966x XDP support with the action XDP_TX. In this case when the
> received buffer needs to execute XDP_TX, the buffer will be moved to the
> TX buffers. So a new RX buffer will be allocated.
> When the TX finish with the frame, it would release completely this
> buffer.
>
> Signed-off-by: Horatiu Vultur <[email protected]>
> ---
> .../ethernet/microchip/lan966x/lan966x_fdma.c | 78 +++++++++++++++++--
> .../ethernet/microchip/lan966x/lan966x_main.c | 4 +-
> .../ethernet/microchip/lan966x/lan966x_main.h | 8 ++
> .../ethernet/microchip/lan966x/lan966x_xdp.c | 8 ++
> 4 files changed, 91 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> index 384ed34197d58..c2e56233a8da5 100644
> --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> @@ -394,13 +394,21 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
> dcb_buf->dev->stats.tx_bytes += dcb_buf->len;
>
> dcb_buf->used = false;
> - dma_unmap_single(lan966x->dev,
> - dcb_buf->dma_addr,
> - dcb_buf->len,
> - DMA_TO_DEVICE);
> - if (!dcb_buf->ptp)
> + if (dcb_buf->skb)
> + dma_unmap_single(lan966x->dev,
> + dcb_buf->dma_addr,
> + dcb_buf->len,
> + DMA_TO_DEVICE);
> +
> + if (dcb_buf->skb && !dcb_buf->ptp)
> dev_kfree_skb_any(dcb_buf->skb);
>
> + if (dcb_buf->page) {
> + page_pool_release_page(lan966x->rx.page_pool,
> + dcb_buf->page);
> + put_page(dcb_buf->page);
> + }
>
> Hmm, that's not really correct.
>
> For skb, you need to unmap + free, true (BPW, just use
> napi_consume_skb()).
What does BPW stand for?
Yes, I can use napi_consume_skb instead of dev_kfree_skb_any();
> For %XDP_TX, as you use Page Pool, you don't need to unmap, but you
> need to do xdp_return_frame{,_bulk}. Plus, as Tx is being done here
> directly from an Rx NAPI polling cycle, xdp_return_frame_rx_napi()
> is usually used. Anyway, each of xdp_return_frame()'s variants will
> call page_pool_put_full_page() for you.
If I understand correctly this part that you describe, the page will
be added back in the page_pool cache. While in my case, I am giving
back the page to the page allocator. In this way the page_pool needs
to allocate more pages every time when the action XDP_TX is happening.
BTW, this shows that there is a missing xdp_rxq_info_reg_mem_model call,
because when calling xdp_return_frame_rx_napi, the frame was not going
to page_pool but it was simply freed because xdp_mem_info was the wrong
type.
> For %XDP_REDIRECT, as you don't know the source of the XDP frame,
Why I don't know the source?
Will it not be from an RX page that is allocated by Page Pool?
> you need to unmap it (as it was previously mapped in
> ::ndo_xdp_xmit()), plus call xdp_return_frame{,_bulk} to free the
> XDP frame. Note that _rx_napi() variant is not applicable here.
>
> That description might be confusing, so you can take a look at the
> already existing code[0] to get the idea. I think this piece shows
> the expected logics rather well.
I think you forgot to write the link to the code.
I also looked at different drivers but I didn't figure out why the
frame needed to be mapped and where that is happening.
>
> +
> clear = true;
> }
>
> @@ -532,6 +540,9 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
> lan966x_fdma_rx_free_page(rx);
> lan966x_fdma_rx_advance_dcb(rx);
> goto allocate_new;
> + case FDMA_TX:
> + lan966x_fdma_rx_advance_dcb(rx);
> + continue;
> case FDMA_DROP:
> lan966x_fdma_rx_free_page(rx);
> lan966x_fdma_rx_advance_dcb(rx);
> @@ -653,6 +664,62 @@ static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use)
> tx->last_in_use = next_to_use;
> }
>
> +int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
> + struct xdp_frame *xdpf,
> + struct page *page)
> +{
> + struct lan966x *lan966x = port->lan966x;
> + struct lan966x_tx_dcb_buf *next_dcb_buf;
> + struct lan966x_tx *tx = &lan966x->tx;
> + dma_addr_t dma_addr;
> + int next_to_use;
> + __be32 *ifh;
> + int ret = 0;
> +
> + spin_lock(&lan966x->tx_lock);
> +
> + /* Get next index */
> + next_to_use = lan966x_fdma_get_next_dcb(tx);
> + if (next_to_use < 0) {
> + netif_stop_queue(port->dev);
> + ret = NETDEV_TX_BUSY;
> + goto out;
> + }
> +
> + /* Generate new IFH */
> + ifh = page_address(page) + XDP_PACKET_HEADROOM;
> + memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
> + lan966x_ifh_set_bypass(ifh, 1);
> + lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port));
> +
> + dma_addr = page_pool_get_dma_addr(page);
> + dma_sync_single_for_device(lan966x->dev, dma_addr + XDP_PACKET_HEADROOM,
> + xdpf->len + IFH_LEN_BYTES,
> + DMA_TO_DEVICE);
>
> Also not correct. This page was mapped with %DMA_FROM_DEVICE in the
> Rx code, now you sync it for the opposite.
> Most drivers in case of XDP enabled create Page Pools with ::dma_dir
> set to %DMA_BIDIRECTIONAL. Now you would need only to sync it here
> with the same direction (bidir) and that's it.
That is a really good catch!
I was wondering why the things were working when I tested this. Because
definitely, I can see the right behaviour.
>
> +
> + /* Setup next dcb */
> + lan966x_fdma_tx_setup_dcb(tx, next_to_use, xdpf->len + IFH_LEN_BYTES,
> + dma_addr + XDP_PACKET_HEADROOM);
> +
> + /* Fill up the buffer */
> + next_dcb_buf = &tx->dcbs_buf[next_to_use];
> + next_dcb_buf->skb = NULL;
> + next_dcb_buf->page = page;
> + next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES;
> + next_dcb_buf->dma_addr = dma_addr;
> + next_dcb_buf->used = true;
> + next_dcb_buf->ptp = false;
> + next_dcb_buf->dev = port->dev;
> +
> + /* Start the transmission */
> + lan966x_fdma_tx_start(tx, next_to_use);
> +
> +out:
> + spin_unlock(&lan966x->tx_lock);
> +
> + return ret;
> +}
> +
> int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
> {
> struct lan966x_port *port = netdev_priv(dev);
> @@ -709,6 +776,7 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
> /* Fill up the buffer */
> next_dcb_buf = &tx->dcbs_buf[next_to_use];
> next_dcb_buf->skb = skb;
> + next_dcb_buf->page = NULL;
> next_dcb_buf->len = skb->len;
> next_dcb_buf->dma_addr = dma_addr;
> next_dcb_buf->used = true;
>
> [...]
>
> --
> 2.38.0
>
> Thanks,
> Olek
--
/Horatiu
From: Horatiu Vultur <[email protected]>
Date: Wed, 16 Nov 2022 21:55:57 +0100
> The 11/16/2022 16:34, Alexander Lobakin wrote:
> >
> > From: Horatiu Vultur <[email protected]>
> > Date: Tue, 15 Nov 2022 22:44:55 +0100
>
> Hi Olek,
Hi!
>
> >
> > Extend lan966x XDP support with the action XDP_TX. In this case when the
> > received buffer needs to execute XDP_TX, the buffer will be moved to the
> > TX buffers. So a new RX buffer will be allocated.
> > When the TX finish with the frame, it would release completely this
> > buffer.
> >
> > Signed-off-by: Horatiu Vultur <[email protected]>
> > ---
> > .../ethernet/microchip/lan966x/lan966x_fdma.c | 78 +++++++++++++++++--
> > .../ethernet/microchip/lan966x/lan966x_main.c | 4 +-
> > .../ethernet/microchip/lan966x/lan966x_main.h | 8 ++
> > .../ethernet/microchip/lan966x/lan966x_xdp.c | 8 ++
> > 4 files changed, 91 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> > index 384ed34197d58..c2e56233a8da5 100644
> > --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> > +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> > @@ -394,13 +394,21 @@ static void lan966x_fdma_tx_clear_buf(struct lan966x *lan966x, int weight)
> > dcb_buf->dev->stats.tx_bytes += dcb_buf->len;
> >
> > dcb_buf->used = false;
> > - dma_unmap_single(lan966x->dev,
> > - dcb_buf->dma_addr,
> > - dcb_buf->len,
> > - DMA_TO_DEVICE);
> > - if (!dcb_buf->ptp)
> > + if (dcb_buf->skb)
> > + dma_unmap_single(lan966x->dev,
> > + dcb_buf->dma_addr,
> > + dcb_buf->len,
> > + DMA_TO_DEVICE);
> > +
> > + if (dcb_buf->skb && !dcb_buf->ptp)
> > dev_kfree_skb_any(dcb_buf->skb);
> >
> > + if (dcb_buf->page) {
> > + page_pool_release_page(lan966x->rx.page_pool,
> > + dcb_buf->page);
> > + put_page(dcb_buf->page);
> > + }
> >
> > Hmm, that's not really correct.
> >
> > For skb, you need to unmap + free, true (BPW, just use
> > napi_consume_skb()).
>
> What does BPW stand for?
Sorry, it was a typo <O> I meant BTW / "by the word" (or "by the
way").
> Yes, I can use napi_consume_skb instead of dev_kfree_skb_any();
>
> > For %XDP_TX, as you use Page Pool, you don't need to unmap, but you
> > need to do xdp_return_frame{,_bulk}. Plus, as Tx is being done here
> > directly from an Rx NAPI polling cycle, xdp_return_frame_rx_napi()
> > is usually used. Anyway, each of xdp_return_frame()'s variants will
> > call page_pool_put_full_page() for you.
>
> If I understand correctly this part that you describe, the page will
> be added back in the page_pool cache. While in my case, I am giving
> back the page to the page allocator. In this way the page_pool needs
> to allocate more pages every time when the action XDP_TX is happening.
>
> BTW, this shows that there is a missing xdp_rxq_info_reg_mem_model call,
> because when calling xdp_return_frame_rx_napi, the frame was not going
> to page_pool but the was simply freed because xdp_mem_info was the wrong
> type.
Correct!
>
> > For %XDP_REDIRECT, as you don't know the source of the XDP frame,
>
> Why I don't know the source?
> Will it not be from an RX page that is allocated by Page Pool?
Imagine some NIC which does not use Page Pool, for example, it does
its own page allocation / splitting / recycling techniques, gets
%XDP_REDIRECT when running XDP prog on Rx. devmap says it must
redirect the frame to your NIC.
Then, your ::ndo_xdp_xmit() will be run on a frame/page not
belonging to any Page Pool.
The example can be any of Intel drivers (there are plans to switch
at least i40e and ice to Page Pool, but they're always deeply in
the backlogs (clownface)).
>
> > you need to unmap it (as it was previously mapped in
> > ::ndo_xdp_xmit()), plus call xdp_return_frame{,_bulk} to free the
> > XDP frame. Note that _rx_napi() variant is not applicable here.
> >
> > That description might be confusing, so you can take a look at the
> > already existing code[0] to get the idea. I think this piece shows
> > the expected logics rather well.
>
> I think you forgot to write the link to the code.
> I also looked at different drivers but I didn't figure out why the
> frame needed to be mapped and where that is happening.
Ooof, really. Pls look at the end of this reply :D
On ::ndo_xdp_xmit(), as I explained above, you can receive a frame
from any driver or BPF core code (such as cpumap), and BPF prog
there could be run on buffer of any kind: Page Pool page, just a
page, a kmalloc() chunk and so on.
So, in the code[0], you can see the following set of operations:
* DMA unmap in all cases excluding frame coming from %XDP_TX (then
it was only synced);
* updating statistics and freeing skb for skb cases;
* xdp_return_frame_rx_napi() for %XDP_TX cases;
* xdp_return_frame_bulk() for ::ndo_xdp_xmit() cases.
>
> >
> > +
> > clear = true;
> > }
> >
> > @@ -532,6 +540,9 @@ static int lan966x_fdma_napi_poll(struct napi_struct *napi, int weight)
> > lan966x_fdma_rx_free_page(rx);
> > lan966x_fdma_rx_advance_dcb(rx);
> > goto allocate_new;
> > + case FDMA_TX:
> > + lan966x_fdma_rx_advance_dcb(rx);
> > + continue;
> > case FDMA_DROP:
> > lan966x_fdma_rx_free_page(rx);
> > lan966x_fdma_rx_advance_dcb(rx);
> > @@ -653,6 +664,62 @@ static void lan966x_fdma_tx_start(struct lan966x_tx *tx, int next_to_use)
> > tx->last_in_use = next_to_use;
> > }
> >
> > +int lan966x_fdma_xmit_xdpf(struct lan966x_port *port,
> > + struct xdp_frame *xdpf,
> > + struct page *page)
> > +{
> > + struct lan966x *lan966x = port->lan966x;
> > + struct lan966x_tx_dcb_buf *next_dcb_buf;
> > + struct lan966x_tx *tx = &lan966x->tx;
> > + dma_addr_t dma_addr;
> > + int next_to_use;
> > + __be32 *ifh;
> > + int ret = 0;
> > +
> > + spin_lock(&lan966x->tx_lock);
> > +
> > + /* Get next index */
> > + next_to_use = lan966x_fdma_get_next_dcb(tx);
> > + if (next_to_use < 0) {
> > + netif_stop_queue(port->dev);
> > + ret = NETDEV_TX_BUSY;
> > + goto out;
> > + }
> > +
> > + /* Generate new IFH */
> > + ifh = page_address(page) + XDP_PACKET_HEADROOM;
> > + memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
> > + lan966x_ifh_set_bypass(ifh, 1);
> > + lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port));
> > +
> > + dma_addr = page_pool_get_dma_addr(page);
> > + dma_sync_single_for_device(lan966x->dev, dma_addr + XDP_PACKET_HEADROOM,
> > + xdpf->len + IFH_LEN_BYTES,
> > + DMA_TO_DEVICE);
> >
> > Also not correct. This page was mapped with %DMA_FROM_DEVICE in the
> > Rx code, now you sync it for the opposite.
> > Most drivers in case of XDP enabled create Page Pools with ::dma_dir
> > set to %DMA_BIDIRECTIONAL. Now you would need only to sync it here
> > with the same direction (bidir) and that's it.
>
> That is a really good catch!
> I was wondering why the things were working when I tested this. Because
> definitely, I can see the right behaviour.
The reasons can be:
1) your platform might have a DMA coherence engine, so that all
those DMA sync calls are no-ops;
2) on your platform, DMA writeback (TO_DEVICE) and DMA invalidate
(FROM_DEVICE) invoke the same operation/instruction. Some
hardware is designed that way, that any DMA sync is in fact a
bidir synchronization;
3) if there were no frame modification from the kernel, e.g. you
received it and immediately sent, cache was not polluted with
some pending modifications, so there was no work for writeback;
4) probably something else I might've missed.
>
> >
> > +
> > + /* Setup next dcb */
> > + lan966x_fdma_tx_setup_dcb(tx, next_to_use, xdpf->len + IFH_LEN_BYTES,
> > + dma_addr + XDP_PACKET_HEADROOM);
> > +
> > + /* Fill up the buffer */
> > + next_dcb_buf = &tx->dcbs_buf[next_to_use];
> > + next_dcb_buf->skb = NULL;
> > + next_dcb_buf->page = page;
> > + next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES;
> > + next_dcb_buf->dma_addr = dma_addr;
> > + next_dcb_buf->used = true;
> > + next_dcb_buf->ptp = false;
> > + next_dcb_buf->dev = port->dev;
> > +
> > + /* Start the transmission */
> > + lan966x_fdma_tx_start(tx, next_to_use);
> > +
> > +out:
> > + spin_unlock(&lan966x->tx_lock);
> > +
> > + return ret;
> > +}
> > +
> > int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
> > {
> > struct lan966x_port *port = netdev_priv(dev);
> > @@ -709,6 +776,7 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
> > /* Fill up the buffer */
> > next_dcb_buf = &tx->dcbs_buf[next_to_use];
> > next_dcb_buf->skb = skb;
> > + next_dcb_buf->page = NULL;
> > next_dcb_buf->len = skb->len;
> > next_dcb_buf->dma_addr = dma_addr;
> > next_dcb_buf->used = true;
> >
> > [...]
> >
> > --
> > 2.38.0
> >
> > Thanks,
> > Olek
>
> --
> /Horatiu
[0] https://elixir.bootlin.com/linux/v6.1-rc5/source/drivers/net/ethernet/marvell/mvneta.c#L1882
Thanks,
Olek
The 11/17/2022 16:31, Alexander Lobakin wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>
> From: Horatiu Vultur <[email protected]>
> Date: Wed, 16 Nov 2022 21:55:57 +0100
>
> > The 11/16/2022 16:34, Alexander Lobakin wrote:
> > >
> > > From: Horatiu Vultur <[email protected]>
> > > Date: Tue, 15 Nov 2022 22:44:55 +0100
> >
> > Hi Olek,
>
> Hi!
>
> > > For %XDP_REDIRECT, as you don't know the source of the XDP frame,
> >
> > Why I don't know the source?
> > Will it not be from an RX page that is allocated by Page Pool?
>
> Imagine some NIC which does not use Page Pool, for example, it does
> its own page allocation / splitting / recycling techniques, gets
> %XDP_REDIRECT when running XDP prog on Rx. devmap says it must
> redirect the frame to your NIC.
> Then, your ::ndo_xdp_xmit() will be run on a frame/page not
> belonging to any Page Pool.
> The example can be any of Intel drivers (there are plans to switch
> at least i40e and ice to Page Pool, but they're always deeply in
> the backlogs (clownface)).
Silly me, I was always thinking and trying only from one port of lan966x
to another port of lan966x. Of course it can come from other NICs.
>
> >
> > > you need to unmap it (as it was previously mapped in
> > > ::ndo_xdp_xmit()), plus call xdp_return_frame{,_bulk} to free the
> > > XDP frame. Note that _rx_napi() variant is not applicable here.
> > >
> > > That description might be confusing, so you can take a look at the
> > > already existing code[0] to get the idea. I think this piece shows
> > > the expected logics rather well.
> >
> > I think you forgot to write the link to the code.
> > I looked also at different drivers but I didn't figure it out why the
> > frame needed to be mapped and where is happening that.
>
> Ooof, really. Pls look at the end of this reply :D
> On ::ndo_xdp_xmit(), as I explained above, you can receive a frame
> from any driver or BPF core code (such as cpumap), and BPF prog
> there could be run on buffer of any kind: Page Pool page, just a
> page, a kmalloc() chunk and so on.
>
> So, in the code[0], you can see the following set of operations:
>
> * DMA unmap in all cases excluding frame coming from %XDP_TX (then
> it was only synced);
> * updating statistics and freeing skb for skb cases;
> * xdp_return_frame_rx_napi() for %XDP_TX cases;
> * xdp_return_frame_bulk() for ::ndo_xdp_xmit() cases.
Thanks for a detailed explanation and for the link :D
I will update all this in the next version.
>
> > > + ifh = page_address(page) + XDP_PACKET_HEADROOM;
> > > + memset(ifh, 0x0, sizeof(__be32) * IFH_LEN);
> > > + lan966x_ifh_set_bypass(ifh, 1);
> > > + lan966x_ifh_set_port(ifh, BIT_ULL(port->chip_port));
> > > +
> > > + dma_addr = page_pool_get_dma_addr(page);
> > > + dma_sync_single_for_device(lan966x->dev, dma_addr + XDP_PACKET_HEADROOM,
> > > + xdpf->len + IFH_LEN_BYTES,
> > > + DMA_TO_DEVICE);
> > >
> > > Also not correct. This page was mapped with %DMA_FROM_DEVICE in the
> > > Rx code, now you sync it for the opposite.
> > > Most drivers in case of XDP enabled create Page Pools with ::dma_dir
> > > set to %DMA_BIDIRECTIONAL. Now you would need only to sync it here
> > > with the same direction (bidir) and that's it.
> >
> > That is a really good catch!
> > I was wondering why the things were working when I tested this. Because
> > definitely, I can see the right behaviour.
>
> The reasons can be:
>
> 1) your platform might have a DMA coherence engine, so that all
> those DMA sync calls are no-ops;
> 2) on your platform, DMA writeback (TO_DEVICE) and DMA invalidate
> (FROM_DEVICE) invoke the same operation/instruction. Some
> hardware is designed that way, that any DMA sync is in fact a
> bidir synchronization;
> 3) if there were no frame modification from the kernel, e.g. you
> received it and immediately sent, cache was not polluted with
> some pending modifications, so there was no work for writeback;
> 4) probably something else I might've missed.
>
> >
> > >
> > > +
> > > + /* Setup next dcb */
> > > + lan966x_fdma_tx_setup_dcb(tx, next_to_use, xdpf->len + IFH_LEN_BYTES,
> > > + dma_addr + XDP_PACKET_HEADROOM);
> > > +
> > > + /* Fill up the buffer */
> > > + next_dcb_buf = &tx->dcbs_buf[next_to_use];
> > > + next_dcb_buf->skb = NULL;
> > > + next_dcb_buf->page = page;
> > > + next_dcb_buf->len = xdpf->len + IFH_LEN_BYTES;
> > > + next_dcb_buf->dma_addr = dma_addr;
> > > + next_dcb_buf->used = true;
> > > + next_dcb_buf->ptp = false;
> > > + next_dcb_buf->dev = port->dev;
> > > +
> > > + /* Start the transmission */
> > > + lan966x_fdma_tx_start(tx, next_to_use);
> > > +
> > > +out:
> > > + spin_unlock(&lan966x->tx_lock);
> > > +
> > > + return ret;
> > > +}
> > > +
> > > int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
> > > {
> > > struct lan966x_port *port = netdev_priv(dev);
> > > @@ -709,6 +776,7 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
> > > /* Fill up the buffer */
> > > next_dcb_buf = &tx->dcbs_buf[next_to_use];
> > > next_dcb_buf->skb = skb;
> > > + next_dcb_buf->page = NULL;
> > > next_dcb_buf->len = skb->len;
> > > next_dcb_buf->dma_addr = dma_addr;
> > > next_dcb_buf->used = true;
> > >
> > > [...]
> > >
> > > --
> > > 2.38.0
> > >
> > > Thanks,
> > > Olek
> >
> > --
> > /Horatiu
>
> [0] https://elixir.bootlin.com/linux/v6.1-rc5/source/drivers/net/ethernet/marvell/mvneta.c#L1882
>
> Thanks,
> Olek
--
/Horatiu