2022-10-25 20:14:54

by Shenwei Wang

[permalink] [raw]
Subject: [PATCH 1/1] net: fec: add initial XDP support

This patch adds the initial XDP support to Freescale driver. It supports
XDP_PASS, XDP_DROP and XDP_REDIRECT actions. Upcoming patches will add
support for XDP_TX and Zero Copy features.

As the patch is rather large, the part of codes to collect the
statistics is separated and will prepare a dedicated patch for that
part.

The driver has a macro RX_RING_SIZE to configure the RX ring size. After
testing with the RX ring size, it turned out the small the rign size the
better the better performance for XDP mode. So the different ring size is
selected for XDP mode and normal mode in this patch.

I just tested with the application of xdpsock.
-- Native here means running command of "xdpsock -i eth0"
-- SKB-Mode means running command of "xdpsock -S -i eth0"

RX Ring Size 16 32 64 128
Native 230K 227K 196K 160K
SKB-Mode 207K 208K 203K 204K

Normal mode performance by iperf.

RX Ring Size 16 64 128
iperf 300Mbps 830Mbps 933Mbps

The following are the testing result relating to XDP mode:

# ./xdpsock -i eth0
sock0@eth0:0 rxdrop xdp-drv
pps pkts 1.00
rx 231166 905984
tx 0 0

# xdpsock -S -i eth0 // skb-mode
sock0@eth0:0 rxdrop xdp-skb
pps pkts 1.00
rx 205638 917288
tx 0 0

# xdp2 eth0
proto 0: 571382 pkt/s
proto 0: 579849 pkt/s
proto 0: 582110 pkt/s

# xdp2 -S eth0 // skb-mode
proto 17: 71999 pkt/s
proto 17: 72000 pkt/s
proto 17: 71988 pkt/s

Signed-off-by: Shenwei Wang <[email protected]>
---
drivers/net/ethernet/freescale/fec.h | 4 +
drivers/net/ethernet/freescale/fec_main.c | 241 +++++++++++++++++++++-
2 files changed, 244 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 476e3863a310..07e85fc3d7ba 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -348,6 +348,7 @@ struct bufdesc_ex {
*/

#define FEC_ENET_XDP_HEADROOM (XDP_PACKET_HEADROOM)
+#define XDP_RX_RING_SIZE 16

#define FEC_ENET_RX_PAGES 256
#define FEC_ENET_RX_FRSIZE (PAGE_SIZE - FEC_ENET_XDP_HEADROOM \
@@ -663,6 +664,9 @@ struct fec_enet_private {

struct imx_sc_ipc *ipc_handle;

+ /* XDP BPF Program */
+ struct bpf_prog *xdp_prog;
+
u64 ethtool_stats[];
};

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 6986b74fb8af..2e4be4590f77 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -89,6 +89,11 @@ static const u16 fec_enet_vlan_pri_to_queue[8] = {0, 0, 1, 1, 1, 2, 2, 2};
#define FEC_ENET_OPD_V 0xFFF0
#define FEC_MDIO_PM_TIMEOUT 100 /* ms */

+#define FEC_ENET_XDP_PASS 0
+#define FEC_ENET_XDP_CONSUMED BIT(0)
+#define FEC_ENET_XDP_TX BIT(1)
+#define FEC_ENET_XDP_REDIR BIT(2)
+
struct fec_devinfo {
u32 quirks;
};
@@ -418,13 +423,14 @@ static int
fec_enet_create_page_pool(struct fec_enet_private *fep,
struct fec_enet_priv_rx_q *rxq, int size)
{
+ struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
struct page_pool_params pp_params = {
.order = 0,
.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
.pool_size = size,
.nid = dev_to_node(&fep->pdev->dev),
.dev = &fep->pdev->dev,
- .dma_dir = DMA_FROM_DEVICE,
+ .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
.offset = FEC_ENET_XDP_HEADROOM,
.max_len = FEC_ENET_RX_FRSIZE,
};
@@ -1499,6 +1505,59 @@ static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
bdp->cbd_bufaddr = cpu_to_fec32(phys_addr);
}

+static u32
+fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
+ struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq, int index)
+{
+ unsigned int sync, len = xdp->data_end - xdp->data;
+ u32 ret = FEC_ENET_XDP_PASS;
+ struct page *page;
+ int err;
+ u32 act;
+
+ act = bpf_prog_run_xdp(prog, xdp);
+
+ /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
+ sync = xdp->data_end - xdp->data_hard_start - FEC_ENET_XDP_HEADROOM;
+ sync = max(sync, len);
+
+ switch (act) {
+ case XDP_PASS:
+ ret = FEC_ENET_XDP_PASS;
+ break;
+
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(fep->netdev, xdp, prog);
+ if (!err) {
+ ret = FEC_ENET_XDP_REDIR;
+ } else {
+ ret = FEC_ENET_XDP_CONSUMED;
+ page = virt_to_head_page(xdp->data);
+ page_pool_put_page(rxq->page_pool, page, sync, true);
+ }
+ break;
+
+ default:
+ bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
+ fallthrough;
+
+ case XDP_TX:
+ bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
+ fallthrough;
+
+ case XDP_ABORTED:
+ fallthrough; /* handle aborts by dropping packet */
+
+ case XDP_DROP:
+ ret = FEC_ENET_XDP_CONSUMED;
+ page = virt_to_head_page(xdp->data);
+ page_pool_put_page(rxq->page_pool, page, sync, true);
+ break;
+ }
+
+ return ret;
+}
+
/* During a receive, the bd_rx.cur points to the current incoming buffer.
* When we update through the ring, if the next incoming buffer has
* not been given to the system, we just set the empty indicator,
@@ -1520,6 +1579,9 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
u16 vlan_tag;
int index = 0;
bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
+ struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
+ u32 ret, xdp_result = FEC_ENET_XDP_PASS;
+ struct xdp_buff xdp;
struct page *page;

#ifdef CONFIG_M532x
@@ -1531,6 +1593,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
* These get messed up if we get called due to a busy condition.
*/
bdp = rxq->bd.cur;
+ xdp_init_buff(&xdp, PAGE_SIZE, &rxq->xdp_rxq);

while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) {

@@ -1580,6 +1643,17 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
prefetch(page_address(page));
fec_enet_update_cbd(rxq, bdp, index);

+ if (xdp_prog) {
+ xdp_buff_clear_frags_flag(&xdp);
+ xdp_prepare_buff(&xdp, page_address(page),
+ FEC_ENET_XDP_HEADROOM, pkt_len, false);
+
+ ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, index);
+ xdp_result |= ret;
+ if (ret != FEC_ENET_XDP_PASS)
+ goto rx_processing_done;
+ }
+
/* The packet length includes FCS, but we don't want to
* include that when passing upstream as it messes up
* bridging applications.
@@ -1675,6 +1749,10 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
writel(0, rxq->bd.reg_desc_active);
}
rxq->bd.cur = bdp;
+
+ if (xdp_result & FEC_ENET_XDP_REDIR)
+ xdp_do_flush_map();
+
return pkt_received;
}

@@ -3476,6 +3554,165 @@ static u16 fec_enet_select_queue(struct net_device *ndev, struct sk_buff *skb,
return fec_enet_vlan_pri_to_queue[vlan_tag >> 13];
}

+static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ struct fec_enet_private *fep = netdev_priv(dev);
+ bool is_run = netif_running(dev);
+ struct bpf_prog *old_prog;
+ unsigned int dsize;
+ int i;
+
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ if (is_run)
+ fec_enet_close(dev);
+
+ old_prog = xchg(&fep->xdp_prog, bpf->prog);
+
+ /* Update RX ring size */
+ dsize = fep->bufdesc_ex ? sizeof(struct bufdesc_ex) :
+ sizeof(struct bufdesc);
+ for (i = 0; i < fep->num_rx_queues; i++) {
+ struct fec_enet_priv_rx_q *rxq = fep->rx_queue[i];
+ struct bufdesc *cbd_base;
+ unsigned int size;
+
+ cbd_base = rxq->bd.base;
+ if (bpf->prog)
+ rxq->bd.ring_size = XDP_RX_RING_SIZE;
+ else
+ rxq->bd.ring_size = RX_RING_SIZE;
+ size = dsize * rxq->bd.ring_size;
+ cbd_base = (struct bufdesc *)(((void *)cbd_base) + size);
+ rxq->bd.last = (struct bufdesc *)(((void *)cbd_base) - dsize);
+ }
+
+ if (is_run)
+ fec_enet_open(dev);
+
+ if (old_prog)
+ bpf_prog_put(old_prog);
+
+ return 0;
+
+ case XDP_SETUP_XSK_POOL:
+ return -EOPNOTSUPP;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int
+fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int cpu)
+{
+ int index = cpu;
+
+ if (unlikely(index < 0))
+ index = 0;
+
+ while (index >= fep->num_tx_queues)
+ index -= fep->num_tx_queues;
+
+ return index;
+}
+
+static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
+ struct fec_enet_priv_tx_q *txq,
+ struct xdp_frame *frame)
+{
+ unsigned int index, status, estatus;
+ struct bufdesc *bdp, *last_bdp;
+ dma_addr_t dma_addr;
+ int entries_free;
+
+ entries_free = fec_enet_get_free_txdesc_num(txq);
+ if (entries_free < MAX_SKB_FRAGS + 1) {
+ netdev_err(fep->netdev, "NOT enough BD for SG!\n");
+ return NETDEV_TX_OK;
+ }
+
+ /* Fill in a Tx ring entry */
+ bdp = txq->bd.cur;
+ last_bdp = bdp;
+ status = fec16_to_cpu(bdp->cbd_sc);
+ status &= ~BD_ENET_TX_STATS;
+
+ index = fec_enet_get_bd_index(bdp, &txq->bd);
+
+ dma_addr = dma_map_single(&fep->pdev->dev, frame->data,
+ frame->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&fep->pdev->dev, dma_addr))
+ return FEC_ENET_XDP_CONSUMED;
+
+ status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST);
+ if (fep->bufdesc_ex)
+ estatus = BD_ENET_TX_INT;
+
+ bdp->cbd_bufaddr = cpu_to_fec32(dma_addr);
+ bdp->cbd_datlen = cpu_to_fec16(frame->len);
+
+ if (fep->bufdesc_ex) {
+ struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+
+ if (fep->quirks & FEC_QUIRK_HAS_AVB)
+ estatus |= FEC_TX_BD_FTYPE(txq->bd.qid);
+
+ ebdp->cbd_bdu = 0;
+ ebdp->cbd_esc = cpu_to_fec32(estatus);
+ }
+
+ index = fec_enet_get_bd_index(last_bdp, &txq->bd);
+ txq->tx_skbuff[index] = NULL;
+
+ /* Send it on its way. Tell FEC it's ready, interrupt when done,
+ * it's the last BD of the frame, and to put the CRC on the end.
+ */
+ status |= (BD_ENET_TX_READY | BD_ENET_TX_TC);
+ bdp->cbd_sc = cpu_to_fec16(status);
+
+ /* If this was the last BD in the ring, start at the beginning again. */
+ bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd);
+
+ txq->bd.cur = bdp;
+
+ return 0;
+}
+
+static int fec_enet_xdp_xmit(struct net_device *dev,
+ int num_frames,
+ struct xdp_frame **frames,
+ u32 flags)
+{
+ struct fec_enet_private *fep = netdev_priv(dev);
+ struct fec_enet_priv_tx_q *txq;
+ int cpu = smp_processor_id();
+ struct netdev_queue *nq;
+ unsigned int queue;
+ int i, nxmit = 0;
+
+ queue = fec_enet_xdp_get_tx_queue(fep, cpu);
+ txq = fep->tx_queue[queue];
+ nq = netdev_get_tx_queue(fep->netdev, queue);
+
+ __netif_tx_lock(nq, cpu);
+
+ for (i = 0; i < num_frames; i++) {
+ fec_enet_txq_xmit_frame(fep, txq, frames[i]);
+ nxmit++;
+ }
+
+ /* Make sure the update to bdp and tx_skbuff are performed. */
+ wmb();
+
+ /* Trigger transmission start */
+ writel(0, txq->bd.reg_desc_active);
+
+ __netif_tx_unlock(nq);
+
+ return num_frames;
+}
+
static const struct net_device_ops fec_netdev_ops = {
.ndo_open = fec_enet_open,
.ndo_stop = fec_enet_close,
@@ -3490,6 +3727,8 @@ static const struct net_device_ops fec_netdev_ops = {
.ndo_poll_controller = fec_poll_controller,
#endif
.ndo_set_features = fec_set_features,
+ .ndo_bpf = fec_enet_bpf,
+ .ndo_xdp_xmit = fec_enet_xdp_xmit,
};

static const unsigned short offset_des_active_rxq[] = {
--
2.34.1



2022-10-25 22:42:50

by Andrew Lunn

[permalink] [raw]
Subject: Re: [PATCH 1/1] net: fec: add initial XDP support

> +#define FEC_ENET_XDP_PASS 0
> +#define FEC_ENET_XDP_CONSUMED BIT(0)
> +#define FEC_ENET_XDP_TX BIT(1)
> +#define FEC_ENET_XDP_REDIR BIT(2)

I don't know XDP, so maybe a silly question. Are these action mutually
exclusive? Are these really bits, or should it be an enum?
fec_enet_run_xdp() does not combine them as bits.

> +static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf)
> +{
> + struct fec_enet_private *fep = netdev_priv(dev);
> + bool is_run = netif_running(dev);

You have the space, so maybe call it is_running.

> + struct bpf_prog *old_prog;
> + unsigned int dsize;
> + int i;
> +
> + switch (bpf->command) {
> + case XDP_SETUP_PROG:
> + if (is_run)
> + fec_enet_close(dev);

fec_net_close() followed by fec_enet_open() is pretty expensive. The
PHY is stopped and disconnected, and then connected and started. That
will probably trigger an auto-neg, which takes around 1.5 seconds
before the link is up again.

Maybe you should optimise this. I guess the real issue here is you
need to resize the RX ring. You need to be careful with that
anyway. If the machine is under memory pressure, you might not be able
to allocate the ring, resulting in a broken interface. What is
recommended for ethtool --set-ring is that you first allocate the new
ring, and if that is successful, free the old ring. If the allocation
fails, you still have the old ring, and you can safely return -ENOMEM
and still have a working interface.

So i think you can split this patch up into a few parts:

XDP using the default ring size. Your benchmarks show it works, its
just not optimal. But the resulting smaller patch will be easier to
review.

Add support for ethtool set-ring, which will allow you to pick apart
the bits of fec_net_close() and fec_enet_open() which are needed for
changing the rings. This might actually need a refactoring patch?

And then add support for optimal ring size for XDP.

Andrew

2022-10-26 08:17:23

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/1] net: fec: add initial XDP support

Hi Shenwei,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]
[also build test WARNING on net/master linus/master v6.1-rc2 next-20221026]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Shenwei-Wang/net-fec-add-initial-XDP-support/20221026-041331
patch link: https://lore.kernel.org/r/20221025201156.776576-1-shenwei.wang%40nxp.com
patch subject: [PATCH 1/1] net: fec: add initial XDP support
config: hexagon-randconfig-r033-20221024 (attached as .config)
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 791a7ae1ba3efd6bca96338e10ffde557ba83920)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/f601d09cdead68e49ba67efbb904277b697c2f66
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Shenwei-Wang/net-fec-add-initial-XDP-support/20221026-041331
git checkout f601d09cdead68e49ba67efbb904277b697c2f66
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash drivers/net/ethernet/freescale/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

In file included from drivers/net/ethernet/freescale/fec_main.c:33:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __raw_readb(PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
#define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
^
In file included from drivers/net/ethernet/freescale/fec_main.c:33:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
~~~~~~~~~~ ^
include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
#define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
^
In file included from drivers/net/ethernet/freescale/fec_main.c:33:
In file included from include/linux/interrupt.h:11:
In file included from include/linux/hardirq.h:11:
In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1:
In file included from include/asm-generic/hardirq.h:17:
In file included from include/linux/irq.h:20:
In file included from include/linux/io.h:13:
In file included from arch/hexagon/include/asm/io.h:334:
include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writeb(value, PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
__raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
~~~~~~~~~~ ^
>> drivers/net/ethernet/freescale/fec_main.c:3692:9: warning: variable 'nxmit' set but not used [-Wunused-but-set-variable]
int i, nxmit = 0;
^
7 warnings generated.


vim +/nxmit +3692 drivers/net/ethernet/freescale/fec_main.c

3681
3682 static int fec_enet_xdp_xmit(struct net_device *dev,
3683 int num_frames,
3684 struct xdp_frame **frames,
3685 u32 flags)
3686 {
3687 struct fec_enet_private *fep = netdev_priv(dev);
3688 struct fec_enet_priv_tx_q *txq;
3689 int cpu = smp_processor_id();
3690 struct netdev_queue *nq;
3691 unsigned int queue;
> 3692 int i, nxmit = 0;
3693
3694 queue = fec_enet_xdp_get_tx_queue(fep, cpu);
3695 txq = fep->tx_queue[queue];
3696 nq = netdev_get_tx_queue(fep->netdev, queue);
3697
3698 __netif_tx_lock(nq, cpu);
3699
3700 for (i = 0; i < num_frames; i++) {
3701 fec_enet_txq_xmit_frame(fep, txq, frames[i]);
3702 nxmit++;
3703 }
3704
3705 /* Make sure the update to bdp and tx_skbuff are performed. */
3706 wmb();
3707
3708 /* Trigger transmission start */
3709 writel(0, txq->bd.reg_desc_active);
3710
3711 __netif_tx_unlock(nq);
3712
3713 return num_frames;
3714 }
3715

--
0-DAY CI Kernel Test Service
https://01.org/lkp


Attachments:
(No filename) (6.50 kB)
.config.gz (39.30 kB)
Download all attachments

2022-10-27 01:56:52

by Shenwei Wang

[permalink] [raw]
Subject: RE: [EXT] Re: [PATCH 1/1] net: fec: add initial XDP support



> -----Original Message-----
> From: Andrew Lunn <[email protected]>
> Sent: Tuesday, October 25, 2022 5:09 PM
> To: Shenwei Wang <[email protected]>
> Cc: David S. Miller <[email protected]>; Eric Dumazet
> <[email protected]>; Jakub Kicinski <[email protected]>; Paolo Abeni
> <[email protected]>; Alexei Starovoitov <[email protected]>; Daniel Borkmann
> <[email protected]>; Jesper Dangaard Brouer <[email protected]>; John
> Fastabend <[email protected]>; [email protected]; linux-
> [email protected]; [email protected]
> Subject: [EXT] Re: [PATCH 1/1] net: fec: add initial XDP support
>
> Caution: EXT Email
>
> > +#define FEC_ENET_XDP_PASS 0
> > +#define FEC_ENET_XDP_CONSUMED BIT(0)
> > +#define FEC_ENET_XDP_TX BIT(1)
> > +#define FEC_ENET_XDP_REDIR BIT(2)
>
> I don't know XDP, so maybe a silly question. Are these action mutually exclusive?
> Are these really bits, or should it be an enum?
> fec_enet_run_xdp() does not combine them as bits.
>
The bit here is to record the states that may required after completing the XDP processing.
As the current implementation for XDP is not full, the other bit like FEC_ENET_XDP_TX is not
used for now. Generally it will require an extra action if a FEC_ENET_XDP_TX is returned.
Because we are processing a batch of packets together, those bits may get combined. It will
then responds to each bit accordingly.

> > +static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf
> > +*bpf) {
> > + struct fec_enet_private *fep = netdev_priv(dev);
> > + bool is_run = netif_running(dev);
>
> You have the space, so maybe call it is_running.
>
> > + struct bpf_prog *old_prog;
> > + unsigned int dsize;
> > + int i;
> > +
> > + switch (bpf->command) {
> > + case XDP_SETUP_PROG:
> > + if (is_run)
> > + fec_enet_close(dev);
>
> fec_net_close() followed by fec_enet_open() is pretty expensive. The PHY is
> stopped and disconnected, and then connected and started. That will probably
> trigger an auto-neg, which takes around 1.5 seconds before the link is up again.
>
> Maybe you should optimise this. I guess the real issue here is you need to resize
> the RX ring. You need to be careful with that anyway. If the machine is under
> memory pressure, you might not be able to allocate the ring, resulting in a
> broken interface. What is recommended for ethtool --set-ring is that you first
> allocate the new ring, and if that is successful, free the old ring. If the allocation
> fails, you still have the old ring, and you can safely return -ENOMEM and still
> have a working interface.
>
> So i think you can split this patch up into a few parts:
>
> XDP using the default ring size. Your benchmarks show it works, its just not
> optimal. But the resulting smaller patch will be easier to review.
>
> Add support for ethtool set-ring, which will allow you to pick apart the bits of
> fec_net_close() and fec_enet_open() which are needed for changing the rings.
> This might actually need a refactoring patch?
>

That sounds good. Let me think about it.

Thanks,
Shenwei

> And then add support for optimal ring size for XDP.
>
> Andrew