As the patch that adds XDP statistics is based on the previous optimization
patch, I've put the two patches together. The link to the optimization
patch is:
https://lore.kernel.org/imx/[email protected]/
Shenwei Wang (2):
net: fec: simplify the code logic of quirks
net: fec: add xdp and page pool statistics
drivers/net/ethernet/freescale/fec.h | 12 +++
drivers/net/ethernet/freescale/fec_main.c | 119 ++++++++++++++++++----
2 files changed, 109 insertions(+), 22 deletions(-)
--
2.34.1
Simplify the code logic for handling the FEC_QUIRK_HAS_RACC quirk.
If a SoC has the RACC quirk, the driver enables the 16bit shift by
default in the probe function for better performance.
This patch handles the quirk logic in one place to keep it simple
and clean. It also optimizes the fec_enet_xdp_get_tx_queue function
according to Paolo Abeni's comments, and excludes the SoCs that
require frame swapping from XDP support.
Signed-off-by: Shenwei Wang <[email protected]>
---
drivers/net/ethernet/freescale/fec_main.c | 49 ++++++++++++++---------
1 file changed, 30 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 6b062a0663f4..3fb870340c22 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1581,8 +1581,20 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
bool need_swap = fep->quirks & FEC_QUIRK_SWAP_FRAME;
struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
u32 ret, xdp_result = FEC_ENET_XDP_PASS;
+ u32 data_start = FEC_ENET_XDP_HEADROOM;
struct xdp_buff xdp;
struct page *page;
+ u32 sub_len = 4;
+
+#if !defined(CONFIG_M5272)
+	/* If a SoC has the FEC_QUIRK_HAS_RACC quirk, the FEC_RACC_SHIFT16
+	 * bit is set by default in the probe function.
+ */
+ if (fep->quirks & FEC_QUIRK_HAS_RACC) {
+ data_start += 2;
+ sub_len += 2;
+ }
+#endif
#ifdef CONFIG_M532x
flush_cache_all();
@@ -1645,9 +1657,9 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
if (xdp_prog) {
xdp_buff_clear_frags_flag(&xdp);
+ /* subtract 16bit shift and FCS */
xdp_prepare_buff(&xdp, page_address(page),
- FEC_ENET_XDP_HEADROOM, pkt_len, false);
-
+ data_start, pkt_len - sub_len, false);
ret = fec_enet_run_xdp(fep, xdp_prog, &xdp, rxq, index);
xdp_result |= ret;
if (ret != FEC_ENET_XDP_PASS)
@@ -1659,18 +1671,15 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
* bridging applications.
*/
skb = build_skb(page_address(page), PAGE_SIZE);
- skb_reserve(skb, FEC_ENET_XDP_HEADROOM);
- skb_put(skb, pkt_len - 4);
+ skb_reserve(skb, data_start);
+ skb_put(skb, pkt_len - sub_len);
skb_mark_for_recycle(skb);
- data = skb->data;
- if (need_swap)
+ if (unlikely(need_swap)) {
+ data = page_address(page) + FEC_ENET_XDP_HEADROOM;
swap_buffer(data, pkt_len);
-
-#if !defined(CONFIG_M5272)
- if (fep->quirks & FEC_QUIRK_HAS_RACC)
- data = skb_pull_inline(skb, 2);
-#endif
+ }
+ data = skb->data;
/* Extract the enhanced buffer descriptor */
ebdp = NULL;
@@ -3562,6 +3571,13 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf)
switch (bpf->command) {
case XDP_SETUP_PROG:
+		/* No need to support the SoCs that require frame
+		 * swapping because the performance wouldn't be
+		 * better than the skb mode.
+ */
+ if (fep->quirks & FEC_QUIRK_SWAP_FRAME)
+ return -EOPNOTSUPP;
+
if (is_run) {
napi_disable(&fep->napi);
netif_tx_disable(dev);
@@ -3589,17 +3605,12 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf)
}
static int
-fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int cpu)
+fec_enet_xdp_get_tx_queue(struct fec_enet_private *fep, int index)
{
- int index = cpu;
-
if (unlikely(index < 0))
- index = 0;
-
- while (index >= fep->num_tx_queues)
- index -= fep->num_tx_queues;
+ return 0;
- return index;
+ return (index % fep->num_tx_queues);
}
static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
--
2.34.1
Add XDP and page pool statistics.
To keep the implementation simple and compatible, the patch uses
32bit counters to record the XDP statistics.
Signed-off-by: Shenwei Wang <[email protected]>
---
drivers/net/ethernet/freescale/fec.h | 12 ++++
drivers/net/ethernet/freescale/fec_main.c | 70 ++++++++++++++++++++++-
2 files changed, 79 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 61e847b18343..e3159234886c 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -526,6 +526,17 @@ struct fec_enet_priv_txrx_info {
struct sk_buff *skb;
};
+enum {
+ RX_XDP_REDIRECT = 0,
+ RX_XDP_PASS,
+ RX_XDP_DROP,
+ RX_XDP_TX,
+ RX_XDP_TX_ERRORS,
+ TX_XDP_XMIT,
+ TX_XDP_XMIT_ERRORS,
+ XDP_STATS_TOTAL,
+};
+
struct fec_enet_priv_tx_q {
struct bufdesc_prop bd;
unsigned char *tx_bounce[TX_RING_SIZE];
@@ -546,6 +557,7 @@ struct fec_enet_priv_rx_q {
/* page_pool */
struct page_pool *page_pool;
struct xdp_rxq_info xdp_rxq;
+ u32 stats[XDP_STATS_TOTAL];
/* rx queue number, in the range 0-7 */
u8 id;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 3fb870340c22..89fef370bc10 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1523,10 +1523,12 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
switch (act) {
case XDP_PASS:
+ rxq->stats[RX_XDP_PASS]++;
ret = FEC_ENET_XDP_PASS;
break;
case XDP_REDIRECT:
+ rxq->stats[RX_XDP_REDIRECT]++;
err = xdp_do_redirect(fep->netdev, xdp, prog);
if (!err) {
ret = FEC_ENET_XDP_REDIR;
@@ -1549,6 +1551,7 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
fallthrough; /* handle aborts by dropping packet */
case XDP_DROP:
+ rxq->stats[RX_XDP_DROP]++;
ret = FEC_ENET_XDP_CONSUMED;
page = virt_to_head_page(xdp->data);
page_pool_put_page(rxq->page_pool, page, sync, true);
@@ -2657,37 +2660,91 @@ static const struct fec_stat {
{ "IEEE_rx_octets_ok", IEEE_R_OCTETS_OK },
};
-#define FEC_STATS_SIZE (ARRAY_SIZE(fec_stats) * sizeof(u64))
+static struct fec_xdp_stat {
+ char name[ETH_GSTRING_LEN];
+ u64 count;
+} fec_xdp_stats[XDP_STATS_TOTAL] = {
+ { "rx_xdp_redirect", 0 }, /* RX_XDP_REDIRECT = 0, */
+ { "rx_xdp_pass", 0 }, /* RX_XDP_PASS, */
+ { "rx_xdp_drop", 0 }, /* RX_XDP_DROP, */
+ { "rx_xdp_tx", 0 }, /* RX_XDP_TX, */
+ { "rx_xdp_tx_errors", 0 }, /* RX_XDP_TX_ERRORS, */
+ { "tx_xdp_xmit", 0 }, /* TX_XDP_XMIT, */
+ { "tx_xdp_xmit_errors", 0 }, /* TX_XDP_XMIT_ERRORS, */
+};
+
+#define FEC_STATS_SIZE ((ARRAY_SIZE(fec_stats) + \
+ ARRAY_SIZE(fec_xdp_stats)) * sizeof(u64))
static void fec_enet_update_ethtool_stats(struct net_device *dev)
{
struct fec_enet_private *fep = netdev_priv(dev);
- int i;
+ struct fec_xdp_stat xdp_stats[7];
+ int off = ARRAY_SIZE(fec_stats);
+ struct fec_enet_priv_rx_q *rxq;
+ int i, j;
for (i = 0; i < ARRAY_SIZE(fec_stats); i++)
fep->ethtool_stats[i] = readl(fep->hwp + fec_stats[i].offset);
+
+ for (i = fep->num_rx_queues - 1; i >= 0; i--) {
+ rxq = fep->rx_queue[i];
+ for (j = 0; j < XDP_STATS_TOTAL; j++)
+ xdp_stats[j].count += rxq->stats[j];
+ }
+
+ for (i = 0; i < XDP_STATS_TOTAL; i++)
+ fep->ethtool_stats[i + off] = xdp_stats[i].count;
+}
+
+static void fec_enet_page_pool_stats(struct fec_enet_private *fep, u64 *data)
+{
+ struct page_pool_stats stats = {};
+ struct fec_enet_priv_rx_q *rxq;
+ int i;
+
+ for (i = fep->num_rx_queues - 1; i >= 0; i--) {
+ rxq = fep->rx_queue[i];
+
+ if (!rxq->page_pool)
+ continue;
+
+ page_pool_get_stats(rxq->page_pool, &stats);
+ }
+
+ page_pool_ethtool_stats_get(data, &stats);
}
static void fec_enet_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct fec_enet_private *fep = netdev_priv(dev);
+ u64 *dst = data + FEC_STATS_SIZE / 8;
if (netif_running(dev))
fec_enet_update_ethtool_stats(dev);
memcpy(data, fep->ethtool_stats, FEC_STATS_SIZE);
+
+ fec_enet_page_pool_stats(fep, dst);
}
static void fec_enet_get_strings(struct net_device *netdev,
u32 stringset, u8 *data)
{
+ int off = ARRAY_SIZE(fec_stats);
int i;
switch (stringset) {
case ETH_SS_STATS:
for (i = 0; i < ARRAY_SIZE(fec_stats); i++)
memcpy(data + i * ETH_GSTRING_LEN,
fec_stats[i].name, ETH_GSTRING_LEN);
+ for (i = 0; i < ARRAY_SIZE(fec_xdp_stats); i++)
+ memcpy(data + (i + off) * ETH_GSTRING_LEN,
+ fec_xdp_stats[i].name, ETH_GSTRING_LEN);
+ off = (i + off) * ETH_GSTRING_LEN;
+ page_pool_ethtool_stats_get_strings(data + off);
+
break;
case ETH_SS_TEST:
net_selftest_get_strings(data);
@@ -2697,9 +2754,14 @@ static void fec_enet_get_strings(struct net_device *netdev,
static int fec_enet_get_sset_count(struct net_device *dev, int sset)
{
+ int count;
+
switch (sset) {
case ETH_SS_STATS:
- return ARRAY_SIZE(fec_stats);
+ count = ARRAY_SIZE(fec_stats) + ARRAY_SIZE(fec_xdp_stats);
+ count += page_pool_ethtool_stats_get_count();
+ return count;
+
case ETH_SS_TEST:
return net_selftest_get_count();
default:
@@ -2718,6 +2780,8 @@ static void fec_enet_clear_ethtool_stats(struct net_device *dev)
for (i = 0; i < ARRAY_SIZE(fec_stats); i++)
writel(0, fep->hwp + fec_stats[i].offset);
+ for (i = 0; i < ARRAY_SIZE(fec_xdp_stats); i++)
+ fec_xdp_stats[i].count = 0;
/* Don't disable MIB statistics counters */
writel(0, fep->hwp + FEC_MIB_CTRLSTAT);
}
--
2.34.1
> +enum {
> + RX_XDP_REDIRECT = 0,
> + RX_XDP_PASS,
> + RX_XDP_DROP,
> + RX_XDP_TX,
> + RX_XDP_TX_ERRORS,
> + TX_XDP_XMIT,
> + TX_XDP_XMIT_ERRORS,
> + XDP_STATS_TOTAL,
> +};
> +
> struct fec_enet_priv_tx_q {
> struct bufdesc_prop bd;
> unsigned char *tx_bounce[TX_RING_SIZE];
> @@ -546,6 +557,7 @@ struct fec_enet_priv_rx_q {
> /* page_pool */
> struct page_pool *page_pool;
> struct xdp_rxq_info xdp_rxq;
> + u32 stats[XDP_STATS_TOTAL];
>
> /* rx queue number, in the range 0-7 */
> u8 id;
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index 3fb870340c22..89fef370bc10 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -1523,10 +1523,12 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
>
> switch (act) {
> case XDP_PASS:
> + rxq->stats[RX_XDP_PASS]++;
> ret = FEC_ENET_XDP_PASS;
> break;
>
> case XDP_REDIRECT:
> + rxq->stats[RX_XDP_REDIRECT]++;
> err = xdp_do_redirect(fep->netdev, xdp, prog);
> if (!err) {
> ret = FEC_ENET_XDP_REDIR;
> @@ -1549,6 +1551,7 @@ fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
> fallthrough; /* handle aborts by dropping packet */
>
> case XDP_DROP:
> + rxq->stats[RX_XDP_DROP]++;
> ret = FEC_ENET_XDP_CONSUMED;
> page = virt_to_head_page(xdp->data);
> page_pool_put_page(rxq->page_pool, page, sync, true);
> @@ -2657,37 +2660,91 @@ static const struct fec_stat {
> { "IEEE_rx_octets_ok", IEEE_R_OCTETS_OK },
> };
>
> -#define FEC_STATS_SIZE (ARRAY_SIZE(fec_stats) * sizeof(u64))
> +static struct fec_xdp_stat {
> + char name[ETH_GSTRING_LEN];
> + u64 count;
> +} fec_xdp_stats[XDP_STATS_TOTAL] = {
> + { "rx_xdp_redirect", 0 }, /* RX_XDP_REDIRECT = 0, */
> + { "rx_xdp_pass", 0 }, /* RX_XDP_PASS, */
> + { "rx_xdp_drop", 0 }, /* RX_XDP_DROP, */
> + { "rx_xdp_tx", 0 }, /* RX_XDP_TX, */
> + { "rx_xdp_tx_errors", 0 }, /* RX_XDP_TX_ERRORS, */
> + { "tx_xdp_xmit", 0 }, /* TX_XDP_XMIT, */
> + { "tx_xdp_xmit_errors", 0 }, /* TX_XDP_XMIT_ERRORS, */
> +};
Why do you mix the string and the count?
> +
> +#define FEC_STATS_SIZE ((ARRAY_SIZE(fec_stats) + \
> + ARRAY_SIZE(fec_xdp_stats)) * sizeof(u64))
>
> static void fec_enet_update_ethtool_stats(struct net_device *dev)
> {
> struct fec_enet_private *fep = netdev_priv(dev);
> - int i;
> + struct fec_xdp_stat xdp_stats[7];
You are allocating 7 x name[ETH_GSTRING_LEN], here, which you are not
going to use. All you really need is u64 xdp_stats[XDP_STATS_TOTAL]
> + int off = ARRAY_SIZE(fec_stats);
> + struct fec_enet_priv_rx_q *rxq;
> + int i, j;
>
> for (i = 0; i < ARRAY_SIZE(fec_stats); i++)
> fep->ethtool_stats[i] = readl(fep->hwp + fec_stats[i].offset);
> +
> + for (i = fep->num_rx_queues - 1; i >= 0; i--) {
> + rxq = fep->rx_queue[i];
> + for (j = 0; j < XDP_STATS_TOTAL; j++)
> + xdp_stats[j].count += rxq->stats[j];
> + }
> +
> + for (i = 0; i < XDP_STATS_TOTAL; i++)
> + fep->ethtool_stats[i + off] = xdp_stats[i].count;
It would be more logical to use j here.
It is also pretty messy. For fec_enet_get_strings() and
fec_enet_get_sset_count() you deal with the three sets of stats
individually. But here you combine normal stats and xdp stats in
one. It will probably come out cleaner if you keep it all separate.
> static void fec_enet_get_ethtool_stats(struct net_device *dev,
> struct ethtool_stats *stats, u64 *data)
> {
> struct fec_enet_private *fep = netdev_priv(dev);
> + u64 *dst = data + FEC_STATS_SIZE / 8;
Why 8? sizeof(u64) would be a bit clearer. Or just use
ARRAY_SIZE(fec_stats) which is what you are actually wanting.
>
> if (netif_running(dev))
> fec_enet_update_ethtool_stats(dev);
>
> memcpy(data, fep->ethtool_stats, FEC_STATS_SIZE);
> +
> + fec_enet_page_pool_stats(fep, dst);
> }
>
> static void fec_enet_get_strings(struct net_device *netdev,
> u32 stringset, u8 *data)
> {
> + int off = ARRAY_SIZE(fec_stats);
> int i;
> switch (stringset) {
> case ETH_SS_STATS:
> for (i = 0; i < ARRAY_SIZE(fec_stats); i++)
> memcpy(data + i * ETH_GSTRING_LEN,
> fec_stats[i].name, ETH_GSTRING_LEN);
> + for (i = 0; i < ARRAY_SIZE(fec_xdp_stats); i++)
> + memcpy(data + (i + off) * ETH_GSTRING_LEN,
> + fec_xdp_stats[i].name, ETH_GSTRING_LEN);
> + off = (i + off) * ETH_GSTRING_LEN;
> + page_pool_ethtool_stats_get_strings(data + off);
Probably simpler is:
for (i = 0; i < ARRAY_SIZE(fec_stats); i++) {
memcpy(data, fec_stats[i].name, ETH_GSTRING_LEN);
data += ETH_GSTRING_LEN;
}
for (i = 0; i < ARRAY_SIZE(fec_xdp_stats); i++) {
memcpy(data, fec_xdp_stats[i].name, ETH_GSTRING_LEN);
data += ETH_GSTRING_LEN;
}
page_pool_ethtool_stats_get_strings(data);
Andrew
> -----Original Message-----
> From: Andrew Lunn <[email protected]>
> Sent: Monday, November 7, 2022 8:26 PM
> To: Shenwei Wang <[email protected]>
> >
> > -#define FEC_STATS_SIZE (ARRAY_SIZE(fec_stats) * sizeof(u64))
> > +static struct fec_xdp_stat {
> > + char name[ETH_GSTRING_LEN];
> > + u64 count;
> > +} fec_xdp_stats[XDP_STATS_TOTAL] = {
> > + { "rx_xdp_redirect", 0 }, /* RX_XDP_REDIRECT = 0, */
> > + { "rx_xdp_pass", 0 }, /* RX_XDP_PASS, */
> > + { "rx_xdp_drop", 0 }, /* RX_XDP_DROP, */
> > + { "rx_xdp_tx", 0 }, /* RX_XDP_TX, */
> > + { "rx_xdp_tx_errors", 0 }, /* RX_XDP_TX_ERRORS, */
> > + { "tx_xdp_xmit", 0 }, /* TX_XDP_XMIT, */
> > + { "tx_xdp_xmit_errors", 0 }, /* TX_XDP_XMIT_ERRORS, */
> > +};
>
> Why do you mix the string and the count?
It was following the original coding style, but it is not good. Will fix it in the next version.
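For example, something along these lines, keeping only the strings in a
static table and leaving the counters in the per-queue rxq->stats[] array
(just a sketch, the fec_xdp_stat_strs name is tentative):

static const char fec_xdp_stat_strs[XDP_STATS_TOTAL][ETH_GSTRING_LEN] = {
	/* illustrative name table; counters stay in rxq->stats[] */
	"rx_xdp_redirect",	/* RX_XDP_REDIRECT */
	"rx_xdp_pass",		/* RX_XDP_PASS */
	"rx_xdp_drop",		/* RX_XDP_DROP */
	"rx_xdp_tx",		/* RX_XDP_TX */
	"rx_xdp_tx_errors",	/* RX_XDP_TX_ERRORS */
	"tx_xdp_xmit",		/* TX_XDP_XMIT */
	"tx_xdp_xmit_errors",	/* TX_XDP_XMIT_ERRORS */
};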
>
> > +
> > +#define FEC_STATS_SIZE ((ARRAY_SIZE(fec_stats) + \
> > + ARRAY_SIZE(fec_xdp_stats)) * sizeof(u64))
> >
> > static void fec_enet_update_ethtool_stats(struct net_device *dev) {
> > struct fec_enet_private *fep = netdev_priv(dev);
> > - int i;
> > + struct fec_xdp_stat xdp_stats[7];
>
> You are allocating 7 x name[ETH_GSTRING_LEN], here, which you are not going
> to use. All you really need is u64 xdp_stats[XDP_STATS_TOTAL]
>
> > + int off = ARRAY_SIZE(fec_stats);
> > + struct fec_enet_priv_rx_q *rxq;
> > + int i, j;
> >
> > for (i = 0; i < ARRAY_SIZE(fec_stats); i++)
> > fep->ethtool_stats[i] = readl(fep->hwp +
> > fec_stats[i].offset);
> > +
> > + for (i = fep->num_rx_queues - 1; i >= 0; i--) {
> > + rxq = fep->rx_queue[i];
> > + for (j = 0; j < XDP_STATS_TOTAL; j++)
> > + xdp_stats[j].count += rxq->stats[j];
> > + }
> > +
> > + for (i = 0; i < XDP_STATS_TOTAL; i++)
> > + fep->ethtool_stats[i + off] = xdp_stats[i].count;
>
> It would be more logical to use j here.
>
> It is also pretty messy. For fec_enet_get_strings() and
> fec_enet_get_sset_count() you deal with the three sets of stats individually. But
> here you combine normal stats and xdp stats in one. It will probably come out
> cleaner if you keep it all separate.
After you said so, I feel it is messy too. Will clean up the code.
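As a rough sketch of the direction (the helper name is tentative), the xdp
counters could be folded into their own routine and kept separate from the
hardware counters:

static void fec_enet_get_xdp_stats(struct fec_enet_private *fep, u64 *data)
{
	u64 xdp_stats[XDP_STATS_TOTAL] = { 0 };
	struct fec_enet_priv_rx_q *rxq;
	int i, j;

	/* illustrative only: sum the per-queue u32 counters into u64 slots */
	for (i = fep->num_rx_queues - 1; i >= 0; i--) {
		rxq = fep->rx_queue[i];
		for (j = 0; j < XDP_STATS_TOTAL; j++)
			xdp_stats[j] += rxq->stats[j];
	}

	memcpy(data, xdp_stats, sizeof(xdp_stats));
}

fec_enet_get_ethtool_stats() could then call it with
data + ARRAY_SIZE(fec_stats), and hand
data + ARRAY_SIZE(fec_stats) + XDP_STATS_TOTAL to the page pool helper,
instead of doing the FEC_STATS_SIZE / 8 arithmetic.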
>
> > static void fec_enet_get_ethtool_stats(struct net_device *dev,
> > struct ethtool_stats *stats, u64
> > *data) {
> > struct fec_enet_private *fep = netdev_priv(dev);
> > + u64 *dst = data + FEC_STATS_SIZE / 8;
>
> Why 8? sizeof(u64) would be a bit clearer. Or just use
> ARRAY_SIZE(fec_stats) which is what you are actually wanting.
>
> >
> > if (netif_running(dev))
> > fec_enet_update_ethtool_stats(dev);
> >
> > memcpy(data, fep->ethtool_stats, FEC_STATS_SIZE);
> > +
> > + fec_enet_page_pool_stats(fep, dst);
> > }
> >
> > static void fec_enet_get_strings(struct net_device *netdev,
> > u32 stringset, u8 *data)
> > {
> > + int off = ARRAY_SIZE(fec_stats);
> > int i;
> > switch (stringset) {
> > case ETH_SS_STATS:
> > for (i = 0; i < ARRAY_SIZE(fec_stats); i++)
> > memcpy(data + i * ETH_GSTRING_LEN,
> > fec_stats[i].name, ETH_GSTRING_LEN);
> > + for (i = 0; i < ARRAY_SIZE(fec_xdp_stats); i++)
> > + memcpy(data + (i + off) * ETH_GSTRING_LEN,
> > + fec_xdp_stats[i].name, ETH_GSTRING_LEN);
> > + off = (i + off) * ETH_GSTRING_LEN;
> > + page_pool_ethtool_stats_get_strings(data + off);
>
> Probably simpler is:
>
> for (i = 0; i < ARRAY_SIZE(fec_stats); i++) {
> memcpy(data, fec_stats[i].name, ETH_GSTRING_LEN);
> data += ETH_GSTRING_LEN;
> }
> for (i = 0; i < ARRAY_SIZE(fec_xdp_stats); i++) {
> memcpy(data, fec_xdp_stats[i].name, ETH_GSTRING_LEN);
> data += ETH_GSTRING_LEN;
> }
> page_pool_ethtool_stats_get_strings(data);
Yes, this looks better. :)
Thanks,
Shenwei
>
> Andrew