2022-06-09 22:10:10

by Haiyang Zhang

[permalink] [raw]
Subject: [PATCH net-next,2/2] net: mana: Add support of XDP_REDIRECT action

Support XDP_REDIRECT action

Signed-off-by: Haiyang Zhang <[email protected]>
---
drivers/net/ethernet/microsoft/mana/mana.h | 6 ++
.../net/ethernet/microsoft/mana/mana_bpf.c | 64 +++++++++++++++++++
drivers/net/ethernet/microsoft/mana/mana_en.c | 13 +++-
.../ethernet/microsoft/mana/mana_ethtool.c | 12 +++-
4 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
index f198b34c232f..d58be64374c8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana.h
+++ b/drivers/net/ethernet/microsoft/mana/mana.h
@@ -53,12 +53,14 @@ struct mana_stats_rx {
u64 bytes;
u64 xdp_drop;
u64 xdp_tx;
+ u64 xdp_redirect;
struct u64_stats_sync syncp;
};

struct mana_stats_tx {
u64 packets;
u64 bytes;
+ u64 xdp_xmit;
struct u64_stats_sync syncp;
};

@@ -311,6 +313,8 @@ struct mana_rxq {
struct bpf_prog __rcu *bpf_prog;
struct xdp_rxq_info xdp_rxq;
struct page *xdp_save_page;
+ bool xdp_flush;
+ int xdp_rc; /* XDP redirect return code */

/* MUST BE THE LAST MEMBER:
* Each receive buffer has an associated mana_recv_buf_oob.
@@ -396,6 +400,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming);
void mana_remove(struct gdma_dev *gd, bool suspending);

void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
+int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
+ u32 flags);
u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
struct xdp_buff *xdp, void *buf_va, uint pkt_len);
struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
index 1d2f948b5c00..421fd39ff3a8 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
@@ -32,9 +32,55 @@ void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev)
ndev->stats.tx_dropped++;
}

+static int mana_xdp_xmit_fm(struct net_device *ndev, struct xdp_frame *frame,
+ u16 q_idx)
+{
+ struct sk_buff *skb;
+
+ skb = xdp_build_skb_from_frame(frame, ndev);
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ skb_set_queue_mapping(skb, q_idx);
+
+ mana_xdp_tx(skb, ndev);
+
+ return 0;
+}
+
+int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
+ u32 flags)
+{
+ struct mana_port_context *apc = netdev_priv(ndev);
+ struct mana_stats_tx *tx_stats;
+ int i, count = 0;
+ u16 q_idx;
+
+ if (unlikely(!apc->port_is_up))
+ return 0;
+
+ q_idx = smp_processor_id() % ndev->real_num_tx_queues;
+
+ for (i = 0; i < n; i++) {
+ if (mana_xdp_xmit_fm(ndev, frames[i], q_idx))
+ break;
+
+ count++;
+ }
+
+ tx_stats = &apc->tx_qp[q_idx].txq.stats;
+
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->xdp_xmit += count;
+ u64_stats_update_end(&tx_stats->syncp);
+
+ return count;
+}
+
u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
struct xdp_buff *xdp, void *buf_va, uint pkt_len)
{
+ struct mana_stats_rx *rx_stats;
struct bpf_prog *prog;
u32 act = XDP_PASS;

@@ -49,12 +95,30 @@ u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,

act = bpf_prog_run_xdp(prog, xdp);

+ rx_stats = &rxq->stats;
+
switch (act) {
case XDP_PASS:
case XDP_TX:
case XDP_DROP:
break;

+ case XDP_REDIRECT:
+ rxq->xdp_rc = xdp_do_redirect(ndev, xdp, prog);
+ if (!rxq->xdp_rc) {
+ rxq->xdp_flush = true;
+
+ u64_stats_update_begin(&rx_stats->syncp);
+ rx_stats->packets++;
+ rx_stats->bytes += pkt_len;
+ rx_stats->xdp_redirect++;
+ u64_stats_update_end(&rx_stats->syncp);
+
+ break;
+ }
+
+ fallthrough;
+
case XDP_ABORTED:
trace_xdp_exception(ndev, prog, act);
break;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 3ef09e0cdbaa..9259a74eca40 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -6,6 +6,7 @@
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
+#include <linux/filter.h>
#include <linux/mm.h>

#include <net/checksum.h>
@@ -382,6 +383,7 @@ static const struct net_device_ops mana_devops = {
.ndo_validate_addr = eth_validate_addr,
.ndo_get_stats64 = mana_get_stats64,
.ndo_bpf = mana_bpf,
+ .ndo_xdp_xmit = mana_xdp_xmit,
};

static void mana_cleanup_port_context(struct mana_port_context *apc)
@@ -1120,6 +1122,9 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,

act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);

+ if (act == XDP_REDIRECT && !rxq->xdp_rc)
+ return;
+
if (act != XDP_PASS && act != XDP_TX)
goto drop_xdp;

@@ -1275,11 +1280,14 @@ static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
static void mana_poll_rx_cq(struct mana_cq *cq)
{
struct gdma_comp *comp = cq->gdma_comp_buf;
+ struct mana_rxq *rxq = cq->rxq;
int comp_read, i;

comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);

+ rxq->xdp_flush = false;
+
for (i = 0; i < comp_read; i++) {
if (WARN_ON_ONCE(comp[i].is_sq))
return;
@@ -1288,8 +1296,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
return;

- mana_process_rx_cqe(cq->rxq, cq, &comp[i]);
+ mana_process_rx_cqe(rxq, cq, &comp[i]);
}
+
+ if (rxq->xdp_flush)
+ xdp_do_flush();
}

static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index e13f2453eabb..c530db76880f 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -23,7 +23,7 @@ static int mana_get_sset_count(struct net_device *ndev, int stringset)
if (stringset != ETH_SS_STATS)
return -EINVAL;

- return ARRAY_SIZE(mana_eth_stats) + num_queues * 6;
+ return ARRAY_SIZE(mana_eth_stats) + num_queues * 8;
}

static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
@@ -50,6 +50,8 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
sprintf(p, "rx_%d_xdp_tx", i);
p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_%d_xdp_redirect", i);
+ p += ETH_GSTRING_LEN;
}

for (i = 0; i < num_queues; i++) {
@@ -57,6 +59,8 @@ static void mana_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
sprintf(p, "tx_%d_bytes", i);
p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_%d_xdp_xmit", i);
+ p += ETH_GSTRING_LEN;
}
}

@@ -70,6 +74,8 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
struct mana_stats_tx *tx_stats;
unsigned int start;
u64 packets, bytes;
+ u64 xdp_redirect;
+ u64 xdp_xmit;
u64 xdp_drop;
u64 xdp_tx;
int q, i = 0;
@@ -89,12 +95,14 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
bytes = rx_stats->bytes;
xdp_drop = rx_stats->xdp_drop;
xdp_tx = rx_stats->xdp_tx;
+ xdp_redirect = rx_stats->xdp_redirect;
} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));

data[i++] = packets;
data[i++] = bytes;
data[i++] = xdp_drop;
data[i++] = xdp_tx;
+ data[i++] = xdp_redirect;
}

for (q = 0; q < num_queues; q++) {
@@ -104,10 +112,12 @@ static void mana_get_ethtool_stats(struct net_device *ndev,
start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
packets = tx_stats->packets;
bytes = tx_stats->bytes;
+ xdp_xmit = tx_stats->xdp_xmit;
} while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));

data[i++] = packets;
data[i++] = bytes;
+ data[i++] = xdp_xmit;
}
}

--
2.25.1


2022-06-14 08:37:27

by Paolo Abeni

[permalink] [raw]
Subject: Re: [PATCH net-next,2/2] net: mana: Add support of XDP_REDIRECT action

On Thu, 2022-06-09 at 14:57 -0700, Haiyang Zhang wrote:
> Support XDP_REDIRECT action
>
> Signed-off-by: Haiyang Zhang <[email protected]>

You really should expand the changelog a little bit...

> ---
> drivers/net/ethernet/microsoft/mana/mana.h | 6 ++
> .../net/ethernet/microsoft/mana/mana_bpf.c | 64 +++++++++++++++++++
> drivers/net/ethernet/microsoft/mana/mana_en.c | 13 +++-
> .../ethernet/microsoft/mana/mana_ethtool.c | 12 +++-
> 4 files changed, 93 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/ethernet/microsoft/mana/mana.h b/drivers/net/ethernet/microsoft/mana/mana.h
> index f198b34c232f..d58be64374c8 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana.h
> +++ b/drivers/net/ethernet/microsoft/mana/mana.h
> @@ -53,12 +53,14 @@ struct mana_stats_rx {
> u64 bytes;
> u64 xdp_drop;
> u64 xdp_tx;
> + u64 xdp_redirect;
> struct u64_stats_sync syncp;
> };
>
> struct mana_stats_tx {
> u64 packets;
> u64 bytes;
> + u64 xdp_xmit;
> struct u64_stats_sync syncp;
> };
>
> @@ -311,6 +313,8 @@ struct mana_rxq {
> struct bpf_prog __rcu *bpf_prog;
> struct xdp_rxq_info xdp_rxq;
> struct page *xdp_save_page;
> + bool xdp_flush;
> + int xdp_rc; /* XDP redirect return code */
>
> /* MUST BE THE LAST MEMBER:
> * Each receive buffer has an associated mana_recv_buf_oob.
> @@ -396,6 +400,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming);
> void mana_remove(struct gdma_dev *gd, bool suspending);
>
> void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
> +int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
> + u32 flags);
> u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
> struct xdp_buff *xdp, void *buf_va, uint pkt_len);
> struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
> diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
> index 1d2f948b5c00..421fd39ff3a8 100644
> --- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c
> +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
> @@ -32,9 +32,55 @@ void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev)
> ndev->stats.tx_dropped++;
> }
>
> +static int mana_xdp_xmit_fm(struct net_device *ndev, struct xdp_frame *frame,
> + u16 q_idx)
> +{
> + struct sk_buff *skb;
> +
> + skb = xdp_build_skb_from_frame(frame, ndev);
> + if (unlikely(!skb))
> + return -ENOMEM;

... especially considering this implementation choice: converting the
xdp frame to an skb in very bad for performances.

You could implement a mana xmit helper working on top of the xdp_frame
struct, and use it here.

Additionally you could consider revisiting the XDP_TX path: currently
it builds a skb from the xdp_buff to xmit it locally, while it could
resort to a much cheaper xdp_buff to xdp_frame conversion.

The traditional way to handle all the above is keep all the
XDP_TX/XDP_REDIRECT bits in the device-specific _run_xdp helper, that
will additional avoid several conditionals in mana_rx_skb().

The above refactoring would probably require a bit of work, but it will
pay-off for sure and will become more costily with time. Your choice ;)

But at the very least we need a better changelog here.

Cheers,

Paolo

2022-06-14 20:31:47

by Haiyang Zhang

[permalink] [raw]
Subject: RE: [PATCH net-next,2/2] net: mana: Add support of XDP_REDIRECT action



> -----Original Message-----
> From: Paolo Abeni <[email protected]>
> Sent: Tuesday, June 14, 2022 3:56 AM
> To: Haiyang Zhang <[email protected]>; [email protected];
> [email protected]
> Cc: Dexuan Cui <[email protected]>; KY Srinivasan <[email protected]>;
> Stephen Hemminger <[email protected]>; Paul Rosswurm
> <[email protected]>; Shachar Raindel <[email protected]>;
> [email protected]; vkuznets <[email protected]>; [email protected];
> [email protected]
> Subject: Re: [PATCH net-next,2/2] net: mana: Add support of XDP_REDIRECT
> action
>
> On Thu, 2022-06-09 at 14:57 -0700, Haiyang Zhang wrote:
> > Support XDP_REDIRECT action
> >
> > Signed-off-by: Haiyang Zhang <[email protected]>
>
> You really should expand the changelog a little bit...
>
> > ---
> > drivers/net/ethernet/microsoft/mana/mana.h | 6 ++
> > .../net/ethernet/microsoft/mana/mana_bpf.c | 64
> +++++++++++++++++++
> > drivers/net/ethernet/microsoft/mana/mana_en.c | 13 +++-
> > .../ethernet/microsoft/mana/mana_ethtool.c | 12 +++-
> > 4 files changed, 93 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana.h
> b/drivers/net/ethernet/microsoft/mana/mana.h
> > index f198b34c232f..d58be64374c8 100644
> > --- a/drivers/net/ethernet/microsoft/mana/mana.h
> > +++ b/drivers/net/ethernet/microsoft/mana/mana.h
> > @@ -53,12 +53,14 @@ struct mana_stats_rx {
> > u64 bytes;
> > u64 xdp_drop;
> > u64 xdp_tx;
> > + u64 xdp_redirect;
> > struct u64_stats_sync syncp;
> > };
> >
> > struct mana_stats_tx {
> > u64 packets;
> > u64 bytes;
> > + u64 xdp_xmit;
> > struct u64_stats_sync syncp;
> > };
> >
> > @@ -311,6 +313,8 @@ struct mana_rxq {
> > struct bpf_prog __rcu *bpf_prog;
> > struct xdp_rxq_info xdp_rxq;
> > struct page *xdp_save_page;
> > + bool xdp_flush;
> > + int xdp_rc; /* XDP redirect return code */
> >
> > /* MUST BE THE LAST MEMBER:
> > * Each receive buffer has an associated mana_recv_buf_oob.
> > @@ -396,6 +400,8 @@ int mana_probe(struct gdma_dev *gd, bool
> resuming);
> > void mana_remove(struct gdma_dev *gd, bool suspending);
> >
> > void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
> > +int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame
> **frames,
> > + u32 flags);
> > u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
> > struct xdp_buff *xdp, void *buf_va, uint pkt_len);
> > struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
> > diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c
> b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
> > index 1d2f948b5c00..421fd39ff3a8 100644
> > --- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c
> > +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c
> > @@ -32,9 +32,55 @@ void mana_xdp_tx(struct sk_buff *skb, struct
> net_device *ndev)
> > ndev->stats.tx_dropped++;
> > }
> >
> > +static int mana_xdp_xmit_fm(struct net_device *ndev, struct xdp_frame
> *frame,
> > + u16 q_idx)
> > +{
> > + struct sk_buff *skb;
> > +
> > + skb = xdp_build_skb_from_frame(frame, ndev);
> > + if (unlikely(!skb))
> > + return -ENOMEM;
>
> ... especially considering this implementation choice: converting the
> xdp frame to an skb in very bad for performances.
>
> You could implement a mana xmit helper working on top of the xdp_frame
> struct, and use it here.
>
> Additionally you could consider revisiting the XDP_TX path: currently
> it builds a skb from the xdp_buff to xmit it locally, while it could
> resort to a much cheaper xdp_buff to xdp_frame conversion.
>
> The traditional way to handle all the above is keep all the
> XDP_TX/XDP_REDIRECT bits in the device-specific _run_xdp helper, that
> will additional avoid several conditionals in mana_rx_skb().
>
> The above refactoring would probably require a bit of work, but it will
> pay-off for sure and will become more costily with time. Your choice ;)
>
> But at the very least we need a better changelog here.

Hi Paolo,

Thank you for the review.
Sure, I will put more details into the change log.

Other suggestions on removing the SKB conversion, etc., I will work on
them later.

Thanks,
- Haiyang