2022-03-17 20:08:11

by Horatiu Vultur

[permalink] [raw]
Subject: [PATCH net-next 0/5] net: lan966x: Add support for FDMA

Currently, when injecting or extracting a frame from the CPU, the frame
is given to the HW one word at a time. There is another way to
inject/extract frames from the CPU, using FDMA (Frame Direct Memory Access).
In this way the entire frame is given to the HW at once. This improves both
RX and TX bitrate.

Horatiu Vultur (5):
dt-bindings: net: lan966x: Extend with FDMA interrupt
net: lan966x: Add registers that are used for FDMA.
net: lan966x: Expose functions that are needed by FDMA
net: lan966x: Add FDMA functionality
net: lan966x: Update FDMA to change MTU.

.../net/microchip,lan966x-switch.yaml | 2 +
.../net/ethernet/microchip/lan966x/Makefile | 2 +-
.../ethernet/microchip/lan966x/lan966x_fdma.c | 772 ++++++++++++++++++
.../ethernet/microchip/lan966x/lan966x_main.c | 44 +-
.../ethernet/microchip/lan966x/lan966x_main.h | 120 +++
.../ethernet/microchip/lan966x/lan966x_port.c | 3 +
.../ethernet/microchip/lan966x/lan966x_regs.h | 106 +++
7 files changed, 1037 insertions(+), 12 deletions(-)
create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c

--
2.33.0


2022-03-17 20:28:20

by Horatiu Vultur

[permalink] [raw]
Subject: [PATCH net-next 2/5] net: lan966x: Add registers that are used for FDMA.

Add the registers that are used to configure the FDMA.

Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_main.c | 1 +
.../ethernet/microchip/lan966x/lan966x_regs.h | 106 ++++++++++++++++++
2 files changed, 107 insertions(+)

diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index e1bcb28039dc..4240db708886 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -43,6 +43,7 @@ struct lan966x_main_io_resource {

static const struct lan966x_main_io_resource lan966x_main_iomap[] = {
{ TARGET_CPU, 0xc0000, 0 }, /* 0xe00c0000 */
+ { TARGET_FDMA, 0xc0400, 0 }, /* 0xe00c0400 */
{ TARGET_ORG, 0, 1 }, /* 0xe2000000 */
{ TARGET_GCB, 0x4000, 1 }, /* 0xe2004000 */
{ TARGET_QS, 0x8000, 1 }, /* 0xe2008000 */
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
index 0c0b3e173d53..2f59285bef29 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h
@@ -17,6 +17,7 @@ enum lan966x_target {
TARGET_CHIP_TOP = 5,
TARGET_CPU = 6,
TARGET_DEV = 13,
+ TARGET_FDMA = 21,
TARGET_GCB = 27,
TARGET_ORG = 36,
TARGET_PTP = 41,
@@ -578,6 +579,111 @@ enum lan966x_target {
#define DEV_PCS1G_STICKY_LINK_DOWN_STICKY_GET(x)\
FIELD_GET(DEV_PCS1G_STICKY_LINK_DOWN_STICKY, x)

+/* FDMA:FDMA:FDMA_CH_ACTIVATE */
+#define FDMA_CH_ACTIVATE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 0, 0, 1, 4)
+
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE GENMASK(7, 0)
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE_SET(x)\
+ FIELD_PREP(FDMA_CH_ACTIVATE_CH_ACTIVATE, x)
+#define FDMA_CH_ACTIVATE_CH_ACTIVATE_GET(x)\
+ FIELD_GET(FDMA_CH_ACTIVATE_CH_ACTIVATE, x)
+
+/* FDMA:FDMA:FDMA_CH_RELOAD */
+#define FDMA_CH_RELOAD __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 4, 0, 1, 4)
+
+#define FDMA_CH_RELOAD_CH_RELOAD GENMASK(7, 0)
+#define FDMA_CH_RELOAD_CH_RELOAD_SET(x)\
+ FIELD_PREP(FDMA_CH_RELOAD_CH_RELOAD, x)
+#define FDMA_CH_RELOAD_CH_RELOAD_GET(x)\
+ FIELD_GET(FDMA_CH_RELOAD_CH_RELOAD, x)
+
+/* FDMA:FDMA:FDMA_CH_DISABLE */
+#define FDMA_CH_DISABLE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 8, 0, 1, 4)
+
+#define FDMA_CH_DISABLE_CH_DISABLE GENMASK(7, 0)
+#define FDMA_CH_DISABLE_CH_DISABLE_SET(x)\
+ FIELD_PREP(FDMA_CH_DISABLE_CH_DISABLE, x)
+#define FDMA_CH_DISABLE_CH_DISABLE_GET(x)\
+ FIELD_GET(FDMA_CH_DISABLE_CH_DISABLE, x)
+
+/* FDMA:FDMA:FDMA_CH_DB_DISCARD */
+#define FDMA_CH_DB_DISCARD __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 16, 0, 1, 4)
+
+#define FDMA_CH_DB_DISCARD_DB_DISCARD GENMASK(7, 0)
+#define FDMA_CH_DB_DISCARD_DB_DISCARD_SET(x)\
+ FIELD_PREP(FDMA_CH_DB_DISCARD_DB_DISCARD, x)
+#define FDMA_CH_DB_DISCARD_DB_DISCARD_GET(x)\
+ FIELD_GET(FDMA_CH_DB_DISCARD_DB_DISCARD, x)
+
+/* FDMA:FDMA:FDMA_DCB_LLP */
+#define FDMA_DCB_LLP(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 52, r, 8, 4)
+
+/* FDMA:FDMA:FDMA_DCB_LLP1 */
+#define FDMA_DCB_LLP1(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 84, r, 8, 4)
+
+/* FDMA:FDMA:FDMA_CH_ACTIVE */
+#define FDMA_CH_ACTIVE __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 180, 0, 1, 4)
+
+/* FDMA:FDMA:FDMA_CH_CFG */
+#define FDMA_CH_CFG(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 224, r, 8, 4)
+
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY BIT(4)
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_SET(x)\
+ FIELD_PREP(FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY, x)
+#define FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY_GET(x)\
+ FIELD_GET(FDMA_CH_CFG_CH_INTR_DB_EOF_ONLY, x)
+
+#define FDMA_CH_CFG_CH_INJ_PORT BIT(3)
+#define FDMA_CH_CFG_CH_INJ_PORT_SET(x)\
+ FIELD_PREP(FDMA_CH_CFG_CH_INJ_PORT, x)
+#define FDMA_CH_CFG_CH_INJ_PORT_GET(x)\
+ FIELD_GET(FDMA_CH_CFG_CH_INJ_PORT, x)
+
+#define FDMA_CH_CFG_CH_DCB_DB_CNT GENMASK(2, 1)
+#define FDMA_CH_CFG_CH_DCB_DB_CNT_SET(x)\
+ FIELD_PREP(FDMA_CH_CFG_CH_DCB_DB_CNT, x)
+#define FDMA_CH_CFG_CH_DCB_DB_CNT_GET(x)\
+ FIELD_GET(FDMA_CH_CFG_CH_DCB_DB_CNT, x)
+
+#define FDMA_CH_CFG_CH_MEM BIT(0)
+#define FDMA_CH_CFG_CH_MEM_SET(x)\
+ FIELD_PREP(FDMA_CH_CFG_CH_MEM, x)
+#define FDMA_CH_CFG_CH_MEM_GET(x)\
+ FIELD_GET(FDMA_CH_CFG_CH_MEM, x)
+
+/* FDMA:FDMA:FDMA_PORT_CTRL */
+#define FDMA_PORT_CTRL(r) __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 376, r, 2, 4)
+
+#define FDMA_PORT_CTRL_INJ_STOP BIT(4)
+#define FDMA_PORT_CTRL_INJ_STOP_SET(x)\
+ FIELD_PREP(FDMA_PORT_CTRL_INJ_STOP, x)
+#define FDMA_PORT_CTRL_INJ_STOP_GET(x)\
+ FIELD_GET(FDMA_PORT_CTRL_INJ_STOP, x)
+
+#define FDMA_PORT_CTRL_XTR_STOP BIT(2)
+#define FDMA_PORT_CTRL_XTR_STOP_SET(x)\
+ FIELD_PREP(FDMA_PORT_CTRL_XTR_STOP, x)
+#define FDMA_PORT_CTRL_XTR_STOP_GET(x)\
+ FIELD_GET(FDMA_PORT_CTRL_XTR_STOP, x)
+
+/* FDMA:FDMA:FDMA_INTR_DB */
+#define FDMA_INTR_DB __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 392, 0, 1, 4)
+
+/* FDMA:FDMA:FDMA_INTR_DB_ENA */
+#define FDMA_INTR_DB_ENA __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 396, 0, 1, 4)
+
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA GENMASK(7, 0)
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA_SET(x)\
+ FIELD_PREP(FDMA_INTR_DB_ENA_INTR_DB_ENA, x)
+#define FDMA_INTR_DB_ENA_INTR_DB_ENA_GET(x)\
+ FIELD_GET(FDMA_INTR_DB_ENA_INTR_DB_ENA, x)
+
+/* FDMA:FDMA:FDMA_INTR_ERR */
+#define FDMA_INTR_ERR __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 400, 0, 1, 4)
+
+/* FDMA:FDMA:FDMA_ERRORS */
+#define FDMA_ERRORS __REG(TARGET_FDMA, 0, 1, 8, 0, 1, 428, 412, 0, 1, 4)
+
/* PTP:PTP_CFG:PTP_DOM_CFG */
#define PTP_DOM_CFG __REG(TARGET_PTP, 0, 1, 512, 0, 1, 16, 12, 0, 1, 4)

--
2.33.0

2022-03-17 20:38:55

by Horatiu Vultur

[permalink] [raw]
Subject: [PATCH net-next 5/5] net: lan966x: Update FDMA to change MTU.

When changing the MTU, the size of the DBs must also be changed, in case
frames of the new size arrive at the CPU.

Signed-off-by: Horatiu Vultur <[email protected]>
---
.../ethernet/microchip/lan966x/lan966x_fdma.c | 95 +++++++++++++++++++
.../ethernet/microchip/lan966x/lan966x_main.c | 2 +-
.../ethernet/microchip/lan966x/lan966x_main.h | 1 +
3 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
index c23e521a1f8b..a33329cc4834 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -633,6 +633,101 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
return err;
}

+static int lan966x_fdma_get_max_mtu(struct lan966x *lan966x)
+{
+ int max_mtu = 0;
+ int i;
+
+ for (i = 0; i < lan966x->num_phys_ports; ++i) {
+ int mtu;
+
+ if (!lan966x->ports[i])
+ continue;
+
+ mtu = lan966x->ports[i]->dev->mtu;
+ if (mtu > max_mtu)
+ max_mtu = mtu;
+ }
+
+ return max_mtu;
+}
+
+static int lan966x_qsys_sw_status(struct lan966x *lan966x)
+{
+ return lan_rd(lan966x, QSYS_SW_STATUS(CPU_PORT));
+}
+
+static void lan966x_fdma_reload(struct lan966x *lan966x, int new_mtu)
+{
+ void *rx_dcb, *tx_dcb, *tx_dcb_buf;
+ dma_addr_t rx_dma, tx_dma;
+ unsigned long flags;
+ u32 size;
+
+ /* Store these for later to free them */
+ rx_dma = lan966x->rx.dma;
+ tx_dma = lan966x->tx.dma;
+ rx_dcb = lan966x->rx.dcbs;
+ tx_dcb = lan966x->tx.dcbs;
+ tx_dcb_buf = lan966x->tx.dcbs_buf;
+
+ lan966x_fdma_rx_disable(&lan966x->rx);
+ lan966x_fdma_rx_free_skbs(&lan966x->rx);
+ lan966x->rx.page_order = round_up(new_mtu, PAGE_SIZE) / PAGE_SIZE - 1;
+ lan966x_fdma_rx_alloc(&lan966x->rx);
+ lan966x_fdma_rx_start(&lan966x->rx);
+
+ spin_lock_irqsave(&lan966x->tx_lock, flags);
+ lan966x_fdma_tx_disable(&lan966x->tx);
+ lan966x_fdma_tx_alloc(&lan966x->tx);
+ spin_unlock_irqrestore(&lan966x->tx_lock, flags);
+
+ /* Now it is possible to clean */
+ size = sizeof(struct lan966x_tx_dcb) * FDMA_DCB_MAX;
+ size = ALIGN(size, PAGE_SIZE);
+ dma_free_coherent(lan966x->dev, size, tx_dcb, tx_dma);
+
+ kfree(tx_dcb_buf);
+
+ size = sizeof(struct lan966x_rx_dcb) * FDMA_DCB_MAX;
+ size = ALIGN(size, PAGE_SIZE);
+ dma_free_coherent(lan966x->dev, size, rx_dcb, rx_dma);
+}
+
+int lan966x_fdma_change_mtu(struct lan966x *lan966x)
+{
+ int max_mtu;
+ u32 val;
+
+ max_mtu = lan966x_fdma_get_max_mtu(lan966x);
+ if (round_up(max_mtu, PAGE_SIZE) / PAGE_SIZE - 1 ==
+ lan966x->rx.page_order)
+ return 0;
+
+ /* Disable the CPU port */
+ lan_rmw(QSYS_SW_PORT_MODE_PORT_ENA_SET(0),
+ QSYS_SW_PORT_MODE_PORT_ENA,
+ lan966x, QSYS_SW_PORT_MODE(CPU_PORT));
+
+ /* Flush the CPU queues */
+ readx_poll_timeout(lan966x_qsys_sw_status, lan966x,
+ val, !(QSYS_SW_STATUS_EQ_AVAIL_GET(val)),
+ READL_SLEEP_US, READL_TIMEOUT_US);
+
+ /* Add a sleep in case there are frames between the queues and the CPU
+ * port
+ */
+ usleep_range(1000, 2000);
+
+ lan966x_fdma_reload(lan966x, max_mtu);
+
+ /* Enable back the CPU port */
+ lan_rmw(QSYS_SW_PORT_MODE_PORT_ENA_SET(1),
+ QSYS_SW_PORT_MODE_PORT_ENA,
+ lan966x, QSYS_SW_PORT_MODE(CPU_PORT));
+ return 0;
+}
+
void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev)
{
if (lan966x->fdma_ndev)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index 6cb9fffc3058..a78fee5471e7 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -359,7 +359,7 @@ static int lan966x_port_change_mtu(struct net_device *dev, int new_mtu)
lan966x, DEV_MAC_MAXLEN_CFG(port->chip_port));
dev->mtu = new_mtu;

- return 0;
+ return !lan966x->fdma ? 0 : lan966x_fdma_change_mtu(lan966x);
}

static int lan966x_mc_unsync(struct net_device *dev, const unsigned char *addr)
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
index bfa7feea2b56..fa4016f2b5d4 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h
@@ -397,6 +397,7 @@ void lan966x_ptp_txtstamp_release(struct lan966x_port *port,
irqreturn_t lan966x_ptp_irq_handler(int irq, void *args);

int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev);
+int lan966x_fdma_change_mtu(struct lan966x *lan966x);
void lan966x_fdma_netdev_init(struct lan966x *lan966x, struct net_device *dev);
void lan966x_fdma_netdev_deinit(struct lan966x *lan966x, struct net_device *dev);
void lan966x_fdma_init(struct lan966x *lan966x);
--
2.33.0

2022-03-18 14:30:26

by Michael Walle

[permalink] [raw]
Subject: Re: [PATCH net-next 0/5] net: lan966x: Add support for FDMA

Am 2022-03-18 13:10, schrieb Horatiu Vultur:
> The 03/18/2022 12:07, Michael Walle wrote:
>> > Currently when injecting or extracting a frame from CPU, the frame
>> > is given to the HW each word at a time. There is another way to
>> > inject/extract frames from CPU using FDMA(Frame Direct Memory Access).
>> > In this way the entire frame is given to the HW. This improves both
>> > RX and TX bitrate.
>>
>> I wanted to test this. ping and such works fine and I'm also
>> seeing fdma interrupts.
>
> Thanks for testing this also on your board.
>
>> But as soon as I try iperf3 I get a skb_panic
>> (due to frame size?). Hope that splash below helps.
>
> I have not seen this issue. But it looks like it is a problem that there
> is no more space to add the FCS.
> Can you tell me how you run iperf3 so I can also try it?

oh, I forgot to include the commandline.

# on the remote computer
$ iperf3 --version
iperf 3.6 (cJSON 1.5.2)
Linux eddie01 4.19.0-18-686-pae #1 SMP Debian 4.19.208-1 (2021-09-29)
i686
Optional features available: CPU affinity setting, IPv6 flow label,
SCTP, TCP congestion algorithm setting, sendfile / zerocopy, socket
pacing, authentication
$ iperf3 -s

# on the board
$ iperf3 --version
iperf 3.10.1 (cJSON 1.7.13)
Linux buildroot 5.17.0-rc8-next-20220316-00058-gc6cb0628f2a6-dirty #385
SMP Fri Mar 18 13:34:26 CET 2022 armv7l
Optional features available: CPU affinity setting, IPv6 flow label, TCP
congestion algorithm setting, sendfile / zerocopy, socket pacing, bind
to device, support IPv4 don't fragment
$ iperf3 -c eddie01

> Also I have a small diff that might fix the issue:
> ---
> --- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
> @@ -534,6 +534,8 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32
> *ifh, struct net_device *dev)
> struct lan966x_tx_dcb *next_dcb, *dcb;
> struct lan966x_tx *tx = &lan966x->tx;
> struct lan966x_db *next_db;
> + int needed_headroom;
> + int needed_tailroom;
> dma_addr_t dma_addr;
> int next_to_use;
> int err;
> @@ -554,10 +556,11 @@ int lan966x_fdma_xmit(struct sk_buff *skb,
> __be32 *ifh, struct net_device *dev)
>
> /* skb processing */
> skb_tx_timestamp(skb);

btw. skb_tx_timestamp() should be as close to the handover
of the frame to the hardware as possible, no?

> - if (skb_headroom(skb) < IFH_LEN * sizeof(u32)) {
> - err = pskb_expand_head(skb,
> - IFH_LEN * sizeof(u32) -
> skb_headroom(skb),
> - 0, GFP_ATOMIC);
> + needed_headroom = max_t(int, IFH_LEN * sizeof(u32) -
> skb_headroom(skb), 0);
> + needed_tailroom = max_t(int, ETH_FCS_LEN - skb_tailroom(skb),
> 0);
> + if (needed_headroom || needed_tailroom) {
> + err = pskb_expand_head(skb, needed_headroom,
> needed_tailroom,
> + GFP_ATOMIC);
> if (unlikely(err)) {
> dev->stats.tx_dropped++;
> err = NETDEV_TX_OK;

Indeed this will fix the issue:

# iperf3 -c eddie01
Connecting to host eddie01, port 5201
[ 5] local 10.0.1.143 port 55342 connected to 10.0.1.42 port 5201
[ ID] Interval Transfer Bitrate Retr Cwnd
[ 5] 0.00-1.01 sec 43.8 MBytes 364 Mbits/sec 0 245 KBytes
[ 5] 1.01-2.02 sec 43.8 MBytes 364 Mbits/sec 0 246 KBytes
[ 5] 2.02-3.03 sec 43.8 MBytes 364 Mbits/sec 0 259 KBytes

# iperf3 -R -c eddie01
Connecting to host eddie01, port 5201
Reverse mode, remote host eddie01 is sending
[ 5] local 10.0.1.143 port 55346 connected to 10.0.1.42 port 5201
[ ID] Interval Transfer Bitrate
[ 5] 0.00-1.00 sec 28.6 MBytes 240 Mbits/sec
[ 5] 1.00-2.00 sec 28.9 MBytes 242 Mbits/sec
[ 5] 2.00-3.00 sec 28.7 MBytes 241 Mbits/sec

-michael

2022-03-18 16:20:38

by Horatiu Vultur

[permalink] [raw]
Subject: Re: [PATCH net-next 0/5] net: lan966x: Add support for FDMA

The 03/18/2022 12:07, Michael Walle wrote:
>
> Hi Horatiu,

Hi Michael,

>
> > Currently when injecting or extracting a frame from CPU, the frame
> > is given to the HW each word at a time. There is another way to
> > inject/extract frames from CPU using FDMA(Frame Direct Memory Access).
> > In this way the entire frame is given to the HW. This improves both
> > RX and TX bitrate.
>
> I wanted to test this. ping and such works fine and I'm also
> seeing fdma interrupts.

Thanks for testing this also on your board.

> But as soon as I try iperf3 I get a skb_panic
> (due to frame size?). Hope that splash below helps.

I have not seen this issue. But it looks like it is a problem that there
is no more space to add the FCS.
Can you tell me how you run iperf3 so I can also try it?

Also I have a small diff that might fix the issue:
---
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_fdma.c
@@ -534,6 +534,8 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)
struct lan966x_tx_dcb *next_dcb, *dcb;
struct lan966x_tx *tx = &lan966x->tx;
struct lan966x_db *next_db;
+ int needed_headroom;
+ int needed_tailroom;
dma_addr_t dma_addr;
int next_to_use;
int err;
@@ -554,10 +556,11 @@ int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev)

/* skb processing */
skb_tx_timestamp(skb);
- if (skb_headroom(skb) < IFH_LEN * sizeof(u32)) {
- err = pskb_expand_head(skb,
- IFH_LEN * sizeof(u32) - skb_headroom(skb),
- 0, GFP_ATOMIC);
+ needed_headroom = max_t(int, IFH_LEN * sizeof(u32) - skb_headroom(skb), 0);
+ needed_tailroom = max_t(int, ETH_FCS_LEN - skb_tailroom(skb), 0);
+ if (needed_headroom || needed_tailroom) {
+ err = pskb_expand_head(skb, needed_headroom, needed_tailroom,
+ GFP_ATOMIC);
if (unlikely(err)) {
dev->stats.tx_dropped++;
err = NETDEV_TX_OK;
---

>
> -michael
>
> [ 159.778850] skbuff: skb_over_panic: text:c07960c0 len:106 put:4 head:c8cc6e00 data:c8cc6e9a tail:0xc8cc6f04 end:0xc8cc6f00 dev:eth0
> [ 159.788067] ------------[ cut here ]------------
> [ 159.792575] kernel BUG at net/core/skbuff.c:113!
> ..
> [ 160.620995] Backtrace:
> [ 160.623426] [<c0a62350>] (skb_panic) from [<c08876f8>] (skb_put+0x54/0x58)
> [ 160.630284] [<c08876a4>] (skb_put) from [<c07960c0>] (lan966x_fdma_xmit+0x108/0x504)
> [ 160.638011] r5:00000000 r4:00000000
> [ 160.641566] [<c0795fb8>] (lan966x_fdma_xmit) from [<c078cdf8>] (lan966x_port_xmit+0x1b0/0x450)
> [ 160.650171] r10:c1960160 r9:c1960040 r8:00000001 r7:0000001c r6:c196b000 r5:c891a780
> [ 160.657973] r4:00000000
> [ 160.660488] [<c078cc48>] (lan966x_port_xmit) from [<c08a3e24>] (dev_hard_start_xmit+0x114/0x248)
> [ 160.669267] r10:c0f016e8 r9:c0f060cc r8:c0e8d350 r7:c196ba00 r6:00000000 r5:c196b000
> [ 160.677069] r4:c891a780
> [ 160.679584] [<c08a3d10>] (dev_hard_start_xmit) from [<c0904728>] (sch_direct_xmit+0x11c/0x31c)
> [ 160.688188] r10:2a01000a r9:00000000 r8:00000000 r7:c196b000 r6:c196ba00 r5:c891a780
> [ 160.695991] r4:c1e57400
> [ 160.698507] [<c090460c>] (sch_direct_xmit) from [<c08a4520>] (__dev_queue_xmit+0x508/0xaac)
> [ 160.706850] r9:c8cc6f00 r8:0000004a r7:00000001 r6:00000000 r5:c1e57400 r4:c891a780
> [ 160.714565] [<c08a4018>] (__dev_queue_xmit) from [<c08a4ad8>] (dev_queue_xmit+0x14/0x18)
> [ 160.722648] r10:2a01000a r9:00000010 r8:0000004a r7:c1e7038c r6:00000000 r5:c891a780
> [ 160.730451] r4:c1e70300
> [ 160.732966] [<c08a4ac4>] (dev_queue_xmit) from [<c092b0c0>] (ip_finish_output2+0x270/0x5dc)
> [ 160.741300] [<c092ae50>] (ip_finish_output2) from [<c092ceb8>] (__ip_finish_output+0x9c/0x144)
> [ 160.749903] r9:c8cdef00 r8:c10833c0 r7:000005dc r6:c8924000 r5:c891a780 r4:c891a780
> [ 160.757618] [<c092ce1c>] (__ip_finish_output) from [<c092d01c>] (ip_output+0xbc/0xc4)
> [ 160.765441] r10:2a01000a r9:c8cdef00 r8:c8cc6e00 r7:c196b000 r6:c8924000 r5:c10833c0
> [ 160.773244] r4:c891a780
> [ 160.775759] [<c092cf60>] (ip_output) from [<c092ab10>] (ip_local_out+0x60/0x6c)
> [ 160.783058] r7:00000000 r6:c8924000 r5:c10833c0 r4:c891a780
> [ 160.788692] [<c092aab0>] (ip_local_out) from [<c092ad74>] (ip_build_and_send_pkt+0x110/0x1ec)
> [ 160.797206] r7:00000000 r6:c8d40000 r5:000000c4 r4:c891a780
> [ 160.802841] [<c092ac64>] (ip_build_and_send_pkt) from [<c0952bb8>] (tcp_v4_send_synack+0xec/0x138)
> [ 160.811792] r10:c8cdef00 r9:c0b9b614 r8:c10833c0 r7:8b01000a r6:2a01000a r5:c8d40000
> [ 160.819595] r4:c8924000
> [ 160.822110] [<c0952acc>] (tcp_v4_send_synack) from [<c0941520>] (tcp_conn_request+0x6cc/0x984)
> [ 160.830711] r7:c891ab40 r6:c0952acc r5:c8d40000 r4:c8924000
> [ 160.836345] [<c0940e54>] (tcp_conn_request) from [<c095299c>] (tcp_v4_conn_request+0x38/0x84)
> [ 160.844863] r10:c892902a r9:c892903e r8:c10833c0 r7:00000000 r6:00000000 r5:c891ab40
> [ 160.852666] r4:c8d40000
> [ 160.855181] [<c0952964>] (tcp_v4_conn_request) from [<c09d05a0>] (tcp_v6_conn_request+0x12c/0x138)
> [ 160.864122] [<c09d0474>] (tcp_v6_conn_request) from [<c0948870>] (tcp_rcv_state_process+0x29c/0xf44)
> [ 160.873240] r5:c891ab40 r4:c8d40000
> [ 160.876794] [<c09485d4>] (tcp_rcv_state_process) from [<c0953374>] (tcp_v4_do_rcv+0x110/0x22c)
> [ 160.885398] r10:c892902a r9:c892903e r8:c10833c0 r7:00000000 r6:c8d40000 r5:c891ab40
> [ 160.893201] r4:c8d40000
> [ 160.895716] [<c0953264>] (tcp_v4_do_rcv) from [<c0955158>] (tcp_v4_rcv+0xb08/0xba8)
> [ 160.903362] r7:00000000 r6:c8d40000 r5:00000000 r4:c891ab40
> [ 160.908998] [<c0954650>] (tcp_v4_rcv) from [<c09273f4>] (ip_protocol_deliver_rcu+0x34/0x1d8)
> [ 160.917428] r10:c10833c0 r9:c891ab40 r8:00000000 r7:c10833c0 r6:c0f06f64 r5:c891ab40
> [ 160.925230] r4:00000006
> [ 160.927746] [<c09273c0>] (ip_protocol_deliver_rcu) from [<c0927640>] (ip_local_deliver+0xa8/0xf0)
> [ 160.936609] r8:c0f01c0c r7:c0f01c0c r6:00000000 r5:c0f01bc4 r4:c891ab40
> [ 160.943283] [<c0927598>] (ip_local_deliver) from [<c0926794>] (ip_sublist_rcv_finish+0x44/0x58)
> [ 160.951967] r5:c0f01bc4 r4:c0f01bc4
> [ 160.955522] [<c0926750>] (ip_sublist_rcv_finish) from [<c0926d58>] (ip_list_rcv_finish.constprop.0+0x10c/0x134)
> [ 160.965598] r7:c0f01c0c r6:c0f01bc4 r5:c0f01c0c r4:c0f01c0c
> [ 160.971232] [<c0926c4c>] (ip_list_rcv_finish.constprop.0) from [<c09277cc>] (ip_list_rcv+0xec/0x114)
> [ 160.980358] r10:c0f01c5c r9:c10833c0 r8:c0f01c0c r7:c0f01c5c r6:c10833c0 r5:c196b000
> [ 160.988160] r4:c891ab40
> [ 160.990676] [<c09276e0>] (ip_list_rcv) from [<c08a6328>] (__netif_receive_skb_list_core+0x90/0x214)
> [ 160.999714] r10:c196b000 r9:c196b000 r8:00000000 r7:c0f01c5c r6:c0f07450 r5:c891acc0
> [ 161.007517] r4:c09276e0
> [ 161.010032] [<c08a6298>] (__netif_receive_skb_list_core) from [<c08a6670>] (netif_receive_skb_list_internal+0x1c4/0x2c4)
> [ 161.020893] r10:c10833b8 r9:c0f08a00 r8:00000000 r7:c0f01d44 r6:c0f01d44 r5:c0f01d44
> [ 161.028696] r4:00000000
> [ 161.031211] [<c08a64ac>] (netif_receive_skb_list_internal) from [<c08a67a8>] (netif_receive_skb_list+0x38/0xe4)
> [ 161.041291] r10:c1963de0 r9:f08527d0 r8:f08527c0 r7:00000003 r6:c1960040 r5:c103d920
> [ 161.049094] r4:c0f01d44
> [ 161.051609] [<c08a6770>] (netif_receive_skb_list) from [<c0795af4>] (lan966x_fdma_napi_poll+0x3ec/0x490)
> [ 161.061081] r9:f08527d0 r8:f08527c0 r7:00000003 r6:c1960040 r5:c1962de0 r4:00000040
> [ 161.068796] [<c0795708>] (lan966x_fdma_napi_poll) from [<c08a6ee4>] (__napi_poll+0x34/0x1dc)
> [ 161.077226] r10:ef7e6080 r9:c0f01db4 r8:c0f03d00 r7:c0f01db3 r6:00000040 r5:c1963de0
> [ 161.085029] r4:00000001
> [ 161.087545] [<c08a6eb0>] (__napi_poll) from [<c08a7290>] (net_rx_action+0xec/0x288)
> [ 161.095190] r7:2e957000 r6:ffffc936 r5:0000012c r4:c1963de0
> [ 161.100825] [<c08a71a4>] (net_rx_action) from [<c010143c>] (__do_softirq+0x13c/0x384)
> [ 161.108647] r10:c0f08a00 r9:00000008 r8:00000100 r7:c1037d40 r6:00000003 r5:00000004
> [ 161.116451] r4:c0f0308c
> [ 161.118966] [<c0101300>] (__do_softirq) from [<c013bc9c>] (irq_exit+0xac/0xdc)
> [ 161.126182] r10:c0f04f78 r9:c0f08a00 r8:00000000 r7:c0f01ed4 r6:00000000 r5:c0f01ea0
> [ 161.133985] r4:c0f08a00
> [ 161.136500] [<c013bbf0>] (irq_exit) from [<c0a62c84>] (generic_handle_arch_irq+0x48/0x4c)
> [ 161.144663] r5:c0f01ea0 r4:c0e8d358
> [ 161.148218] [<c0a62c3c>] (generic_handle_arch_irq) from [<c0100be8>] (__irq_svc+0x88/0xb0)

--
/Horatiu

2022-03-20 00:57:58

by Michael Walle

[permalink] [raw]
Subject: Re: [PATCH net-next 0/5] net: lan966x: Add support for FDMA

Hi Horatiu,

> Currently when injecting or extracting a frame from CPU, the frame
> is given to the HW each word at a time. There is another way to
> inject/extract frames from CPU using FDMA(Frame Direct Memory Access).
> In this way the entire frame is given to the HW. This improves both
> RX and TX bitrate.

I wanted to test this. ping and such works fine and I'm also
seeing fdma interrupts. But as soon as I try iperf3 I get a skb_panic
(due to frame size?). Hope that splash below helps.

-michael

[ 159.778850] skbuff: skb_over_panic: text:c07960c0 len:106 put:4 head:c8cc6e00 data:c8cc6e9a tail:0xc8cc6f04 end:0xc8cc6f00 dev:eth0
[ 159.788067] ------------[ cut here ]------------
[ 159.792575] kernel BUG at net/core/skbuff.c:113!
..
[ 160.620995] Backtrace:
[ 160.623426] [<c0a62350>] (skb_panic) from [<c08876f8>] (skb_put+0x54/0x58)
[ 160.630284] [<c08876a4>] (skb_put) from [<c07960c0>] (lan966x_fdma_xmit+0x108/0x504)
[ 160.638011] r5:00000000 r4:00000000
[ 160.641566] [<c0795fb8>] (lan966x_fdma_xmit) from [<c078cdf8>] (lan966x_port_xmit+0x1b0/0x450)
[ 160.650171] r10:c1960160 r9:c1960040 r8:00000001 r7:0000001c r6:c196b000 r5:c891a780
[ 160.657973] r4:00000000
[ 160.660488] [<c078cc48>] (lan966x_port_xmit) from [<c08a3e24>] (dev_hard_start_xmit+0x114/0x248)
[ 160.669267] r10:c0f016e8 r9:c0f060cc r8:c0e8d350 r7:c196ba00 r6:00000000 r5:c196b000
[ 160.677069] r4:c891a780
[ 160.679584] [<c08a3d10>] (dev_hard_start_xmit) from [<c0904728>] (sch_direct_xmit+0x11c/0x31c)
[ 160.688188] r10:2a01000a r9:00000000 r8:00000000 r7:c196b000 r6:c196ba00 r5:c891a780
[ 160.695991] r4:c1e57400
[ 160.698507] [<c090460c>] (sch_direct_xmit) from [<c08a4520>] (__dev_queue_xmit+0x508/0xaac)
[ 160.706850] r9:c8cc6f00 r8:0000004a r7:00000001 r6:00000000 r5:c1e57400 r4:c891a780
[ 160.714565] [<c08a4018>] (__dev_queue_xmit) from [<c08a4ad8>] (dev_queue_xmit+0x14/0x18)
[ 160.722648] r10:2a01000a r9:00000010 r8:0000004a r7:c1e7038c r6:00000000 r5:c891a780
[ 160.730451] r4:c1e70300
[ 160.732966] [<c08a4ac4>] (dev_queue_xmit) from [<c092b0c0>] (ip_finish_output2+0x270/0x5dc)
[ 160.741300] [<c092ae50>] (ip_finish_output2) from [<c092ceb8>] (__ip_finish_output+0x9c/0x144)
[ 160.749903] r9:c8cdef00 r8:c10833c0 r7:000005dc r6:c8924000 r5:c891a780 r4:c891a780
[ 160.757618] [<c092ce1c>] (__ip_finish_output) from [<c092d01c>] (ip_output+0xbc/0xc4)
[ 160.765441] r10:2a01000a r9:c8cdef00 r8:c8cc6e00 r7:c196b000 r6:c8924000 r5:c10833c0
[ 160.773244] r4:c891a780
[ 160.775759] [<c092cf60>] (ip_output) from [<c092ab10>] (ip_local_out+0x60/0x6c)
[ 160.783058] r7:00000000 r6:c8924000 r5:c10833c0 r4:c891a780
[ 160.788692] [<c092aab0>] (ip_local_out) from [<c092ad74>] (ip_build_and_send_pkt+0x110/0x1ec)
[ 160.797206] r7:00000000 r6:c8d40000 r5:000000c4 r4:c891a780
[ 160.802841] [<c092ac64>] (ip_build_and_send_pkt) from [<c0952bb8>] (tcp_v4_send_synack+0xec/0x138)
[ 160.811792] r10:c8cdef00 r9:c0b9b614 r8:c10833c0 r7:8b01000a r6:2a01000a r5:c8d40000
[ 160.819595] r4:c8924000
[ 160.822110] [<c0952acc>] (tcp_v4_send_synack) from [<c0941520>] (tcp_conn_request+0x6cc/0x984)
[ 160.830711] r7:c891ab40 r6:c0952acc r5:c8d40000 r4:c8924000
[ 160.836345] [<c0940e54>] (tcp_conn_request) from [<c095299c>] (tcp_v4_conn_request+0x38/0x84)
[ 160.844863] r10:c892902a r9:c892903e r8:c10833c0 r7:00000000 r6:00000000 r5:c891ab40
[ 160.852666] r4:c8d40000
[ 160.855181] [<c0952964>] (tcp_v4_conn_request) from [<c09d05a0>] (tcp_v6_conn_request+0x12c/0x138)
[ 160.864122] [<c09d0474>] (tcp_v6_conn_request) from [<c0948870>] (tcp_rcv_state_process+0x29c/0xf44)
[ 160.873240] r5:c891ab40 r4:c8d40000
[ 160.876794] [<c09485d4>] (tcp_rcv_state_process) from [<c0953374>] (tcp_v4_do_rcv+0x110/0x22c)
[ 160.885398] r10:c892902a r9:c892903e r8:c10833c0 r7:00000000 r6:c8d40000 r5:c891ab40
[ 160.893201] r4:c8d40000
[ 160.895716] [<c0953264>] (tcp_v4_do_rcv) from [<c0955158>] (tcp_v4_rcv+0xb08/0xba8)
[ 160.903362] r7:00000000 r6:c8d40000 r5:00000000 r4:c891ab40
[ 160.908998] [<c0954650>] (tcp_v4_rcv) from [<c09273f4>] (ip_protocol_deliver_rcu+0x34/0x1d8)
[ 160.917428] r10:c10833c0 r9:c891ab40 r8:00000000 r7:c10833c0 r6:c0f06f64 r5:c891ab40
[ 160.925230] r4:00000006
[ 160.927746] [<c09273c0>] (ip_protocol_deliver_rcu) from [<c0927640>] (ip_local_deliver+0xa8/0xf0)
[ 160.936609] r8:c0f01c0c r7:c0f01c0c r6:00000000 r5:c0f01bc4 r4:c891ab40
[ 160.943283] [<c0927598>] (ip_local_deliver) from [<c0926794>] (ip_sublist_rcv_finish+0x44/0x58)
[ 160.951967] r5:c0f01bc4 r4:c0f01bc4
[ 160.955522] [<c0926750>] (ip_sublist_rcv_finish) from [<c0926d58>] (ip_list_rcv_finish.constprop.0+0x10c/0x134)
[ 160.965598] r7:c0f01c0c r6:c0f01bc4 r5:c0f01c0c r4:c0f01c0c
[ 160.971232] [<c0926c4c>] (ip_list_rcv_finish.constprop.0) from [<c09277cc>] (ip_list_rcv+0xec/0x114)
[ 160.980358] r10:c0f01c5c r9:c10833c0 r8:c0f01c0c r7:c0f01c5c r6:c10833c0 r5:c196b000
[ 160.988160] r4:c891ab40
[ 160.990676] [<c09276e0>] (ip_list_rcv) from [<c08a6328>] (__netif_receive_skb_list_core+0x90/0x214)
[ 160.999714] r10:c196b000 r9:c196b000 r8:00000000 r7:c0f01c5c r6:c0f07450 r5:c891acc0
[ 161.007517] r4:c09276e0
[ 161.010032] [<c08a6298>] (__netif_receive_skb_list_core) from [<c08a6670>] (netif_receive_skb_list_internal+0x1c4/0x2c4)
[ 161.020893] r10:c10833b8 r9:c0f08a00 r8:00000000 r7:c0f01d44 r6:c0f01d44 r5:c0f01d44
[ 161.028696] r4:00000000
[ 161.031211] [<c08a64ac>] (netif_receive_skb_list_internal) from [<c08a67a8>] (netif_receive_skb_list+0x38/0xe4)
[ 161.041291] r10:c1963de0 r9:f08527d0 r8:f08527c0 r7:00000003 r6:c1960040 r5:c103d920
[ 161.049094] r4:c0f01d44
[ 161.051609] [<c08a6770>] (netif_receive_skb_list) from [<c0795af4>] (lan966x_fdma_napi_poll+0x3ec/0x490)
[ 161.061081] r9:f08527d0 r8:f08527c0 r7:00000003 r6:c1960040 r5:c1962de0 r4:00000040
[ 161.068796] [<c0795708>] (lan966x_fdma_napi_poll) from [<c08a6ee4>] (__napi_poll+0x34/0x1dc)
[ 161.077226] r10:ef7e6080 r9:c0f01db4 r8:c0f03d00 r7:c0f01db3 r6:00000040 r5:c1963de0
[ 161.085029] r4:00000001
[ 161.087545] [<c08a6eb0>] (__napi_poll) from [<c08a7290>] (net_rx_action+0xec/0x288)
[ 161.095190] r7:2e957000 r6:ffffc936 r5:0000012c r4:c1963de0
[ 161.100825] [<c08a71a4>] (net_rx_action) from [<c010143c>] (__do_softirq+0x13c/0x384)
[ 161.108647] r10:c0f08a00 r9:00000008 r8:00000100 r7:c1037d40 r6:00000003 r5:00000004
[ 161.116451] r4:c0f0308c
[ 161.118966] [<c0101300>] (__do_softirq) from [<c013bc9c>] (irq_exit+0xac/0xdc)
[ 161.126182] r10:c0f04f78 r9:c0f08a00 r8:00000000 r7:c0f01ed4 r6:00000000 r5:c0f01ea0
[ 161.133985] r4:c0f08a00
[ 161.136500] [<c013bbf0>] (irq_exit) from [<c0a62c84>] (generic_handle_arch_irq+0x48/0x4c)
[ 161.144663] r5:c0f01ea0 r4:c0e8d358
[ 161.148218] [<c0a62c3c>] (generic_handle_arch_irq) from [<c0100be8>] (__irq_svc+0x88/0xb0)