2023-04-19 09:02:32

by Yang Yang

[permalink] [raw]
Subject: [PATCH linux-next v2] selftests: net: udpgso_bench_rx: Fix verifty exceptions

From: Zhang Yunkai (CGEL ZTE) <[email protected]>

The verification function of this test case is likely to encounter the
following error, which may confuse users. The problem is easily
reproducible in the latest kernel.

Environment A, the sender:
bash# udpgso_bench_tx -l 4 -4 -D "$IP_B"
udpgso_bench_tx: write: Connection refused

Environment B, the receiver:
bash# udpgso_bench_rx -4 -G -S 1472 -v
udpgso_bench_rx: data[1472]: len 17664, a(97) != q(113)

If the packet is captured, you will see:
Environment A, the sender:
bash# tcpdump -i eth0 host "$IP_B" &
IP $IP_A.41025 > $IP_B.8000: UDP, length 1472
IP $IP_A.41025 > $IP_B.8000: UDP, length 1472
IP $IP_B > $IP_A: ICMP $IP_B udp port 8000 unreachable, length 556

Environment B, the receiver:
bash# tcpdump -i eth0 host "$IP_B" &
IP $IP_A.41025 > $IP_B.8000: UDP, length 7360
IP $IP_A.41025 > $IP_B.8000: UDP, length 14720
IP $IP_B > $IP_A: ICMP $IP_B udp port 8000 unreachable, length 556

In one test, the verification data is printed as follows:
abcd...xyz | 1...
.. |
abcd...xyz |
abcd...opabcd...xyz | ...1472... Not xyzabcd, messages are merged
.. |

This is because the sending buffer is buf[64K], and its content is a
loop of A-Z. But maybe only 1472 bytes per send, or more if UDP GSO is
used. The message content does not necessarily end with XYZ, but GRO
will merge these packets, and the -v parameter directly verifies the
entire GRO receive buffer. So we do the validation after the data is split
at the receiving end, just as the application actually uses this feature.

If the sender does not use GSO, each individual segment starts at A,
end at somewhere. Using GSO also has the same problem, and. The data
between each segment during transmission is continuous, but GRO is merged
in the order received, which is not necessarily the order of transmission.

Execution in the same environment does not cause problems, because the
lo device is not NAPI, and does not perform GRO processing. Perhaps it
could be worth supporting to reduce system calls.
bash# tcpdump -i lo host "$IP_self" &
bash# echo udp_gro_receive > /sys/kernel/debug/tracing/set_ftrace_filter
bash# echo function > /sys/kernel/debug/tracing/current_tracer
bash# udpgso_bench_rx -4 -G -S 1472 -v &
bash# udpgso_bench_tx -l 4 -4 -D "$IP_self"

The issue still exists when using the GRO with -G, but not using the -S
to obtain gsosize. Therefore, a print has been added to remind users.

After this issue is resolved, another issue will be encountered and will
be resolved in the next patch.
Environment A, the sender:
bash# udpgso_bench_tx -l 4 -4 -D "$DST"
udpgso_bench_tx: write: Connection refused

Environment B, the receiver:
bash# udpgso_bench_rx -4 -G -S 1472
udp rx: 15 MB/s 256 calls/s
udp rx: 30 MB/s 512 calls/s
udpgso_bench_rx: recv: bad gso size, got -1, expected 1472
(-1 == no gso cmsg))

v2:
- Fix confusing descriptions

Signed-off-by: Zhang Yunkai (CGEL ZTE) <[email protected]>
Reviewed-by: Xu Xin (CGEL ZTE) <[email protected]>
Reviewed-by: Yang Yang (CGEL ZTE) <[email protected]>
Cc: Xuexin Jiang (CGEL ZTE) <[email protected]>
---
tools/testing/selftests/net/udpgso_bench_rx.c | 40 +++++++++++++++++++++------
1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index f35a924d4a30..6a2026494cdb 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -189,26 +189,44 @@ static char sanitized_char(char val)
return (val >= 'a' && val <= 'z') ? val : '.';
}

-static void do_verify_udp(const char *data, int len)
+static void do_verify_udp(const char *data, int start, int len)
{
- char cur = data[0];
+ char cur = data[start];
int i;

/* verify contents */
if (cur < 'a' || cur > 'z')
error(1, 0, "data initial byte out of range");

- for (i = 1; i < len; i++) {
+ for (i = start + 1; i < start + len; i++) {
if (cur == 'z')
cur = 'a';
else
cur++;

- if (data[i] != cur)
+ if (data[i] != cur) {
+ if (cfg_gro_segment && !cfg_expected_gso_size)
+ error(0, 0, "Use -S to obtain gsosize, to %s"
+ , "help guide split and verification.");
+
error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
i, len,
sanitized_char(data[i]), data[i],
sanitized_char(cur), cur);
+ }
+ }
+}
+
+static void do_verify_udp_gro(const char *data, int len, int gso_size)
+{
+ int start = 0;
+
+ while (len - start > 0) {
+ if (len - start > gso_size)
+ do_verify_udp(data, start, gso_size);
+ else
+ do_verify_udp(data, start, len - start);
+ start += gso_size;
}
}

@@ -264,16 +282,20 @@ static void do_flush_udp(int fd)
if (cfg_expected_pkt_len && ret != cfg_expected_pkt_len)
error(1, 0, "recv: bad packet len, got %d,"
" expected %d\n", ret, cfg_expected_pkt_len);
+ if (cfg_expected_gso_size && cfg_expected_gso_size != gso_size)
+ error(1, 0, "recv: bad gso size, got %d, expected %d %s",
+ gso_size, cfg_expected_gso_size, "(-1 == no gso cmsg))\n");
if (len && cfg_verify) {
if (ret == 0)
error(1, errno, "recv: 0 byte datagram\n");

- do_verify_udp(rbuf, ret);
+ if (!cfg_gro_segment)
+ do_verify_udp(rbuf, 0, ret);
+ else if (gso_size > 0)
+ do_verify_udp_gro(rbuf, ret, gso_size);
+ else
+ do_verify_udp_gro(rbuf, ret, ret);
}
- if (cfg_expected_gso_size && cfg_expected_gso_size != gso_size)
- error(1, 0, "recv: bad gso size, got %d, expected %d "
- "(-1 == no gso cmsg))\n", gso_size,
- cfg_expected_gso_size);

packets++;
bytes += ret;
--
2.15.2


2023-04-19 14:11:06

by Willem de Bruijn

[permalink] [raw]
Subject: RE: [PATCH linux-next v2] selftests: net: udpgso_bench_rx: Fix verifty exceptions

yang.yang29@ wrote:
> From: Zhang Yunkai (CGEL ZTE) <[email protected]>
>
> The verification function of this test case is likely to encounter the
> following error, which may confuse users. The problem is easily
> reproducible in the latest kernel.
>
> Environment A, the sender:
> bash# udpgso_bench_tx -l 4 -4 -D "$IP_B"
> udpgso_bench_tx: write: Connection refused
>
> Environment B, the receiver:
> bash# udpgso_bench_rx -4 -G -S 1472 -v
> udpgso_bench_rx: data[1472]: len 17664, a(97) != q(113)
>
> If the packet is captured, you will see:
> Environment A, the sender:
> bash# tcpdump -i eth0 host "$IP_B" &
> IP $IP_A.41025 > $IP_B.8000: UDP, length 1472
> IP $IP_A.41025 > $IP_B.8000: UDP, length 1472
> IP $IP_B > $IP_A: ICMP $IP_B udp port 8000 unreachable, length 556
>
> Environment B, the receiver:
> bash# tcpdump -i eth0 host "$IP_B" &
> IP $IP_A.41025 > $IP_B.8000: UDP, length 7360
> IP $IP_A.41025 > $IP_B.8000: UDP, length 14720
> IP $IP_B > $IP_A: ICMP $IP_B udp port 8000 unreachable, length 556
>
> In one test, the verification data is printed as follows:
> abcd...xyz | 1...
> .. |
> abcd...xyz |
> abcd...opabcd...xyz | ...1472... Not xyzabcd, messages are merged
> .. |
>
> This is because the sending buffer is buf[64K], and its content is a
> loop of A-Z. But maybe only 1472 bytes per send, or more if UDP GSO is
> used. The message content does not necessarily end with XYZ, but GRO
> will merge these packets, and the -v parameter directly verifies the
> entire GRO receive buffer. So we do the validation after the data is split
> at the receiving end, just as the application actually uses this feature.

The explanation can be much more brief. The issue is that the test on
receive for expected payload pattern {AB..Z}+ fail for GRO packets if
segment payload does not end on a Z.

> If the sender does not use GSO, each individual segment starts at A,
> end at somewhere. Using GSO also has the same problem, and. The data
> between each segment during transmission is continuous, but GRO is merged
> in the order received, which is not necessarily the order of transmission.

The issue as I understand it is due to the above, not due to reordering.
Am I misunderstanding the problem?

> Execution in the same environment does not cause problems, because the
> lo device is not NAPI, and does not perform GRO processing. Perhaps it
> could be worth supporting to reduce system calls.
> bash# tcpdump -i lo host "$IP_self" &
> bash# echo udp_gro_receive > /sys/kernel/debug/tracing/set_ftrace_filter
> bash# echo function > /sys/kernel/debug/tracing/current_tracer
> bash# udpgso_bench_rx -4 -G -S 1472 -v &
> bash# udpgso_bench_tx -l 4 -4 -D "$IP_self"

This is not relevant.

> The issue still exists when using the GRO with -G, but not using the -S
> to obtain gsosize. Therefore, a print has been added to remind users.
>
> After this issue is resolved, another issue will be encountered and will
> be resolved in the next patch.
> Environment A, the sender:
> bash# udpgso_bench_tx -l 4 -4 -D "$DST"
> udpgso_bench_tx: write: Connection refused
>
> Environment B, the receiver:
> bash# udpgso_bench_rx -4 -G -S 1472
> udp rx: 15 MB/s 256 calls/s
> udp rx: 30 MB/s 512 calls/s
> udpgso_bench_rx: recv: bad gso size, got -1, expected 1472
> (-1 == no gso cmsg))

This is not relevant to *this patch*

> v2:
> - Fix confusing descriptions
>
> Signed-off-by: Zhang Yunkai (CGEL ZTE) <[email protected]>
> Reviewed-by: Xu Xin (CGEL ZTE) <[email protected]>
> Reviewed-by: Yang Yang (CGEL ZTE) <[email protected]>
> Cc: Xuexin Jiang (CGEL ZTE) <[email protected]>
> ---
> tools/testing/selftests/net/udpgso_bench_rx.c | 40 +++++++++++++++++++++------
> 1 file changed, 31 insertions(+), 9 deletions(-)
>
> diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
> index f35a924d4a30..6a2026494cdb 100644
> --- a/tools/testing/selftests/net/udpgso_bench_rx.c
> +++ b/tools/testing/selftests/net/udpgso_bench_rx.c
> @@ -189,26 +189,44 @@ static char sanitized_char(char val)
> return (val >= 'a' && val <= 'z') ? val : '.';
> }
>
> -static void do_verify_udp(const char *data, int len)
> +static void do_verify_udp(const char *data, int start, int len)
> {
> - char cur = data[0];
> + char cur = data[start];
> int i;
>
> /* verify contents */
> if (cur < 'a' || cur > 'z')
> error(1, 0, "data initial byte out of range");
>
> - for (i = 1; i < len; i++) {
> + for (i = start + 1; i < start + len; i++) {
> if (cur == 'z')
> cur = 'a';
> else
> cur++;
>
> - if (data[i] != cur)
> + if (data[i] != cur) {
> + if (cfg_gro_segment && !cfg_expected_gso_size)
> + error(0, 0, "Use -S to obtain gsosize, to %s"
> + , "help guide split and verification.");
> +
> error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
> i, len,
> sanitized_char(data[i]), data[i],
> sanitized_char(cur), cur);
> + }
> + }
> +}
> +
> +static void do_verify_udp_gro(const char *data, int len, int gso_size)
> +{
> + int start = 0;
> +
> + while (len - start > 0) {
> + if (len - start > gso_size)
> + do_verify_udp(data, start, gso_size);
> + else
> + do_verify_udp(data, start, len - start);
> + start += gso_size;
> }
> }
>
> @@ -264,16 +282,20 @@ static void do_flush_udp(int fd)
> if (cfg_expected_pkt_len && ret != cfg_expected_pkt_len)
> error(1, 0, "recv: bad packet len, got %d,"
> " expected %d\n", ret, cfg_expected_pkt_len);
> + if (cfg_expected_gso_size && cfg_expected_gso_size != gso_size)
> + error(1, 0, "recv: bad gso size, got %d, expected %d %s",
> + gso_size, cfg_expected_gso_size, "(-1 == no gso cmsg))\n");

why move this block? and don't pass part of the fmt as an extra %s.

> if (len && cfg_verify) {
> if (ret == 0)
> error(1, errno, "recv: 0 byte datagram\n");
>
> - do_verify_udp(rbuf, ret);
> + if (!cfg_gro_segment)
> + do_verify_udp(rbuf, 0, ret);
> + else if (gso_size > 0)
> + do_verify_udp_gro(rbuf, ret, gso_size);
> + else
> + do_verify_udp_gro(rbuf, ret, ret);
> }
> - if (cfg_expected_gso_size && cfg_expected_gso_size != gso_size)
> - error(1, 0, "recv: bad gso size, got %d, expected %d "
> - "(-1 == no gso cmsg))\n", gso_size,
> - cfg_expected_gso_size);
>
> packets++;
> bytes += ret;
> --
> 2.15.2