While testing UDP GSO fraglists forwarding through a driver that uses
Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
iperf packets:
[ ID] Interval Transfer Bitrate Jitter
[SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
Simple switch to napi_gro_receive() or any other method without frag0
shortcut completely resolved them.
I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
callback. While it's probably OK for non-frag0 paths (when all
headers or even the entire frame are already in skb->data), this
inline points to junk when using Fast GRO (napi_gro_frags() or
napi_gro_receive() with only Ethernet header in skb->data and all
the rest in shinfo->frags) and breaks GRO packet compilation and
the packet flow itself.
To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
are typically used. UDP even has an inline helper that makes use of
them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
to get rid of the out-of-order deliveries.
Present since the introduction of plain UDP GRO in 5.0-rc1.
Since v2 [1]:
- dropped redundant check introduced in v2 as it's performed right
before (thanks to Eric);
- udp_hdr() switched to data + off for skbs from list (also Eric);
- fixed possible malfunction of {,__}udp{4,6}_lib_lookup_skb() with
Fast/frag0 due to ip{,v6}_hdr() usage (Willem).
Since v1 [2]:
- added a NULL pointer check for "uh" as suggested by Willem.
[1] https://lore.kernel.org/netdev/[email protected]
[2] https://lore.kernel.org/netdev/[email protected]
Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
Signed-off-by: Alexander Lobakin <[email protected]>
---
net/ipv4/udp.c | 4 ++--
net/ipv4/udp_offload.c | 9 ++++++---
net/ipv6/udp.c | 4 ++--
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 09f0a23d1a01..948ddc9a0212 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -534,7 +534,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
struct udp_table *udptable)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = skb_gro_network_header(skb);
return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
@@ -544,7 +544,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = skb_gro_network_header(skb);
return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index e67a66fbf27b..dbc4d17c55e9 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -366,11 +366,11 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *skb)
{
- struct udphdr *uh = udp_hdr(skb);
+ struct udphdr *uh = udp_gro_udphdr(skb);
struct sk_buff *pp = NULL;
struct udphdr *uh2;
struct sk_buff *p;
- unsigned int ulen;
+ u32 ulen, off;
int ret = 0;
/* requires non zero csum, for symmetry with GSO */
@@ -385,6 +385,9 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
NAPI_GRO_CB(skb)->flush = 1;
return NULL;
}
+
+ off = skb_gro_offset(skb);
+
/* pull encapsulating udp header */
skb_gro_pull(skb, sizeof(struct udphdr));
@@ -392,7 +395,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
if (!NAPI_GRO_CB(p)->same_flow)
continue;
- uh2 = udp_hdr(p);
+ uh2 = (void *)p->data + off;
/* Match ports only, as csum is always non zero */
if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 29d9691359b9..a256ecce76b2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -269,7 +269,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
struct udp_table *udptable)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = skb_gro_network_header(skb);
return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
@@ -279,7 +279,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph = skb_gro_network_header(skb);
return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
--
2.29.2
On Mon, Nov 9, 2020 at 4:15 PM Alexander Lobakin <[email protected]> wrote:
>
> While testing UDP GSO fraglists forwarding through driver that uses
> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> iperf packets:
>
> [ ID] Interval Transfer Bitrate Jitter
> [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
>
> Simple switch to napi_gro_receive() any other method without frag0
> shortcut completely resolved them.
>
> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> callback. While it's probably OK for non-frag0 paths (when all
> headers or even the entire frame are already in skb->data), this
> inline points to junk when using Fast GRO (napi_gro_frags() or
> napi_gro_receive() with only Ethernet header in skb->data and all
> the rest in shinfo->frags) and breaks GRO packet compilation and
> the packet flow itself.
> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> are typically used. UDP even has an inline helper that makes use of
> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> to get rid of the out-of-order delivers.
>
> Present since the introduction of plain UDP GRO in 5.0-rc1.
>
> Since v2 [1]:
> - dropped redundant check introduced in v2 as it's performed right
> before (thanks to Eric);
> - udp_hdr() switched to data + off for skbs from list (also Eric);
> - fixed possible malfunction of {,__}udp{4,6}_lib_lookup_skb() with
> Fast/frag0 due to ip{,v6}_hdr() usage (Willem).
>
> Since v1 [2]:
> - added a NULL pointer check for "uh" as suggested by Willem.
>
> [1] https://lore.kernel.org/netdev/[email protected]
> [2] https://lore.kernel.org/netdev/[email protected]
>
> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
> Signed-off-by: Alexander Lobakin <[email protected]>
> ---
> net/ipv4/udp.c | 4 ++--
> net/ipv4/udp_offload.c | 9 ++++++---
> net/ipv6/udp.c | 4 ++--
> 3 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 09f0a23d1a01..948ddc9a0212 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -534,7 +534,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
> __be16 sport, __be16 dport,
> struct udp_table *udptable)
> {
> - const struct iphdr *iph = ip_hdr(skb);
> + const struct iphdr *iph = skb_gro_network_header(skb);
This function is called from the normal UDP stack, not the GRO stack.
It's not safe to use this helper here.
>
> return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
> iph->daddr, dport, inet_iif(skb),
> @@ -544,7 +544,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
> struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
> __be16 sport, __be16 dport)
> {
> - const struct iphdr *iph = ip_hdr(skb);
> + const struct iphdr *iph = skb_gro_network_header(skb);
This one is, but I think it would be preferable to avoid leaking this
frag0 optimization stuff outside of the core GRO code if we can help
it.
I also haven't checked whether that helper is safe to call from
.gro_complete handlers such as udp_gro_complete. It's not needed
there, in any case.
Instead, perhaps we can call __udp4_lib_lookup which takes the exact
fields as arguments, and do the network header lookup in
udp_gro_complete itself.
Less important (because it wasn't working before): does the use of
skb_gro_network_header break any nested tunnel support that the
p->data + off change would add?
Greeting,
FYI, we noticed the following commit (built with gcc-9):
commit: 0b726f6b318a07644b6c2388e6e44406740f4754 ("[PATCH v3 net] net: udp: fix Fast/frag0 UDP GRO")
url: https://github.com/0day-ci/linux/commits/Alexander-Lobakin/net-udp-fix-Fast-frag0-UDP-GRO/20201110-052215
base: https://git.kernel.org/cgit/linux/kernel/git/davem/net.git 4e0396c59559264442963b349ab71f66e471f84d
in testcase: apachebench
version:
with following parameters:
runtime: 300s
concurrency: 2000
cluster: cs-localhost
cpufreq_governor: performance
ucode: 0x7000019
test-description: apachebench is a tool for benchmarking your Apache Hypertext Transfer Protocol (HTTP) server.
test-url: https://httpd.apache.org/docs/2.4/programs/ab.html
on test machine: 16 threads Intel(R) Xeon(R) CPU D-1541 @ 2.10GHz with 48G memory
caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
If you fix the issue, kindly add following tag
Reported-by: kernel test robot <[email protected]>
[ 28.582714] BUG: unable to handle page fault for address: fffffffffffffffa
[ 28.590164] #PF: supervisor read access in kernel mode
[ 28.590164] #PF: error_code(0x0000) - not-present page
[ 28.590165] PGD c7e20d067 P4D c7e20d067 PUD c7e20f067 PMD 0
[ 28.590169] Oops: 0000 [#1] SMP PTI
[ 28.590171] CPU: 15 PID: 0 Comm: swapper/15 Not tainted 5.10.0-rc2-00373-g0b726f6b318a #1
[ 28.590172] Hardware name: Supermicro SYS-5018D-FN4T/X10SDV-8C-TLN4F, BIOS 1.1 03/02/2016
[ 28.590177] RIP: 0010:__udp4_lib_rcv+0x547/0xbe0
[ 28.590178] Code: 74 0a f6 45 3c 80 74 04 44 8b 4d 28 48 8b 55 58 48 83 e2 fe 74 07 8b 52 7c 85 d2 75 06 8b 95 90 00 00 00 48 8b be f0 04 00 00 <44> 8b 58 0c 8b 48 10 55 41 55 44 89 de 41 51 41 89 d1 44 89 d2 e8
[ 28.590179] RSP: 0018:ffffc900003b4bb8 EFLAGS: 00010246
[ 28.590180] RAX: ffffffffffffffee RBX: 0000000000000011 RCX: ffff888c7bc580e2
[ 28.590181] RDX: 0000000000000002 RSI: ffff88810ddc8000 RDI: ffffffff82d68f00
[ 28.590182] RBP: ffff888c7bf8f800 R08: 00000000000003b7 R09: 0000000000000000
[ 28.590182] R10: 0000000000003500 R11: 0000000000000000 R12: ffff888c7bc580e2
[ 28.590183] R13: ffffffff82e072b0 R14: ffffffff82d68f00 R15: 0000000000000034
[ 28.590184] FS: 0000000000000000(0000) GS:ffff888c7fdc0000(0000) knlGS:0000000000000000
[ 28.590185] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 28.590186] CR2: fffffffffffffffa CR3: 0000000c7e20a006 CR4: 00000000003706e0
[ 28.590186] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 28.590187] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 28.590187] Call Trace:
[ 28.590189] <IRQ>
[ 28.590193] ip_protocol_deliver_rcu+0xc5/0x1c0
[ 28.590196] ip_local_deliver_finish+0x4b/0x60
[ 28.738714] ip_local_deliver+0x6e/0x140
[ 28.738717] ip_sublist_rcv_finish+0x57/0x80
[ 28.738719] ip_sublist_rcv+0x199/0x240
[ 28.750730] ip_list_rcv+0x13a/0x160
[ 28.750733] __netif_receive_skb_list_core+0x2a9/0x2e0
[ 28.750736] netif_receive_skb_list_internal+0x1d3/0x320
[ 28.764743] gro_normal_list+0x19/0x40
[ 28.764747] napi_complete_done+0x68/0x160
[ 28.773197] igb_poll+0x63/0x320
[ 28.773198] net_rx_action+0x136/0x3a0
[ 28.773201] __do_softirq+0xe1/0x2c3
[ 28.773204] asm_call_irq_on_stack+0x12/0x20
[ 28.773205] </IRQ>
[ 28.773208] do_softirq_own_stack+0x37/0x40
[ 28.773211] irq_exit_rcu+0xd2/0xe0
[ 28.773213] common_interrupt+0x74/0x140
[ 28.773216] asm_common_interrupt+0x1e/0x40
[ 28.773219] RIP: 0010:cpuidle_enter_state+0xd2/0x360
[ 28.773221] Code: 49 89 c5 0f 1f 44 00 00 31 ff e8 a9 5d 7a ff 45 84 ff 74 12 9c 58 f6 c4 02 0f 85 62 02 00 00 31 ff e8 32 f7 80 ff fb 45 85 f6 <0f> 88 fb 00 00 00 49 63 c6 4c 2b 2c 24 48 8d 14 40 48 8d 14 90 49
[ 28.773223] RSP: 0018:ffffc90000117e80 EFLAGS: 00000202
[ 28.829581] RAX: ffff888c7fdeb140 RBX: 0000000000000004 RCX: 000000000000001f
[ 28.829582] RDX: 0000000000000000 RSI: 000000003cf3d30b RDI: 0000000000000000
[ 28.829583] RBP: ffff888c7fdf6220 R08: 00000006a7a94966 R09: 000000000002aa70
[ 28.829583] R10: 00000000000001a9 R11: ffff888c7fdea104 R12: ffffffff82ce7e00
[ 28.829584] R13: 00000006a7a94966 R14: 0000000000000004 R15: 0000000000000000
[ 28.829588] cpuidle_enter+0x29/0x40
[ 28.829591] do_idle+0x1cb/0x260
[ 28.829593] cpu_startup_entry+0x19/0x20
[ 28.829595] start_secondary+0x114/0x160
[ 28.829598] secondary_startup_64_no_verify+0xc2/0xcb
[ 28.829599] Modules linked in: acpi_cpufreq(-) ipmi_msghandler wmi acpi_pad ip_tables
[ 28.829604] CR2: fffffffffffffffa
[ 28.829609] ---[ end trace c361868dfd9e225e ]---
To reproduce:
git clone https://github.com/intel/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml
Thanks,
Oliver Sang
On Fri, Nov 13, 2020 at 7:00 AM kernel test robot <[email protected]> wrote:
>
>
> Greeting,
>
> FYI, we noticed the following commit (built with gcc-9):
>
> commit: 0b726f6b318a07644b6c2388e6e44406740f4754 ("[PATCH v3 net] net: udp: fix Fast/frag0 UDP GRO")
> url: https://github.com/0day-ci/linux/commits/Alexander-Lobakin/net-udp-fix-Fast-frag0-UDP-GRO/20201110-052215
> base: https://git.kernel.org/cgit/linux/kernel/git/davem/net.git 4e0396c59559264442963b349ab71f66e471f84d
>
> in testcase: apachebench
> version:
> with following parameters:
>
> runtime: 300s
> concurrency: 2000
> cluster: cs-localhost
> cpufreq_governor: performance
> ucode: 0x7000019
>
> test-description: apachebench is a tool for benchmarking your Apache Hypertext Transfer Protocol (HTTP) server.
> test-url: https://httpd.apache.org/docs/2.4/programs/ab.html
>
>
> on test machine: 16 threads Intel(R) Xeon(R) CPU D-1541 @ 2.10GHz with 48G memory
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
>
> If you fix the issue, kindly add following tag
> Reported-by: kernel test robot <[email protected]>
>
>
> [ 28.582714] BUG: unable to handle page fault for address: fffffffffffffffa
> [ 28.590164] #PF: supervisor read access in kernel mode
> [ 28.590164] #PF: error_code(0x0000) - not-present page
> [ 28.590165] PGD c7e20d067 P4D c7e20d067 PUD c7e20f067 PMD 0
> [ 28.590169] Oops: 0000 [#1] SMP PTI
> [ 28.590171] CPU: 15 PID: 0 Comm: swapper/15 Not tainted 5.10.0-rc2-00373-g0b726f6b318a #1
> [ 28.590172] Hardware name: Supermicro SYS-5018D-FN4T/X10SDV-8C-TLN4F, BIOS 1.1 03/02/2016
> [ 28.590177] RIP: 0010:__udp4_lib_rcv+0x547/0xbe0
> [ 28.590178] Code: 74 0a f6 45 3c 80 74 04 44 8b 4d 28 48 8b 55 58 48 83 e2 fe 74 07 8b 52 7c 85 d2 75 06 8b 95 90 00 00 00 48 8b be f0 04 00 00 <44> 8b 58 0c 8b 48 10 55 41 55 44 89 de 41 51 41 89 d1 44 89 d2 e8
> [ 28.590179] RSP: 0018:ffffc900003b4bb8 EFLAGS: 00010246
> [ 28.590180] RAX: ffffffffffffffee RBX: 0000000000000011 RCX: ffff888c7bc580e2
> [ 28.590181] RDX: 0000000000000002 RSI: ffff88810ddc8000 RDI: ffffffff82d68f00
> [ 28.590182] RBP: ffff888c7bf8f800 R08: 00000000000003b7 R09: 0000000000000000
> [ 28.590182] R10: 0000000000003500 R11: 0000000000000000 R12: ffff888c7bc580e2
> [ 28.590183] R13: ffffffff82e072b0 R14: ffffffff82d68f00 R15: 0000000000000034
> [ 28.590184] FS: 0000000000000000(0000) GS:ffff888c7fdc0000(0000) knlGS:0000000000000000
> [ 28.590185] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 28.590186] CR2: fffffffffffffffa CR3: 0000000c7e20a006 CR4: 00000000003706e0
> [ 28.590186] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 28.590187] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 28.590187] Call Trace:
> [ 28.590189] <IRQ>
> [ 28.590193] ip_protocol_deliver_rcu+0xc5/0x1c0
> [ 28.590196] ip_local_deliver_finish+0x4b/0x60
> [ 28.738714] ip_local_deliver+0x6e/0x140
> [ 28.738717] ip_sublist_rcv_finish+0x57/0x80
> [ 28.738719] ip_sublist_rcv+0x199/0x240
> [ 28.750730] ip_list_rcv+0x13a/0x160
> [ 28.750733] __netif_receive_skb_list_core+0x2a9/0x2e0
> [ 28.750736] netif_receive_skb_list_internal+0x1d3/0x320
> [ 28.764743] gro_normal_list+0x19/0x40
> [ 28.764747] napi_complete_done+0x68/0x160
> [ 28.773197] igb_poll+0x63/0x320
> [ 28.773198] net_rx_action+0x136/0x3a0
> [ 28.773201] __do_softirq+0xe1/0x2c3
> [ 28.773204] asm_call_irq_on_stack+0x12/0x20
> [ 28.773205] </IRQ>
> [ 28.773208] do_softirq_own_stack+0x37/0x40
> [ 28.773211] irq_exit_rcu+0xd2/0xe0
> [ 28.773213] common_interrupt+0x74/0x140
> [ 28.773216] asm_common_interrupt+0x1e/0x40
> [ 28.773219] RIP: 0010:cpuidle_enter_state+0xd2/0x360
This was expected. This v3 of the patch has already been superseded by
one that addresses this lookup:
> @@ -534,7 +534,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
> __be16 sport, __be16 dport,
> struct udp_table *udptable)
> {
> - const struct iphdr *iph = ip_hdr(skb);
> + const struct iphdr *iph = skb_gro_network_header(skb);
The merged version was v5 and lacks this change.
From: Alexander Lobakin <[email protected]>
Date: Mon, 16 Nov 2020 12:54:42 +0000
> From: Willem de Bruijn <[email protected]>
> Date: Fri, 13 Nov 2020 10:51:36 -0500
>
> Hi!
>
>> On Fri, Nov 13, 2020 at 7:00 AM kernel test robot <[email protected]> wrote:
>>>
>>>
>>> Greeting,
>>>
>>> FYI, we noticed the following commit (built with gcc-9):
>>>
>>> commit: 0b726f6b318a07644b6c2388e6e44406740f4754 ("[PATCH v3 net] net: udp: fix Fast/frag0 UDP GRO")
>>> url: https://github.com/0day-ci/linux/commits/Alexander-Lobakin/net-udp-fix-Fast-frag0-UDP-GRO/20201110-052215
>>> base: https://git.kernel.org/cgit/linux/kernel/git/davem/net.git 4e0396c59559264442963b349ab71f66e471f84d
>>>
>>> in testcase: apachebench
>>> version:
>>> with following parameters:
>>>
>>> runtime: 300s
>>> concurrency: 2000
>>> cluster: cs-localhost
>>> cpufreq_governor: performance
>>> ucode: 0x7000019
>>>
>>> test-description: apachebench is a tool for benchmarking your Apache Hypertext Transfer Protocol (HTTP) server.
>>> test-url: https://httpd.apache.org/docs/2.4/programs/ab.html
>>>
>>>
>>> on test machine: 16 threads Intel(R) Xeon(R) CPU D-1541 @ 2.10GHz with 48G memory
>>>
>>> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>>>
>>>
>>> If you fix the issue, kindly add following tag
>>> Reported-by: kernel test robot <[email protected]>
>>>
>>>
>>> [ 28.582714] BUG: unable to handle page fault for address: fffffffffffffffa
>>> [ 28.590164] #PF: supervisor read access in kernel mode
>>> [ 28.590164] #PF: error_code(0x0000) - not-present page
>>> [ 28.590165] PGD c7e20d067 P4D c7e20d067 PUD c7e20f067 PMD 0
>>> [ 28.590169] Oops: 0000 [#1] SMP PTI
>>> [ 28.590171] CPU: 15 PID: 0 Comm: swapper/15 Not tainted 5.10.0-rc2-00373-g0b726f6b318a #1
>>> [ 28.590172] Hardware name: Supermicro SYS-5018D-FN4T/X10SDV-8C-TLN4F, BIOS 1.1 03/02/2016
>>> [ 28.590177] RIP: 0010:__udp4_lib_rcv+0x547/0xbe0
>>> [ 28.590178] Code: 74 0a f6 45 3c 80 74 04 44 8b 4d 28 48 8b 55 58 48 83 e2 fe 74 07 8b 52 7c 85 d2 75 06 8b 95 90 00 00 00 48 8b be f0 04 00 00 <44> 8b 58 0c 8b 48 10 55 41 55 44 89 de 41 51 41 89 d1 44 89 d2 e8
>>> [ 28.590179] RSP: 0018:ffffc900003b4bb8 EFLAGS: 00010246
>>> [ 28.590180] RAX: ffffffffffffffee RBX: 0000000000000011 RCX: ffff888c7bc580e2
>>> [ 28.590181] RDX: 0000000000000002 RSI: ffff88810ddc8000 RDI: ffffffff82d68f00
>>> [ 28.590182] RBP: ffff888c7bf8f800 R08: 00000000000003b7 R09: 0000000000000000
>>> [ 28.590182] R10: 0000000000003500 R11: 0000000000000000 R12: ffff888c7bc580e2
>>> [ 28.590183] R13: ffffffff82e072b0 R14: ffffffff82d68f00 R15: 0000000000000034
>>> [ 28.590184] FS: 0000000000000000(0000) GS:ffff888c7fdc0000(0000) knlGS:0000000000000000
>>> [ 28.590185] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [ 28.590186] CR2: fffffffffffffffa CR3: 0000000c7e20a006 CR4: 00000000003706e0
>>> [ 28.590186] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>>> [ 28.590187] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>>> [ 28.590187] Call Trace:
>>> [ 28.590189] <IRQ>
>>> [ 28.590193] ip_protocol_deliver_rcu+0xc5/0x1c0
>>> [ 28.590196] ip_local_deliver_finish+0x4b/0x60
>>> [ 28.738714] ip_local_deliver+0x6e/0x140
>>> [ 28.738717] ip_sublist_rcv_finish+0x57/0x80
>>> [ 28.738719] ip_sublist_rcv+0x199/0x240
>>> [ 28.750730] ip_list_rcv+0x13a/0x160
>>> [ 28.750733] __netif_receive_skb_list_core+0x2a9/0x2e0
>>> [ 28.750736] netif_receive_skb_list_internal+0x1d3/0x320
>>> [ 28.764743] gro_normal_list+0x19/0x40
>>> [ 28.764747] napi_complete_done+0x68/0x160
>>> [ 28.773197] igb_poll+0x63/0x320
>>> [ 28.773198] net_rx_action+0x136/0x3a0
>>> [ 28.773201] __do_softirq+0xe1/0x2c3
>>> [ 28.773204] asm_call_irq_on_stack+0x12/0x20
>>> [ 28.773205] </IRQ>
>>> [ 28.773208] do_softirq_own_stack+0x37/0x40
>>> [ 28.773211] irq_exit_rcu+0xd2/0xe0
>>> [ 28.773213] common_interrupt+0x74/0x140
>>> [ 28.773216] asm_common_interrupt+0x1e/0x40
>>> [ 28.773219] RIP: 0010:cpuidle_enter_state+0xd2/0x360
>>
>> This was expected. This v3 of the patch has already been superseded by
>> one that addresses this lookup:
>
> Wait. This page fault happens on IP receive, which is performed after
> all GRO processing. At this point, all headers are pulled to skb->head,
> and no GRO helpers are needed to access them.
> The function that causes that, __udp4_lib_rcv(), uses ip_hdr() a lot,
> and it's safe. There should be another questionable point.
Oh well. I simply missed that this autotest was performed with v3.
I thought that the merged one was found to be broken. Glad it's fine.
>>> @@ -534,7 +534,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
>>> __be16 sport, __be16 dport,
>>> struct udp_table *udptable)
>>> {
>>> - const struct iphdr *iph = ip_hdr(skb);
>>> + const struct iphdr *iph = skb_gro_network_header(skb);
>>
>> The merged version was v5 and lacks this change.
>
> Thanks,
> Al
Al
On Mon, Nov 16, 2020 at 8:07 AM Alexander Lobakin <[email protected]> wrote:
>
> From: Willem de Bruijn <[email protected]>
> Date: Fri, 13 Nov 2020 10:51:36 -0500
>
> Hi!
>
> > On Fri, Nov 13, 2020 at 7:00 AM kernel test robot <[email protected]> wrote:
> >>
> >>
> >> Greeting,
> >>
> >> FYI, we noticed the following commit (built with gcc-9):
> >>
> >> commit: 0b726f6b318a07644b6c2388e6e44406740f4754 ("[PATCH v3 net] net: udp: fix Fast/frag0 UDP GRO")
> >> url: https://github.com/0day-ci/linux/commits/Alexander-Lobakin/net-udp-fix-Fast-frag0-UDP-GRO/20201110-052215
> >> base: https://git.kernel.org/cgit/linux/kernel/git/davem/net.git 4e0396c59559264442963b349ab71f66e471f84d
> >>
> >> in testcase: apachebench
> >> version:
> >> with following parameters:
> >>
> >> runtime: 300s
> >> concurrency: 2000
> >> cluster: cs-localhost
> >> cpufreq_governor: performance
> >> ucode: 0x7000019
> >>
> >> test-description: apachebench is a tool for benchmarking your Apache Hypertext Transfer Protocol (HTTP) server.
> >> test-url: https://httpd.apache.org/docs/2.4/programs/ab.html
> >>
> >>
> >> on test machine: 16 threads Intel(R) Xeon(R) CPU D-1541 @ 2.10GHz with 48G memory
> >>
> >> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> >>
> >>
> >> If you fix the issue, kindly add following tag
> >> Reported-by: kernel test robot <[email protected]>
> >>
> >>
> >> [ 28.582714] BUG: unable to handle page fault for address: fffffffffffffffa
> >> [ 28.590164] #PF: supervisor read access in kernel mode
> >> [ 28.590164] #PF: error_code(0x0000) - not-present page
> >> [ 28.590165] PGD c7e20d067 P4D c7e20d067 PUD c7e20f067 PMD 0
> >> [ 28.590169] Oops: 0000 [#1] SMP PTI
> >> [ 28.590171] CPU: 15 PID: 0 Comm: swapper/15 Not tainted 5.10.0-rc2-00373-g0b726f6b318a #1
> >> [ 28.590172] Hardware name: Supermicro SYS-5018D-FN4T/X10SDV-8C-TLN4F, BIOS 1.1 03/02/2016
> >> [ 28.590177] RIP: 0010:__udp4_lib_rcv+0x547/0xbe0
> >> [ 28.590178] Code: 74 0a f6 45 3c 80 74 04 44 8b 4d 28 48 8b 55 58 48 83 e2 fe 74 07 8b 52 7c 85 d2 75 06 8b 95 90 00 00 00 48 8b be f0 04 00 00 <44> 8b 58 0c 8b 48 10 55 41 55 44 89 de 41 51 41 89 d1 44 89 d2 e8
> >> [ 28.590179] RSP: 0018:ffffc900003b4bb8 EFLAGS: 00010246
> >> [ 28.590180] RAX: ffffffffffffffee RBX: 0000000000000011 RCX: ffff888c7bc580e2
> >> [ 28.590181] RDX: 0000000000000002 RSI: ffff88810ddc8000 RDI: ffffffff82d68f00
> >> [ 28.590182] RBP: ffff888c7bf8f800 R08: 00000000000003b7 R09: 0000000000000000
> >> [ 28.590182] R10: 0000000000003500 R11: 0000000000000000 R12: ffff888c7bc580e2
> >> [ 28.590183] R13: ffffffff82e072b0 R14: ffffffff82d68f00 R15: 0000000000000034
> >> [ 28.590184] FS: 0000000000000000(0000) GS:ffff888c7fdc0000(0000) knlGS:0000000000000000
> >> [ 28.590185] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> >> [ 28.590186] CR2: fffffffffffffffa CR3: 0000000c7e20a006 CR4: 00000000003706e0
> >> [ 28.590186] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> >> [ 28.590187] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> >> [ 28.590187] Call Trace:
> >> [ 28.590189] <IRQ>
> >> [ 28.590193] ip_protocol_deliver_rcu+0xc5/0x1c0
> >> [ 28.590196] ip_local_deliver_finish+0x4b/0x60
> >> [ 28.738714] ip_local_deliver+0x6e/0x140
> >> [ 28.738717] ip_sublist_rcv_finish+0x57/0x80
> >> [ 28.738719] ip_sublist_rcv+0x199/0x240
> >> [ 28.750730] ip_list_rcv+0x13a/0x160
> >> [ 28.750733] __netif_receive_skb_list_core+0x2a9/0x2e0
> >> [ 28.750736] netif_receive_skb_list_internal+0x1d3/0x320
> >> [ 28.764743] gro_normal_list+0x19/0x40
> >> [ 28.764747] napi_complete_done+0x68/0x160
> >> [ 28.773197] igb_poll+0x63/0x320
> >> [ 28.773198] net_rx_action+0x136/0x3a0
> >> [ 28.773201] __do_softirq+0xe1/0x2c3
> >> [ 28.773204] asm_call_irq_on_stack+0x12/0x20
> >> [ 28.773205] </IRQ>
> >> [ 28.773208] do_softirq_own_stack+0x37/0x40
> >> [ 28.773211] irq_exit_rcu+0xd2/0xe0
> >> [ 28.773213] common_interrupt+0x74/0x140
> >> [ 28.773216] asm_common_interrupt+0x1e/0x40
> >> [ 28.773219] RIP: 0010:cpuidle_enter_state+0xd2/0x360
> >
> > This was expected. This v3 of the patch has already been superseded by
> > one that addresses this lookup:
>
> Wait. This page fault happens on IP receive, which is performed after
> all GRO processing. At this point, all headers are pulled to skb->head,
> and no GRO helpers are needed to access them.
> The function that causes that, __udp4_lib_rcv(), uses ip_hdr() a lot,
> and it's safe. There should be another questionable point.
Right, patch v3 calls skb_gro_network_header from
__udp4_lib_lookup_skb, which is called from __udp4_lib_rcv, the
function at which the crash is reported.
@@ -534,7 +534,7 @@ static inline struct sock
*__udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport,
struct udp_table *udptable)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph = skb_gro_network_header(skb);
From: Willem de Bruijn <[email protected]>
Date: Fri, 13 Nov 2020 10:51:36 -0500
Hi!
> On Fri, Nov 13, 2020 at 7:00 AM kernel test robot <[email protected]> wrote:
>>
>>
>> Greeting,
>>
>> FYI, we noticed the following commit (built with gcc-9):
>>
>> commit: 0b726f6b318a07644b6c2388e6e44406740f4754 ("[PATCH v3 net] net: udp: fix Fast/frag0 UDP GRO")
>> url: https://github.com/0day-ci/linux/commits/Alexander-Lobakin/net-udp-fix-Fast-frag0-UDP-GRO/20201110-052215
>> base: https://git.kernel.org/cgit/linux/kernel/git/davem/net.git 4e0396c59559264442963b349ab71f66e471f84d
>>
>> in testcase: apachebench
>> version:
>> with following parameters:
>>
>> runtime: 300s
>> concurrency: 2000
>> cluster: cs-localhost
>> cpufreq_governor: performance
>> ucode: 0x7000019
>>
>> test-description: apachebench is a tool for benchmarking your Apache Hypertext Transfer Protocol (HTTP) server.
>> test-url: https://httpd.apache.org/docs/2.4/programs/ab.html
>>
>>
>> on test machine: 16 threads Intel(R) Xeon(R) CPU D-1541 @ 2.10GHz with 48G memory
>>
>> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>>
>>
>> If you fix the issue, kindly add following tag
>> Reported-by: kernel test robot <[email protected]>
>>
>>
>> [ 28.582714] BUG: unable to handle page fault for address: fffffffffffffffa
>> [ 28.590164] #PF: supervisor read access in kernel mode
>> [ 28.590164] #PF: error_code(0x0000) - not-present page
>> [ 28.590165] PGD c7e20d067 P4D c7e20d067 PUD c7e20f067 PMD 0
>> [ 28.590169] Oops: 0000 [#1] SMP PTI
>> [ 28.590171] CPU: 15 PID: 0 Comm: swapper/15 Not tainted 5.10.0-rc2-00373-g0b726f6b318a #1
>> [ 28.590172] Hardware name: Supermicro SYS-5018D-FN4T/X10SDV-8C-TLN4F, BIOS 1.1 03/02/2016
>> [ 28.590177] RIP: 0010:__udp4_lib_rcv+0x547/0xbe0
>> [ 28.590178] Code: 74 0a f6 45 3c 80 74 04 44 8b 4d 28 48 8b 55 58 48 83 e2 fe 74 07 8b 52 7c 85 d2 75 06 8b 95 90 00 00 00 48 8b be f0 04 00 00 <44> 8b 58 0c 8b 48 10 55 41 55 44 89 de 41 51 41 89 d1 44 89 d2 e8
>> [ 28.590179] RSP: 0018:ffffc900003b4bb8 EFLAGS: 00010246
>> [ 28.590180] RAX: ffffffffffffffee RBX: 0000000000000011 RCX: ffff888c7bc580e2
>> [ 28.590181] RDX: 0000000000000002 RSI: ffff88810ddc8000 RDI: ffffffff82d68f00
>> [ 28.590182] RBP: ffff888c7bf8f800 R08: 00000000000003b7 R09: 0000000000000000
>> [ 28.590182] R10: 0000000000003500 R11: 0000000000000000 R12: ffff888c7bc580e2
>> [ 28.590183] R13: ffffffff82e072b0 R14: ffffffff82d68f00 R15: 0000000000000034
>> [ 28.590184] FS: 0000000000000000(0000) GS:ffff888c7fdc0000(0000) knlGS:0000000000000000
>> [ 28.590185] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [ 28.590186] CR2: fffffffffffffffa CR3: 0000000c7e20a006 CR4: 00000000003706e0
>> [ 28.590186] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>> [ 28.590187] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>> [ 28.590187] Call Trace:
>> [ 28.590189] <IRQ>
>> [ 28.590193] ip_protocol_deliver_rcu+0xc5/0x1c0
>> [ 28.590196] ip_local_deliver_finish+0x4b/0x60
>> [ 28.738714] ip_local_deliver+0x6e/0x140
>> [ 28.738717] ip_sublist_rcv_finish+0x57/0x80
>> [ 28.738719] ip_sublist_rcv+0x199/0x240
>> [ 28.750730] ip_list_rcv+0x13a/0x160
>> [ 28.750733] __netif_receive_skb_list_core+0x2a9/0x2e0
>> [ 28.750736] netif_receive_skb_list_internal+0x1d3/0x320
>> [ 28.764743] gro_normal_list+0x19/0x40
>> [ 28.764747] napi_complete_done+0x68/0x160
>> [ 28.773197] igb_poll+0x63/0x320
>> [ 28.773198] net_rx_action+0x136/0x3a0
>> [ 28.773201] __do_softirq+0xe1/0x2c3
>> [ 28.773204] asm_call_irq_on_stack+0x12/0x20
>> [ 28.773205] </IRQ>
>> [ 28.773208] do_softirq_own_stack+0x37/0x40
>> [ 28.773211] irq_exit_rcu+0xd2/0xe0
>> [ 28.773213] common_interrupt+0x74/0x140
>> [ 28.773216] asm_common_interrupt+0x1e/0x40
>> [ 28.773219] RIP: 0010:cpuidle_enter_state+0xd2/0x360
>
> This was expected. This v3 of the patch has already been superseded by
> one that addresses this lookup:
Wait. This page fault happens on IP receive, which is performed after
all GRO processing. At this point, all headers are pulled to skb->head,
and no GRO helpers are needed to access them.
The function that causes that, __udp4_lib_rcv(), uses ip_hdr() a lot,
and it's safe. There should be another questionable point.
>> @@ -534,7 +534,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
>> __be16 sport, __be16 dport,
>> struct udp_table *udptable)
>> {
>> - const struct iphdr *iph = ip_hdr(skb);
>> + const struct iphdr *iph = skb_gro_network_header(skb);
>
> The merged version was v5 and lacks this change.
Thanks,
Al