While testing UDP GSO fraglists forwarding through driver that uses
Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
iperf packets:
[ ID] Interval Transfer Bitrate Jitter
[SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
Simple switch to napi_gro_receive() any other method without frag0
shortcut completely resolved them.
I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
callback. While it's probably OK for non-frag0 paths (when all
headers or even the entire frame are already in skb->data), this
inline points to junk when using Fast GRO (napi_gro_frags() or
napi_gro_receive() with only Ethernet header in skb->data and all
the rest in shinfo->frags) and breaks GRO packet compilation and
the packet flow itself.
To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
are typically used. UDP even has an inline helper that makes use of
them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
to get rid of the out-of-order delivers.
Present since the introduction of plain UDP GRO in 5.0-rc1.
Since v1 [1]:
- added a NULL pointer check for "uh" as suggested by Willem.
[1] https://lore.kernel.org/netdev/[email protected]
Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
Signed-off-by: Alexander Lobakin <[email protected]>
---
net/ipv4/udp_offload.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index e67a66fbf27b..7f6bd221880a 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *skb)
{
- struct udphdr *uh = udp_hdr(skb);
+ struct udphdr *uh = udp_gro_udphdr(skb);
struct sk_buff *pp = NULL;
struct udphdr *uh2;
struct sk_buff *p;
unsigned int ulen;
int ret = 0;
+ if (unlikely(!uh)) {
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+ }
+
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
NAPI_GRO_CB(skb)->flush = 1;
--
2.29.2
On Mon, Nov 9, 2020 at 11:57 AM Alexander Lobakin <[email protected]> wrote:
>
> While testing UDP GSO fraglists forwarding through driver that uses
> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> iperf packets:
>
> [ ID] Interval Transfer Bitrate Jitter
> [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
>
> Simple switch to napi_gro_receive() any other method without frag0
> shortcut completely resolved them.
>
> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> callback. While it's probably OK for non-frag0 paths (when all
> headers or even the entire frame are already in skb->data), this
> inline points to junk when using Fast GRO (napi_gro_frags() or
> napi_gro_receive() with only Ethernet header in skb->data and all
> the rest in shinfo->frags) and breaks GRO packet compilation and
> the packet flow itself.
> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> are typically used. UDP even has an inline helper that makes use of
> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> to get rid of the out-of-order delivers.
>
> Present since the introduction of plain UDP GRO in 5.0-rc1.
>
> Since v1 [1]:
> - added a NULL pointer check for "uh" as suggested by Willem.
>
> [1] https://lore.kernel.org/netdev/[email protected]
>
> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
> Signed-off-by: Alexander Lobakin <[email protected]>
Acked-by: Willem de Bruijn <[email protected]>
Thanks
On 11/9/20 5:56 PM, Alexander Lobakin wrote:
> While testing UDP GSO fraglists forwarding through driver that uses
> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> iperf packets:
>
> [ ID] Interval Transfer Bitrate Jitter
> [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
>
> Simple switch to napi_gro_receive() any other method without frag0
> shortcut completely resolved them.
>
> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> callback. While it's probably OK for non-frag0 paths (when all
> headers or even the entire frame are already in skb->data), this
> inline points to junk when using Fast GRO (napi_gro_frags() or
> napi_gro_receive() with only Ethernet header in skb->data and all
> the rest in shinfo->frags) and breaks GRO packet compilation and
> the packet flow itself.
> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> are typically used. UDP even has an inline helper that makes use of
> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> to get rid of the out-of-order delivers.
>
> Present since the introduction of plain UDP GRO in 5.0-rc1.
>
> Since v1 [1]:
> - added a NULL pointer check for "uh" as suggested by Willem.
>
> [1] https://lore.kernel.org/netdev/[email protected]
>
> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
> Signed-off-by: Alexander Lobakin <[email protected]>
> ---
> net/ipv4/udp_offload.c | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index e67a66fbf27b..7f6bd221880a 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
> static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> struct sk_buff *skb)
> {
> - struct udphdr *uh = udp_hdr(skb);
> + struct udphdr *uh = udp_gro_udphdr(skb);
> struct sk_buff *pp = NULL;
> struct udphdr *uh2;
> struct sk_buff *p;
> unsigned int ulen;
> int ret = 0;
>
> + if (unlikely(!uh)) {
How uh could be NULL here ?
My understanding is that udp_gro_receive() is called
only after udp4_gro_receive() or udp6_gro_receive()
validated that udp_gro_udphdr(skb) was not NULL.
> + NAPI_GRO_CB(skb)->flush = 1;
> + return NULL;
> + }
> +
> /* requires non zero csum, for symmetry with GSO */
> if (!uh->check) {
> NAPI_GRO_CB(skb)->flush = 1;
>
Why uh2 is left unchanged ?
uh2 = udp_hdr(p);
...
From: Eric Dumazet <[email protected]>
Date: Mon, 9 Nov 2020 18:37:36 +0100
> On 11/9/20 5:56 PM, Alexander Lobakin wrote:
>> While testing UDP GSO fraglists forwarding through driver that uses
>> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
>> iperf packets:
>>
>> [ ID] Interval Transfer Bitrate Jitter
>> [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
>>
>> Simple switch to napi_gro_receive() any other method without frag0
>> shortcut completely resolved them.
>>
>> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
>> callback. While it's probably OK for non-frag0 paths (when all
>> headers or even the entire frame are already in skb->data), this
>> inline points to junk when using Fast GRO (napi_gro_frags() or
>> napi_gro_receive() with only Ethernet header in skb->data and all
>> the rest in shinfo->frags) and breaks GRO packet compilation and
>> the packet flow itself.
>> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
>> are typically used. UDP even has an inline helper that makes use of
>> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
>> to get rid of the out-of-order delivers.
>>
>> Present since the introduction of plain UDP GRO in 5.0-rc1.
>>
>> Since v1 [1]:
>> - added a NULL pointer check for "uh" as suggested by Willem.
>>
>> [1] https://lore.kernel.org/netdev/[email protected]
>>
>> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
>> Signed-off-by: Alexander Lobakin <[email protected]>
>> ---
>> net/ipv4/udp_offload.c | 7 ++++++-
>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>> index e67a66fbf27b..7f6bd221880a 100644
>> --- a/net/ipv4/udp_offload.c
>> +++ b/net/ipv4/udp_offload.c
>> @@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
>> static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>> struct sk_buff *skb)
>> {
>> - struct udphdr *uh = udp_hdr(skb);
>> + struct udphdr *uh = udp_gro_udphdr(skb);
>> struct sk_buff *pp = NULL;
>> struct udphdr *uh2;
>> struct sk_buff *p;
>> unsigned int ulen;
>> int ret = 0;
>>
>> + if (unlikely(!uh)) {
>
> How uh could be NULL here ?
>
> My understanding is that udp_gro_receive() is called
> only after udp4_gro_receive() or udp6_gro_receive()
> validated that udp_gro_udphdr(skb) was not NULL.
Right, but only after udp{4,6}_lib_lookup_skb() in certain cases.
I don't know for sure if their logic can actually edit skb->data,
so it's better to check from my point of view.
>> + NAPI_GRO_CB(skb)->flush = 1;
>> + return NULL;
>> + }
>> +
>> /* requires non zero csum, for symmetry with GSO */
>> if (!uh->check) {
>> NAPI_GRO_CB(skb)->flush = 1;
>>
>
>Why uh2 is left unchanged ?
>
> uh2 = udp_hdr(p);
>
>...
Packets from list_head *head have their headers already pulled to
skb->data in 100% cases, no need to change anything here.
I double-checked that udp_hdr(p) always returns the same pointer as
"p->data + network offset" and left it as it is.
Thanks,
Al
On Mon, Nov 9, 2020 at 12:37 PM Eric Dumazet <[email protected]> wrote:
>
>
>
> On 11/9/20 5:56 PM, Alexander Lobakin wrote:
> > While testing UDP GSO fraglists forwarding through driver that uses
> > Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> > iperf packets:
> >
> > [ ID] Interval Transfer Bitrate Jitter
> > [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
> >
> > Simple switch to napi_gro_receive() any other method without frag0
> > shortcut completely resolved them.
> >
> > I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> > callback. While it's probably OK for non-frag0 paths (when all
> > headers or even the entire frame are already in skb->data), this
> > inline points to junk when using Fast GRO (napi_gro_frags() or
> > napi_gro_receive() with only Ethernet header in skb->data and all
> > the rest in shinfo->frags) and breaks GRO packet compilation and
> > the packet flow itself.
> > To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> > are typically used. UDP even has an inline helper that makes use of
> > them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> > to get rid of the out-of-order delivers.
> >
> > Present since the introduction of plain UDP GRO in 5.0-rc1.
> >
> > Since v1 [1]:
> > - added a NULL pointer check for "uh" as suggested by Willem.
> >
> > [1] https://lore.kernel.org/netdev/[email protected]
> >
> > Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
> > Signed-off-by: Alexander Lobakin <[email protected]>
> > ---
> > net/ipv4/udp_offload.c | 7 ++++++-
> > 1 file changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> > index e67a66fbf27b..7f6bd221880a 100644
> > --- a/net/ipv4/udp_offload.c
> > +++ b/net/ipv4/udp_offload.c
> > @@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
> > static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> > struct sk_buff *skb)
> > {
> > - struct udphdr *uh = udp_hdr(skb);
> > + struct udphdr *uh = udp_gro_udphdr(skb);
> > struct sk_buff *pp = NULL;
> > struct udphdr *uh2;
> > struct sk_buff *p;
> > unsigned int ulen;
> > int ret = 0;
> >
> > + if (unlikely(!uh)) {
>
> How uh could be NULL here ?
>
> My understanding is that udp_gro_receive() is called
> only after udp4_gro_receive() or udp6_gro_receive()
> validated that udp_gro_udphdr(skb) was not NULL.
Oh indeed. This has already been checked before.
> > + NAPI_GRO_CB(skb)->flush = 1;
> > + return NULL;
> > + }
> > +
> > /* requires non zero csum, for symmetry with GSO */
> > if (!uh->check) {
> > NAPI_GRO_CB(skb)->flush = 1;
> >
>
> Why uh2 is left unchanged ?
>
> uh2 = udp_hdr(p);
Isn't that the same as th2 = tcp_hdr(p) in tcp_gro_receive? no frag0
optimization to worry about for packets on the list.
> ...
>
On 11/9/20 7:28 PM, Willem de Bruijn wrote:
> On Mon, Nov 9, 2020 at 12:37 PM Eric Dumazet <[email protected]> wrote:
>>
>>
>>
>> On 11/9/20 5:56 PM, Alexander Lobakin wrote:
>>> While testing UDP GSO fraglists forwarding through driver that uses
>>> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
>>> iperf packets:
>>>
>>> [ ID] Interval Transfer Bitrate Jitter
>>> [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
>>>
>>> Simple switch to napi_gro_receive() any other method without frag0
>>> shortcut completely resolved them.
>>>
>>> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
>>> callback. While it's probably OK for non-frag0 paths (when all
>>> headers or even the entire frame are already in skb->data), this
>>> inline points to junk when using Fast GRO (napi_gro_frags() or
>>> napi_gro_receive() with only Ethernet header in skb->data and all
>>> the rest in shinfo->frags) and breaks GRO packet compilation and
>>> the packet flow itself.
>>> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
>>> are typically used. UDP even has an inline helper that makes use of
>>> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
>>> to get rid of the out-of-order delivers.
>>>
>>> Present since the introduction of plain UDP GRO in 5.0-rc1.
>>>
>>> Since v1 [1]:
>>> - added a NULL pointer check for "uh" as suggested by Willem.
>>>
>>> [1] https://lore.kernel.org/netdev/[email protected]
>>>
>>> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
>>> Signed-off-by: Alexander Lobakin <[email protected]>
>>> ---
>>> net/ipv4/udp_offload.c | 7 ++++++-
>>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>>> index e67a66fbf27b..7f6bd221880a 100644
>>> --- a/net/ipv4/udp_offload.c
>>> +++ b/net/ipv4/udp_offload.c
>>> @@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
>>> static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>>> struct sk_buff *skb)
>>> {
>>> - struct udphdr *uh = udp_hdr(skb);
>>> + struct udphdr *uh = udp_gro_udphdr(skb);
>>> struct sk_buff *pp = NULL;
>>> struct udphdr *uh2;
>>> struct sk_buff *p;
>>> unsigned int ulen;
>>> int ret = 0;
>>>
>>> + if (unlikely(!uh)) {
>>
>> How uh could be NULL here ?
>>
>> My understanding is that udp_gro_receive() is called
>> only after udp4_gro_receive() or udp6_gro_receive()
>> validated that udp_gro_udphdr(skb) was not NULL.
>
> Oh indeed. This has already been checked before.
>
>>> + NAPI_GRO_CB(skb)->flush = 1;
>>> + return NULL;
>>> + }
>>> +
>>> /* requires non zero csum, for symmetry with GSO */
>>> if (!uh->check) {
>>> NAPI_GRO_CB(skb)->flush = 1;
>>>
>>
>> Why uh2 is left unchanged ?
>>
>> uh2 = udp_hdr(p);
>
> Isn't that the same as th2 = tcp_hdr(p) in tcp_gro_receive? no frag0
> optimization to worry about for packets on the list.
My feeling was that tcp_gro_receive() is terminal in the GRO stack.
While UDP could be encapsulated in UDP :)
I guess we do not support this yet.
Years ago we made sure to propagate the current header offset into GRO stack
(when we added SIT/IPIP/GRE support to GRO)
299603e8370a93dd5d8e8d800f0dff1ce2c53d36 ("net-gro: Prepare GRO stack for the upcoming tunneling support")
udp_hdr() is using transport header, which is unique for skb "on the list"
On Mon, Nov 9, 2020 at 1:54 PM Eric Dumazet <[email protected]> wrote:
>
>
>
> On 11/9/20 7:28 PM, Willem de Bruijn wrote:
> > On Mon, Nov 9, 2020 at 12:37 PM Eric Dumazet <[email protected]> wrote:
> >>
> >>
> >>
> >> On 11/9/20 5:56 PM, Alexander Lobakin wrote:
> >>> While testing UDP GSO fraglists forwarding through driver that uses
> >>> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
> >>> iperf packets:
> >>>
> >>> [ ID] Interval Transfer Bitrate Jitter
> >>> [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
> >>>
> >>> Simple switch to napi_gro_receive() any other method without frag0
> >>> shortcut completely resolved them.
> >>>
> >>> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
> >>> callback. While it's probably OK for non-frag0 paths (when all
> >>> headers or even the entire frame are already in skb->data), this
> >>> inline points to junk when using Fast GRO (napi_gro_frags() or
> >>> napi_gro_receive() with only Ethernet header in skb->data and all
> >>> the rest in shinfo->frags) and breaks GRO packet compilation and
> >>> the packet flow itself.
> >>> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
> >>> are typically used. UDP even has an inline helper that makes use of
> >>> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
> >>> to get rid of the out-of-order delivers.
> >>>
> >>> Present since the introduction of plain UDP GRO in 5.0-rc1.
> >>>
> >>> Since v1 [1]:
> >>> - added a NULL pointer check for "uh" as suggested by Willem.
> >>>
> >>> [1] https://lore.kernel.org/netdev/[email protected]
> >>>
> >>> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
> >>> Signed-off-by: Alexander Lobakin <[email protected]>
> >>> ---
> >>> net/ipv4/udp_offload.c | 7 ++++++-
> >>> 1 file changed, 6 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> >>> index e67a66fbf27b..7f6bd221880a 100644
> >>> --- a/net/ipv4/udp_offload.c
> >>> +++ b/net/ipv4/udp_offload.c
> >>> @@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
> >>> static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
> >>> struct sk_buff *skb)
> >>> {
> >>> - struct udphdr *uh = udp_hdr(skb);
> >>> + struct udphdr *uh = udp_gro_udphdr(skb);
> >>> struct sk_buff *pp = NULL;
> >>> struct udphdr *uh2;
> >>> struct sk_buff *p;
> >>> unsigned int ulen;
> >>> int ret = 0;
> >>>
> >>> + if (unlikely(!uh)) {
> >>
> >> How uh could be NULL here ?
> >>
> >> My understanding is that udp_gro_receive() is called
> >> only after udp4_gro_receive() or udp6_gro_receive()
> >> validated that udp_gro_udphdr(skb) was not NULL.
> >
> > Oh indeed. This has already been checked before.
> >
> >>> + NAPI_GRO_CB(skb)->flush = 1;
> >>> + return NULL;
> >>> + }
> >>> +
> >>> /* requires non zero csum, for symmetry with GSO */
> >>> if (!uh->check) {
> >>> NAPI_GRO_CB(skb)->flush = 1;
> >>>
> >>
> >> Why uh2 is left unchanged ?
> >>
> >> uh2 = udp_hdr(p);
> >
> > Isn't that the same as th2 = tcp_hdr(p) in tcp_gro_receive? no frag0
> > optimization to worry about for packets on the list.
>
> My feeling was that tcp_gro_receive() is terminal in the GRO stack.
>
> While UDP could be encapsulated in UDP :)
>
> I guess we do not support this yet.
>
> Years ago we made sure to propagate the current header offset into GRO stack
> (when we added SIT/IPIP/GRE support to GRO)
> 299603e8370a93dd5d8e8d800f0dff1ce2c53d36 ("net-gro: Prepare GRO stack for the upcoming tunneling support")
On which note, and Alexander's mention of udp4_lib_lookup_skb(), that
performs a standard ip_hdr on the possibly frag0 optimized incoming
packet:
struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
__be16 sport, __be16 dport)
{
const struct iphdr *iph = ip_hdr(skb);
return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
iph->daddr, dport, inet_iif(skb),
inet_sdif(skb), &udp_table, NULL);
}
This should use skb_gro_header_.. too, then?
On 11/9/20 7:26 PM, Alexander Lobakin wrote:
> From: Eric Dumazet <[email protected]>
> Date: Mon, 9 Nov 2020 18:37:36 +0100
>
>> On 11/9/20 5:56 PM, Alexander Lobakin wrote:
>>> While testing UDP GSO fraglists forwarding through driver that uses
>>> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
>>> iperf packets:
>>>
.
>>>
>>> Since v1 [1]:
>>> - added a NULL pointer check for "uh" as suggested by Willem.
>>>
>>> [1] https://lore.kernel.org/netdev/[email protected]
>>>
>>> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
>>> Signed-off-by: Alexander Lobakin <[email protected]>
>>> ---
>>> net/ipv4/udp_offload.c | 7 ++++++-
>>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>>> index e67a66fbf27b..7f6bd221880a 100644
>>> --- a/net/ipv4/udp_offload.c
>>> +++ b/net/ipv4/udp_offload.c
>>> @@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
>>> static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>>> struct sk_buff *skb)
>>> {
>>> - struct udphdr *uh = udp_hdr(skb);
>>> + struct udphdr *uh = udp_gro_udphdr(skb);
>>> struct sk_buff *pp = NULL;
>>> struct udphdr *uh2;
>>> struct sk_buff *p;
>>> unsigned int ulen;
>>> int ret = 0;
>>>
>>> + if (unlikely(!uh)) {
>>
>> How uh could be NULL here ?
>>
>> My understanding is that udp_gro_receive() is called
>> only after udp4_gro_receive() or udp6_gro_receive()
>> validated that udp_gro_udphdr(skb) was not NULL.
>
> Right, but only after udp{4,6}_lib_lookup_skb() in certain cases.
> I don't know for sure if their logic can actually edit skb->data,
> so it's better to check from my point of view.
Not really. This would send a wrong signal to readers of this code.
I do not think these functions can mess with GRO internals.
This would be a nightmare, GRO is already way too complex.
In fact these functions should use a const qualifier
for their " struct sk_buff *skb" argument to prevent future bugs.
I will test and submit this patch :
diff --git a/include/net/ip.h b/include/net/ip.h
index 2d6b985d11ccaa75827b3a15ac3f898d7a193242..e20874059f826eb0f9e899aed556bfbc9c9d71e8 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -99,7 +99,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
#define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
/* return enslaved device index if relevant */
-static inline int inet_sdif(struct sk_buff *skb)
+static inline int inet_sdif(const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
diff --git a/include/net/udp.h b/include/net/udp.h
index 295d52a73598277dc5071536f777d1a87e7df1d1..877832bed4713a011a514a2f6f522728c8c89e20 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -164,7 +164,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr);
}
-typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
+typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
__be16 dport);
INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
@@ -313,7 +313,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif, int sdif,
struct udp_table *tbl, struct sk_buff *skb);
-struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
+struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
struct sock *udp6_lib_lookup(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@ -324,7 +324,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *tbl,
struct sk_buff *skb);
-struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport);
/* UDP uses skb->dev_scratch to cache as much information as possible and avoid
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 09f0a23d1a01741d335ce45f25fe70a4e00698c7..8b8dadfea6c9854e6bfaa0fabcb774db39976da3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -541,7 +541,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
inet_sdif(skb), udptable, skb);
}
-struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
+struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
const struct iphdr *iph = ip_hdr(skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 29d9691359b9c49ccb56a11f79e3658b1a76700d..adfe9ca6f516612b5aad6d6c654c7da1dd56a50e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -276,7 +276,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
inet6_sdif(skb), udptable, skb);
}
-struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
From: Willem de Bruijn <[email protected]>
Date: Mon, 9 Nov 2020 14:06:07 -0500
> On Mon, Nov 9, 2020 at 1:54 PM Eric Dumazet <[email protected]> wrote:
>>
>>
>>
>> On 11/9/20 7:28 PM, Willem de Bruijn wrote:
>>> On Mon, Nov 9, 2020 at 12:37 PM Eric Dumazet <[email protected]> wrote:
>>>>
>>>>
>>>>
>>>> On 11/9/20 5:56 PM, Alexander Lobakin wrote:
>>>>> While testing UDP GSO fraglists forwarding through driver that uses
>>>>> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
>>>>> iperf packets:
>>>>>
>>>>> [ ID] Interval Transfer Bitrate Jitter
>>>>> [SUM] 0.0-40.0 sec 12106 datagrams received out-of-order
>>>>>
>>>>> Simple switch to napi_gro_receive() any other method without frag0
>>>>> shortcut completely resolved them.
>>>>>
>>>>> I've found that UDP GRO uses udp_hdr(skb) in its .gro_receive()
>>>>> callback. While it's probably OK for non-frag0 paths (when all
>>>>> headers or even the entire frame are already in skb->data), this
>>>>> inline points to junk when using Fast GRO (napi_gro_frags() or
>>>>> napi_gro_receive() with only Ethernet header in skb->data and all
>>>>> the rest in shinfo->frags) and breaks GRO packet compilation and
>>>>> the packet flow itself.
>>>>> To support both modes, skb_gro_header_fast() + skb_gro_header_slow()
>>>>> are typically used. UDP even has an inline helper that makes use of
>>>>> them, udp_gro_udphdr(). Use that instead of troublemaking udp_hdr()
>>>>> to get rid of the out-of-order delivers.
>>>>>
>>>>> Present since the introduction of plain UDP GRO in 5.0-rc1.
>>>>>
>>>>> Since v1 [1]:
>>>>> - added a NULL pointer check for "uh" as suggested by Willem.
>>>>>
>>>>> [1] https://lore.kernel.org/netdev/[email protected]
>>>>>
>>>>> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
>>>>> Signed-off-by: Alexander Lobakin <[email protected]>
>>>>> ---
>>>>> net/ipv4/udp_offload.c | 7 ++++++-
>>>>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>>>>> index e67a66fbf27b..7f6bd221880a 100644
>>>>> --- a/net/ipv4/udp_offload.c
>>>>> +++ b/net/ipv4/udp_offload.c
>>>>> @@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
>>>>> static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>>>>> struct sk_buff *skb)
>>>>> {
>>>>> - struct udphdr *uh = udp_hdr(skb);
>>>>> + struct udphdr *uh = udp_gro_udphdr(skb);
>>>>> struct sk_buff *pp = NULL;
>>>>> struct udphdr *uh2;
>>>>> struct sk_buff *p;
>>>>> unsigned int ulen;
>>>>> int ret = 0;
>>>>>
>>>>> + if (unlikely(!uh)) {
>>>>
>>>> How uh could be NULL here ?
>>>>
>>>> My understanding is that udp_gro_receive() is called
>>>> only after udp4_gro_receive() or udp6_gro_receive()
>>>> validated that udp_gro_udphdr(skb) was not NULL.
>>>
>>> Oh indeed. This has already been checked before.
>>>
>>>>> + NAPI_GRO_CB(skb)->flush = 1;
>>>>> + return NULL;
>>>>> + }
>>>>> +
>>>>> /* requires non zero csum, for symmetry with GSO */
>>>>> if (!uh->check) {
>>>>> NAPI_GRO_CB(skb)->flush = 1;
>>>>>
>>>>
>>>> Why uh2 is left unchanged ?
>>>>
>>>> uh2 = udp_hdr(p);
>>>
>>> Isn't that the same as th2 = tcp_hdr(p) in tcp_gro_receive? no frag0
>>> optimization to worry about for packets on the list.
>>
>> My feeling was that tcp_gro_receive() is terminal in the GRO stack.
>>
>> While UDP could be encapsulated in UDP :)
>>
>> I guess we do not support this yet.
>>
>> Years ago we made sure to propagate the current header offset into GRO stack
>> (when we added SIT/IPIP/GRE support to GRO)
>> 299603e8370a93dd5d8e8d800f0dff1ce2c53d36 ("net-gro: Prepare GRO stack for the upcoming tunneling support")
>
> On which note, and Alexander's mention of udp4_lib_lookup_skb(), that
> performs a standard ip_hdr on the possibly frag0 optimized incoming
> packet:
>
> struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
> __be16 sport, __be16 dport)
> {
> const struct iphdr *iph = ip_hdr(skb);
>
> return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport,
> iph->daddr, dport, inet_iif(skb),
> inet_sdif(skb), &udp_table, NULL);
> }
>
> This should use skb_gro_header_.. too, then?
Well, both of udp{4,6}_lib_lookup_skb() and a couple of neighbour
functions use ip_hdr(skb). I didn't check this path before as my
test case was NAT/forwarding with no sk.
I suspect they should use skb_gro_network_header() to obtain an IP
header. I'll fix them in v3, thanks for pointing this out!
Al
From: Eric Dumazet <[email protected]>
Date: Mon, 9 Nov 2020 20:29:03 +0100
> On 11/9/20 7:26 PM, Alexander Lobakin wrote:
>> From: Eric Dumazet <[email protected]>
>> Date: Mon, 9 Nov 2020 18:37:36 +0100
>>
>>> On 11/9/20 5:56 PM, Alexander Lobakin wrote:
>>>> While testing UDP GSO fraglists forwarding through driver that uses
>>>> Fast GRO (via napi_gro_frags()), I was observing lots of out-of-order
>>>> iperf packets:
>>>>
>.
>>>>
>>>> Since v1 [1]:
>>>> - added a NULL pointer check for "uh" as suggested by Willem.
>>>>
>>>> [1] https://lore.kernel.org/netdev/[email protected]
>>>>
>>>> Fixes: e20cf8d3f1f7 ("udp: implement GRO for plain UDP sockets.")
>>>> Signed-off-by: Alexander Lobakin <[email protected]>
>>>> ---
>>>> net/ipv4/udp_offload.c | 7 ++++++-
>>>> 1 file changed, 6 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
>>>> index e67a66fbf27b..7f6bd221880a 100644
>>>> --- a/net/ipv4/udp_offload.c
>>>> +++ b/net/ipv4/udp_offload.c
>>>> @@ -366,13 +366,18 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
>>>> static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
>>>> struct sk_buff *skb)
>>>> {
>>>> - struct udphdr *uh = udp_hdr(skb);
>>>> + struct udphdr *uh = udp_gro_udphdr(skb);
>>>> struct sk_buff *pp = NULL;
>>>> struct udphdr *uh2;
>>>> struct sk_buff *p;
>>>> unsigned int ulen;
>>>> int ret = 0;
>>>>
>>>> + if (unlikely(!uh)) {
>>>
>>> How uh could be NULL here ?
>>>
>>> My understanding is that udp_gro_receive() is called
>>> only after udp4_gro_receive() or udp6_gro_receive()
>>> validated that udp_gro_udphdr(skb) was not NULL.
>>
>> Right, but only after udp{4,6}_lib_lookup_skb() in certain cases.
>> I don't know for sure if their logic can actually edit skb->data,
>> so it's better to check from my point of view.
>
> Not really. This would send a wrong signal to readers of this code.
>
> I do not think these functions can mess with GRO internals.
>
> This would be a nightmare, GRO is already way too complex.
>
> In fact these functions should use a const qualifier
> for their " struct sk_buff *skb" argument to prevent future bugs.
Agree, you're right. Lack of const qualifiers gave me a doubt that
these functions can't edit skbs. They really should use it if they
really can't.
I'll omit the check in v3.
> I will test and submit this patch :
That would be a very nice one, thanks.
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 2d6b985d11ccaa75827b3a15ac3f898d7a193242..e20874059f826eb0f9e899aed556bfbc9c9d71e8 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -99,7 +99,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
> #define PKTINFO_SKB_CB(skb) ((struct in_pktinfo *)((skb)->cb))
>
> /* return enslaved device index if relevant */
> -static inline int inet_sdif(struct sk_buff *skb)
> +static inline int inet_sdif(const struct sk_buff *skb)
> {
> #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
> if (skb && ipv4_l3mdev_skb(IPCB(skb)->flags))
> diff --git a/include/net/udp.h b/include/net/udp.h
> index 295d52a73598277dc5071536f777d1a87e7df1d1..877832bed4713a011a514a2f6f522728c8c89e20 100644
> --- a/include/net/udp.h
> +++ b/include/net/udp.h
> @@ -164,7 +164,7 @@ static inline void udp_csum_pull_header(struct sk_buff *skb)
> UDP_SKB_CB(skb)->cscov -= sizeof(struct udphdr);
> }
>
> -typedef struct sock *(*udp_lookup_t)(struct sk_buff *skb, __be16 sport,
> +typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
> __be16 dport);
>
> INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *,
> @@ -313,7 +313,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
> struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
> __be32 daddr, __be16 dport, int dif, int sdif,
> struct udp_table *tbl, struct sk_buff *skb);
> -struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
> +struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
> __be16 sport, __be16 dport);
> struct sock *udp6_lib_lookup(struct net *net,
> const struct in6_addr *saddr, __be16 sport,
> @@ -324,7 +324,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
> const struct in6_addr *daddr, __be16 dport,
> int dif, int sdif, struct udp_table *tbl,
> struct sk_buff *skb);
> -struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
> +struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
> __be16 sport, __be16 dport);
>
> /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 09f0a23d1a01741d335ce45f25fe70a4e00698c7..8b8dadfea6c9854e6bfaa0fabcb774db39976da3 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -541,7 +541,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
> inet_sdif(skb), udptable, skb);
> }
>
> -struct sock *udp4_lib_lookup_skb(struct sk_buff *skb,
> +struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
> __be16 sport, __be16 dport)
> {
> const struct iphdr *iph = ip_hdr(skb);
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 29d9691359b9c49ccb56a11f79e3658b1a76700d..adfe9ca6f516612b5aad6d6c654c7da1dd56a50e 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -276,7 +276,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
> inet6_sdif(skb), udptable, skb);
> }
>
> -struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
> +struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
> __be16 sport, __be16 dport)
> {
> const struct ipv6hdr *iph = ipv6_hdr(skb);
>
Al